Commit | Line | Data |
---|---|---|
273c3903 MM |
1 | #!/usr/bin/env python3 |
2 | # rpmconf-matt [--sync-only] | |
3 | ||
751c2a60 MM |
4 | # SECURITY NOTICE: If your system is missing the original version (.rpmbase) of |
5 | # a configuration file you have modified, rpmconf-matt will automatically get it | |
4f0f2877 MM |
6 | # by downloading the original package with "dnf download", which does not |
7 | # check the package's GPG signature even if dnf is normally configured to do so | |
8 | # (https://bugzilla.redhat.com/show_bug.cgi?id=1879791). rpmconf-matt checks the digest of | |
751c2a60 MM |
9 | # the file before using it, but a fuzzed package might be able to compromise |
10 | # your system during extraction. This is less of a problem if your repository | |
11 | # metadata is integrity protected (e.g., by SSL on the metalink or repomd) | |
4f0f2877 MM |
12 | # because "dnf download" refuses to save a package whose digest does not match |
13 | # the metadata. | |
751c2a60 | 14 | |
273c3903 MM |
15 | # Known blockers to submission to Fedora: Basically removing assumptions specific to my setup. |
16 | # - Decide how to name it vs. existing "rpmconf" in Fedora | |
17 | # - Make it configurable what part of the filesystem to scan. Is there a | |
18 | # default that's sufficiently safe for people to use on systems with untrusted | |
19 | # users? | |
20 | # - Should save its own files in another tree rather than blacklisting | |
21 | # directories known to cause problems if it adds its files there. | |
22 | # - Make diff program configurable / smarter default | |
23 | ||
24 | # TODO: Move the "sync" part into a dnf plugin that runs after every transaction | |
25 | # to make it more robust. Perhaps there can be an option to run the merge | |
26 | # interactively. With tighter integration, we might be able to skip scanning the | |
27 | # whole filesystem, but that would add one more kind of state that can go wrong. | |
28 | ||
29 | # Python 3 conversion, 2017-11-12 | |
30 | # One could argue we should treat filenames and RPM names as bytes, but it's too | |
31 | # much of a pain to fix all the resulting fallout now, and we don't expect | |
32 | # adversarial names anyway. ~ Matt 2017-11-12 | |
33 | # | |
34 | # Fedora 30 -> 32 upgrade: remove some .decode() calls (guess rpm-python changed | |
35 | # some things from bytes to str) ~ Matt 2020-06-01 | |
36 | ||
37 | import collections | |
38 | import hashlib | |
39 | import os | |
40 | import pwd, grp | |
41 | import re | |
42 | import shutil | |
43 | import stat | |
44 | import subprocess | |
45 | import sys | |
46 | import tempfile | |
47 | import time | |
48 | ||
49 | import rpm | |
50 | ||
def getFileDigest(path, algoHint):
    """Return the hex digest of the file at ``path``.

    The hash algorithm is guessed from the length of ``algoHint``, which is
    expected to be a hex digest produced by the wanted algorithm: 32
    characters selects MD5 and 64 characters selects SHA-256.

    The file is hashed in fixed-size chunks so that a large file is never
    held in memory all at once.

    Raises:
        ValueError: if ``algoHint`` has any other length.
        OSError: if ``path`` cannot be opened or read.
    """
    # TODO: Get the digest algorithm from RPM??
    # File a bug to have rpmfiDigestAlgo exposed to Python?
    if len(algoHint) == 32:  # adobe packages
        algo = hashlib.md5
    elif len(algoHint) == 64:
        algo = hashlib.sha256
    else:
        raise ValueError('Failed to guess digest algorithm')
    with open(path, 'rb') as f:
        hasher = algo()
        # iter() with a sentinel keeps calling read() until it returns b'' (EOF).
        for chunk in iter(lambda: f.read(65536), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
62 | ||
def setAttributes(fe, path):
    """Apply the owner, group, permission bits and mtime from ``fe`` to ``path``.

    ``fe`` must expose ``user`` and ``group`` (names), ``mode`` and ``mtime``
    attributes. Both the access time and the modification time of ``path``
    are set to ``fe.mtime``.

    Raises:
        KeyError: if the user or group name is not known on this system.
    """
    # C.f. https://github.com/rpm-software-management/rpm/blob/rpm-4.13.0-rc1/lib/fsm.c#L713
    # Resolve the names first; an unknown user or group surfaces as KeyError here.
    uid = pwd.getpwnam(fe.user).pw_uid
    gid = grp.getgrnam(fe.group).gr_gid
    os.chown(path, uid, gid)
    # Keep only the permission, setuid/setgid and sticky bits; drop the file-type bits.
    permission_bits = fe.mode & 0o7777
    os.chmod(path, permission_bits)
    timestamps = (fe.mtime, fe.mtime)
    os.utime(path, timestamps)
69 | ||
def makeConfFindCommand(expr):
    """Build the shell command line for a ``find`` scan of the root filesystem.

    The command is meant to catch any files ever managed by RPM without
    getting confused by trees such as /proc or by malicious user-created
    files; unfortunately we can't just ask RPM which files it previously
    managed. The heuristic: stay on the root filesystem (``-xdev``), prune
    /mnt (avoids an error on /mnt/root, which -xdev doesn't catch) and
    /var/www/html, and prune anything writable by "other" such as /var/tmp.

    ``expr`` is appended verbatim as the remainder of the find expression,
    so it must already be quoted for the shell.
    """
    # TODO: Make the search criteria more general.
    prune_prefix = r"find / -xdev \( -path /mnt -or -path /var/www/html -or -perm /002 \) -prune -or "
    return prune_prefix + expr
79 | ||
def doMerge(a, b, c, output):
    """Run the external ``rpmconf-matt-merge`` tool on three input paths.

    The tool is invoked as ``rpmconf-matt-merge OUTPUT A B C``. When nothing
    exists at ``a`` (not even a dangling symlink), an empty string is passed
    in its place.

    Raises:
        subprocess.CalledProcessError: if the tool exits with a non-zero status.
    """
    # TODO: Make diff program customizable.
    # FIXME: Stop leaving ".orig" files around. At least name them distinctively.
    # kdiff3 wrapper to work around issue with Qt apps running as root under Qubes ~ 2015-08-20
    if os.path.lexists(a):
        first_input = a
    else:
        first_input = ''
    subprocess.check_call(['rpmconf-matt-merge', output, first_input, b, c])
86 | ||
class NeededPackage:
    """A package that has to be fetched, plus the files wanted out of it."""

    def __init__(self, header):
        """Remember ``header.nvra`` and start with no wanted files."""
        # Maps live path -> (digest, path to download to).
        self.paths = {}
        self.nvra = header.nvra
91 | ||
def rpmconf(syncOnly=False):
    """Maintain base copies of RPM config files and merge pending changes.

    Sync phase (always run):
      1. Rename every ``*.rpmsave`` file to ``*.rpmsave-<timestamp>`` and a
         sibling ``.rpmbase`` file to ``*.rpmsave-<timestamp>-base``.
      2. For every installed, non-ghost, regular config file in the RPM
         database, make sure a ``.rpmbase`` file (or, when a ``.rpmnew``
         exists, that ``.rpmnew`` file) has the digest recorded in the
         database. The content is copied from the live file when the live
         file still matches; otherwise the package is fetched with
         ``dnf download`` and the file is extracted from it.
      3. Report ``.rpmnew`` files that no database entry accounts for
         (deletion is currently disabled) and delete ``.rpmbase`` files whose
         live path is no longer a managed config file.

    Merge phase (skipped when ``syncOnly`` is true): run ``doMerge`` for each
    stamped rpmsave file and each rpmnew file, and print a summary.

    Exits the process with status 1 when not running as root.

    Raises:
        NotImplementedError: if a config file carries file capabilities.
        subprocess.CalledProcessError: if find, dnf, the extraction pipeline
            or mv exits with a non-zero status.
    """
    if os.geteuid() != 0:
        print('This tool needs to run as root.', file=sys.stderr)
        sys.exit(1)

    # First, rename any rpmsave files and corresponding rpmbase files before we
    # would overwrite the rpmbase files. We'll do this to any file type, even
    # though there will only ever be base files for regular files.
    print('Scanning for rpmsave files that need to be stamped.')
    saveStamp = str(int(time.time()))
    for savePath in subprocess.check_output(makeConfFindCommand("-name '*.rpmsave' -print"), shell=True).decode().splitlines():
        livePath = savePath[:-len('.rpmsave')]
        liveBasePath = livePath + '.rpmbase'
        stampedSavePath = livePath + '.rpmsave-' + saveStamp
        stampedSaveBasePath = stampedSavePath + '-base'
        print('-- Timestamping rpmsave file for %s.' % livePath)
        # XXX: Make sure we are not clobbering existing files?
        os.rename(savePath, stampedSavePath)
        if os.path.lexists(liveBasePath):
            os.rename(liveBasePath, stampedSaveBasePath)

    filesToMerge = collections.defaultdict(lambda: [[], False])  # live path -> (list of rpmsave stamps, bool if rpmnew)

    print('Scanning for config files that need base files created.')
    filesDone = {}  # live path -> (nevra, digest)
    needPackages = {}  # nevra -> NeededPackage; no more defaultdict because NeededPackage needs header
    ts = rpm.ts()
    mi = ts.dbMatch()
    for header in mi:
        nevra = header.nevra
        for fe in rpm.files(header):
            # Only installed config files.
            if fe.state != rpm.RPMFILE_STATE_NORMAL: continue
            if not (fe.fflags & rpm.RPMFILE_CONFIG): continue
            # For a ghost, we have no base content to write. Probably best to let this be a two-way merge if the file becomes non-ghost later.
            if fe.fflags & rpm.RPMFILE_GHOST: continue
            # For now, we only handle regular files. Conflicts on config symlinks seem to be rare.
            if not stat.S_ISREG(fe.mode): continue
            if fe.caps != '': raise NotImplementedError('File capabilities are not implemented: %s' % fe.name)

            # Extension point directories whose readers can't handle additional *.rpm* files.
            # /etc/skel/ is not actually causing a problem but leads to ugly persistent state.
            # TODO: Find a better workaround.
            if re.search('^(/etc/skel/|/etc/rpm/macros|/etc/logrotate.d/|/etc/grub.d/)', fe.name):
                continue

            # We need this check to avoid thrashing a conflicted base file.
            if fe.name in filesDone:
                (oldNevra, oldDigest) = filesDone[fe.name]
                if fe.digest != oldDigest:
                    print('Conflict at %s: have %s from %s, ignoring %s from %s' %
                          (fe.name, oldDigest, oldNevra, fe.digest, nevra),
                          file=sys.stderr)
                continue
            filesDone[fe.name] = (nevra, fe.digest)

            path_new = fe.name + '.rpmnew'
            download_path = None
            if os.path.lexists(path_new):
                filesToMerge[fe.name][1] = True
                # The live config file is not based on the current DB entry.
                # Hopefully we already have a base for the live config file; if not, there's nothing we can do about it now.
                # We do want to make sure the rpmnew file is correct.
                if getFileDigest(path_new, fe.digest) != fe.digest:
                    download_path = path_new
            else:
                path_base = fe.name + '.rpmbase'
                if not (os.path.lexists(path_base) and getFileDigest(path_base, fe.digest) == fe.digest):
                    if (os.path.lexists(fe.name) and getFileDigest(fe.name, fe.digest) == fe.digest):
                        # The live file has the original content.
                        # Copy the content and set the original attributes manually.
                        path_tmp = fe.name + '.rpmbase-tmp'
                        shutil.copyfile(fe.name, path_tmp)
                        setAttributes(fe, path_tmp)
                        os.rename(path_tmp, path_base)
                        print('- %s: Copied %s from %s.' % (nevra, path_base, fe.name))
                    else:
                        download_path = path_base
            if download_path:
                if nevra not in needPackages:
                    needPackages[nevra] = NeededPackage(header)
                needPackages[nevra].paths[fe.name] = (fe.digest, download_path)
                print('- %s: Need to download %s.' % (nevra, download_path))

    if needPackages:
        print('Downloading %d packages.' % len(needPackages))
        packages_tmpdir = tempfile.mkdtemp(prefix='rpmconf-packages')
        # The try/finally blocks make sure the temporary directories are
        # removed even when the download, extraction or digest check raises.
        try:
            subprocess.check_call(['dnf', 'download'] + list(needPackages), cwd=packages_tmpdir)
            for nevra, neededPkg in needPackages.items():
                packagePath = '%s/%s.rpm' % (packages_tmpdir, neededPkg.nvra)
                extract_tmpdir = tempfile.mkdtemp(prefix='rpmconf-extract-%s' % nevra)
                try:
                    cpioNeedPaths = ['.' + p for p in neededPkg.paths]  # go figure
                    subprocess.check_call(['/bin/bash', '-c', 'p="$1"; shift; rpm2cpio "$p" | cpio --extract --quiet --preserve-modification-time --make-directories "$@"', '--', packagePath] + cpioNeedPaths, cwd=extract_tmpdir)
                    print('- Extracted %s.' % nevra)
                    for livePath, (needDigest, downloadPath) in neededPkg.paths.items():
                        tmpPath = extract_tmpdir + livePath
                        tmpDigest = getFileDigest(tmpPath, needDigest)
                        if tmpDigest != needDigest:
                            print('%s: got digest %s, wanted %s' % (livePath, tmpDigest, needDigest), file=sys.stderr)
                            continue
                        # This is easiest in case it is cross-filesystem, etc. mv should preserve all attributes.
                        subprocess.check_call(['mv', '-f', tmpPath, downloadPath])
                        print('-- Installed %s.' % downloadPath)
                finally:
                    shutil.rmtree(extract_tmpdir)
        finally:
            shutil.rmtree(packages_tmpdir)

    print('Scanning for obsolete rpmnew files.')
    for newPath in subprocess.check_output(makeConfFindCommand("-type f -name '*.rpmnew' -print"), shell=True).decode().splitlines():
        livePath = newPath[:-len('.rpmnew')]
        if livePath not in filesToMerge:  # only rpmnew files will be recorded in filesToMerge yet
            print('-- Deleting %s. UNTESTED' % newPath)
            #os.unlink(newPath)

    # Remove rpmbase files for config files that are no longer managed, to not leave cruft.
    # This intentionally does not remove rpmsave-base files. ~ 2014-07-03
    # Note: If the config file had been modified, RPM would move it to
    # rpmsave on package removal (whether or not it was noreplace), so we'd
    # stamp the base file before we get here. ~ 2017-11-12
    print('Scanning for obsolete rpmbase files.')
    for basePath in subprocess.check_output(makeConfFindCommand("-type f -name '*.rpmbase' -print"), shell=True).decode().splitlines():
        livePath = basePath[:-len('.rpmbase')]
        if livePath not in filesDone:
            print('-- Deleting %s.' % basePath)
            os.unlink(basePath)

    # "sync vs. merge" terminology is inspired by Perforce. We'll want a
    # better term for "sync" before releasing this to the public.
    if syncOnly:
        print('rpmconf sync complete.')
        return
    else:
        print('rpmconf sync complete. You can interrupt if you don\'t wish to merge now.')

    print('Scanning for rpmsave files.')
    for savePath in subprocess.check_output(makeConfFindCommand("-type f -name '*.rpmsave-*' -print"), shell=True).decode().splitlines():
        # Raw string: '\.' and '\d' are regex escapes, not Python string escapes.
        # The '$' anchor also filters out the '-base' companions the find matches.
        m = re.search(r'^(.*)\.rpmsave-(\d+)$', savePath)
        if not m: continue
        (livePath, stamp) = (m.group(1), int(m.group(2)))
        filesToMerge[livePath][0].append(stamp)

    # Nested function for the ability to return from a nested loop...
    def mergeFile(livePath):
        """Merge all pending rpmsave/rpmnew files for ``livePath``.

        Returns 0 when every merge finished and 1 when a merge tool run
        failed and the file was left unfinished.
        """
        print('- Merging %s.' % livePath)
        (saveStamps, haveRpmnew) = filesToMerge[livePath]
        saveStamps.sort()  # mutates the original, that's OK
        # TODO: If a package was uninstalled, we could have an rpmsave and rpmsave-base with no live.
        # We want to alert the user that the configuration change is no longer having an effect. How?
        # Currently kdiff3 comes up with a bunch of error dialogs and I have to manually intervene.
        for i, stamp in enumerate(saveStamps):
            # Each rpmsave is merged into the next newer rpmsave; the newest one into the live file.
            c_output = '%s.rpmsave-%d' % (livePath, saveStamps[i+1]) if i+1 < len(saveStamps) else livePath
            b = '%s.rpmsave-%d' % (livePath, stamp)
            a = b + '-base'
            try:
                doMerge(a, b, c_output, c_output)
            except subprocess.CalledProcessError:
                print('- Leaving %s merge unfinished.' % livePath)
                return 1
            os.unlink(b)
            if os.path.lexists(a): os.unlink(a)
        if haveRpmnew:
            try:
                doMerge(livePath + '.rpmbase', livePath, livePath + '.rpmnew', livePath)
            except subprocess.CalledProcessError:
                print('- Leaving %s merge unfinished.' % livePath)
                return 1
            # The merged-in rpmnew content becomes the new base.
            os.rename(livePath + '.rpmnew', livePath + '.rpmbase')
        print('- Merged %s.' % livePath)
        return 0

    unfinishedFiles = 0
    for livePath in filesToMerge:
        unfinishedFiles += mergeFile(livePath)

    if unfinishedFiles == 0:
        print('rpmconf merge complete!')
    else:
        print('No more files to consider. %d files left unfinished.' % unfinishedFiles)
269 | ||
if __name__ == '__main__':
    # Usage: rpmconf-matt [--sync-only]
    # TODO: Adopt a real option-parsing library.
    cli_args = sys.argv[1:]
    if cli_args not in ([], ['--sync-only']):
        print('Unrecognized arguments.', file=sys.stderr)
        sys.exit(1)
    # At this point the list is either empty (sync + merge) or the single sync-only flag.
    rpmconf(syncOnly=bool(cli_args))