2 # rpmconf-matt [--sync-only]
4 # Known blockers to submission to Fedora: Basically removing assumptions specific to my setup.
5 # - Decide how to name it vs. existing "rpmconf" in Fedora
6 # - Make it configurable what part of the filesystem to scan. Is there a
7 # default that's sufficiently safe for people to use on systems with untrusted
9 # - Should save its own files in another tree rather than blacklisting
10 # directories known to cause problems if it adds its files there.
11 # - Make diff program configurable / smarter default
13 # TODO: Move the "sync" part into a dnf plugin that runs after every transaction
14 # to make it more robust. Perhaps there can be an option to run the merge
15 # interactively. With tighter integration, we might be able to skip scanning the
16 # whole filesystem, but that would add one more kind of state that can go wrong.
18 # Python 3 conversion, 2017-11-12
19 # One could argue we should treat filenames and RPM names as bytes, but it's too
20 # much of a pain to fix all the resulting fallout now, and we don't expect
21 # adversarial names anyway. ~ Matt 2017-11-12
23 # Fedora 30 -> 32 upgrade: remove some .decode() calls (guess rpm-python changed
24 # some things from bytes to str) ~ Matt 2020-06-01
def getFileDigest(path, algoHint):
    """Return the hex digest of the file at *path*.

    The digest algorithm is guessed from the length of *algoHint*, which is
    an existing hex digest of the kind we want to reproduce (callers in this
    file pass the digest recorded for the file entry):

    * 32 hex characters -> MD5
    * 64 hex characters -> SHA-256

    Raises ValueError if the hint has any other length, and OSError if the
    file cannot be opened or read.
    """
    # Local import so this function does not depend on the file-level import
    # block, which is not visible from here.
    import hashlib

    # TODO: Get the digest algorithm from RPM??
    # File a bug to have rpmfiDigestAlgo exposed to Python?
    hintLength = len(algoHint)
    if hintLength == 32:  # MD5; seen on adobe packages
        algo = hashlib.md5
    elif hintLength == 64:
        algo = hashlib.sha256
    else:
        raise ValueError('Failed to guess digest algorithm')

    # Hash in fixed-size chunks so a large file is never held in memory whole.
    hasher = algo()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
def setAttributes(fe, path):
    """Apply the ownership, permission bits and mtime described by *fe* to *path*.

    fe   -- object exposing ``user``, ``group``, ``mode`` and ``mtime``
            (in this file it is an RPM file entry).
    path -- filesystem path to modify.

    Raises KeyError if the user or group name is not found, and OSError if
    any of the chown/chmod/utime calls fails.
    """
    # C.f. https://github.com/rpm-software-management/rpm/blob/rpm-4.13.0-rc1/lib/fsm.c#L713
    # If user or group is not found, we'll get a KeyError.
    uid = pwd.getpwnam(fe.user).pw_uid
    gid = grp.getgrnam(fe.group).gr_gid
    os.chown(path, uid, gid)
    # Only the permission/setuid/setgid/sticky bits are applied; the file-type
    # bits carried in fe.mode are masked off.
    os.chmod(path, fe.mode & 0o7777)
    # Access time and modification time are both set to the recorded mtime.
    os.utime(path, (fe.mtime, fe.mtime))
def makeConfFindCommand(expr):
    """Return a shell command string that runs ``find`` over the root filesystem.

    expr -- the trailing ``find`` expression (tests and actions), appended
            verbatim after the pruning clause.

    The command is intended to catch any files ever managed by RPM without
    getting confused by trees such as /proc or even malicious user-created
    files. Unfortunately, we can't just ask RPM which files it previously
    managed, so a heuristic is used: stay on the root filesystem (-xdev),
    prune /mnt (avoids an error on /mnt/root, which -xdev doesn't catch) and
    /var/www/html, and prune anything world-writable such as /var/tmp.

    The result is meant to be run through a shell; *expr* is not quoted here.
    """
    # TODO: Make the search criteria more general.
    pruneClause = r"\( -path /mnt -or -path /var/www/html -or -perm /002 \)"
    return "find / -xdev " + pruneClause + " -prune -or " + expr
def doMerge(a, b, c, output):
    """Run the external merge helper on three inputs, writing to *output*.

    a      -- first input path (callers in this file pass a base file); if it
              does not exist, an empty string is passed to the helper instead.
    b, c   -- the other two input paths, passed through unchanged.
    output -- path the helper is told to write the merge result to.

    Raises subprocess.CalledProcessError if the helper exits with a non-zero
    status; callers in this file treat that as an unfinished merge.
    """
    # TODO: Make diff program customizable.
    # FIXME: Stop leaving ".orig" files around. At least name them distinctively.
    # kdiff3 wrapper to work around issue with Qt apps running as root under Qubes ~ 2015-08-20
    firstInput = a if os.path.lexists(a) else ''
    subprocess.check_call(['rpmconf-matt-merge', output, firstInput, b, c])
class NeededPackage(object):
    """Bookkeeping for one package whose files have to be fetched again.

    Attributes:
      nvra  -- copied from ``header.nvra``.
      paths -- dict: live path -> (digest, path to download to). Starts
               empty; each instance gets its own dict.
    """

    def __init__(self, header):
        # Only header.nvra is read; the header itself is not retained.
        self.nvra = header.nvra
        self.paths = {}  # live path -> (digest, path to download to)
# rpmconf(syncOnly=False) -- main driver. Phases visible in this listing, in order:
#   1. rename every *.rpmsave file (and its matching *.rpmbase) to a timestamped name;
#   2. walk RPM file entries and, for each regular non-ghost config file, make sure a
#      correct .rpmnew / .rpmbase exists, copying from the live file or queueing a download;
#   3. download the queued packages, extract the needed files and move them into place;
#   4. scan for *.rpmnew and *.rpmbase files that no longer correspond to a tracked config file;
#   5. merge stamped rpmsave files and rpmnew files into the live files via doMerge().
# NOTE(review): the listing numbers embedded at the start of each line have gaps (e.g. 81 -> 83,
# 106 -> 111, 246 -> 249), so some statements of this function are not visible here. Comments
# below are hedged wherever they depend on a statement that is not shown.
81 def rpmconf(syncOnly=False):
# NOTE(review): the condition guarding this message (listing 82) and whatever follows it
# (listing 84-85) are not visible; presumably a root check that aborts -- confirm.
83 print('This tool needs to run as root.', file=sys.stderr)
86 # First, rename any rpmsave files and corresponding rpmbase files before we
87 # would overwrite the rpmbase files. We'll do this to any file type, even
88 # though there will only ever be base files for regular files.
89 print('Scanning for rpmsave files that need to be stamped.')
# A single stamp (integer epoch seconds, as a string) is shared by every file renamed in this run.
90 saveStamp = str(int(time.time()))
# The find command is a shell string, hence shell=True; its output is split into one path per line.
91 for savePath in subprocess.check_output(makeConfFindCommand("-name '*.rpmsave' -print"), shell=True).decode().splitlines():
92 livePath = savePath[:-len('.rpmsave')]
93 liveBasePath = livePath + '.rpmbase'
94 stampedSavePath = livePath + '.rpmsave-' + saveStamp
95 stampedSaveBasePath = stampedSavePath + '-base'
96 print('-- Timestamping rpmsave file for %s.' % livePath)
97 # XXX: Make sure we are not clobbering existing files?
98 os.rename(savePath, stampedSavePath)
# The base file travels with the rpmsave file it belongs to, under the name "<stamped>-base".
99 if os.path.lexists(liveBasePath):
100 os.rename(liveBasePath, stampedSaveBasePath)
# Each value is a mutable two-item list so that item 1 can be set in place and stamps appended to item 0.
102 filesToMerge = collections.defaultdict(lambda: [[], False]) # live path -> (list of rpmsave stamps, bool if rpmnew)
104 print('Scanning for config files that need base files created.')
105 filesDone = {} # live path -> (nevra, digest)
106 needPackages = {} # nevra -> NeededPackage; no more defaultdict because NeededPackage needs header
# NOTE(review): listing 107-110 are not visible; `header` and `nevra` used below are presumably
# bound there (e.g. while iterating the installed-package database) -- confirm.
111 for fe in rpm.files(header):
112 # Only installed config files.
113 if fe.state != rpm.RPMFILE_STATE_NORMAL: continue
114 if not (fe.fflags & rpm.RPMFILE_CONFIG): continue
115 # For a ghost, we have no base content to write. Probably best to let this be a two-way merge if the file becomes non-ghost later.
116 if fe.fflags & rpm.RPMFILE_GHOST: continue
117 # For now, we only handle regular files. Conflicts on config symlinks seem to be rare.
118 if not stat.S_ISREG(fe.mode): continue
119 if fe.caps != '': raise NotImplementedError('File capabilities are not implemented: %s' % fe.name)
121 # Extension point directories whose readers can't handle additional *.rpm* files.
122 # /etc/skel/ is not actually causing a problem but leads to ugly persistent state.
123 # TODO: Find a better workaround.
# NOTE(review): the statement guarded by this test (listing 125-126) is not visible;
# presumably matching paths are skipped -- confirm.
124 if re.search('^(/etc/skel/|/etc/rpm/macros|/etc/logrotate.d/|/etc/grub.d/)', fe.name):
127 # We need this check to avoid thrashing a conflicted base file.
# The entry recorded first for a path is kept; a later entry with a different digest is reported.
# NOTE(review): the end of the print call and the rest of this branch (listing 133-134, 136)
# are not visible.
128 if fe.name in filesDone:
129 (oldNevra, oldDigest) = filesDone[fe.name]
130 if fe.digest != oldDigest:
131 print('Conflict at %s: have %s from %s, ignoring %s from %s' %
132 (fe.name, oldDigest, oldNevra, fe.digest, nevra),
135 filesDone[fe.name] = (nevra, fe.digest)
137 path_new = fe.name + '.rpmnew'
# An existing .rpmnew marks this path as needing a merge (item 1 of its filesToMerge entry).
139 if os.path.lexists(path_new):
140 filesToMerge[fe.name][1] = True
141 # The live config file is not based on the current DB entry.
142 # Hopefully we already have a base for the live config file; if not, there's nothing we can do about it now.
143 # We do want to make sure the rpmnew file is correct.
# A .rpmnew whose digest differs from the recorded digest becomes the download target.
144 if getFileDigest(path_new, fe.digest) != fe.digest:
145 download_path = path_new
# NOTE(review): listing 146 is not visible; presumably an `else:` pairing with the rpmnew test -- confirm.
147 path_base = fe.name + '.rpmbase'
# The base needs (re)creating when it is missing or its digest differs from the recorded digest.
148 if not (os.path.lexists(path_base) and getFileDigest(path_base, fe.digest) == fe.digest):
149 if (os.path.lexists(fe.name) and getFileDigest(fe.name, fe.digest) == fe.digest):
150 # The live file has the original content.
151 # Copy the content and set the original attributes manually.
# Copy to a temporary name first and rename, so path_base only ever appears with full content.
152 path_tmp = fe.name + '.rpmbase-tmp'
153 shutil.copyfile(fe.name, path_tmp)
154 setAttributes(fe, path_tmp)
155 os.rename(path_tmp, path_base)
156 print('- %s: Copied %s from %s.' % (nevra, path_base, fe.name))
# NOTE(review): listing 157 and 159 are not visible; presumably the download below applies only
# when no copy from the live file was possible -- confirm.
158 download_path = path_base
# One NeededPackage per nevra, created on first use; each needed file is recorded under its live path.
160 if nevra not in needPackages:
161 needPackages[nevra] = NeededPackage(header)
162 needPackages[nevra].paths[fe.name] = (fe.digest, download_path)
163 print('- %s: Need to download %s.' % (nevra, download_path))
# NOTE(review): listing 164-165 are not visible; the download phase below may be conditional.
166 print('Downloading %d packages.' % len(needPackages))
167 packages_tmpdir = tempfile.mkdtemp(prefix='rpmconf-packages')
168 # Make sure the cpio archive is covered by a valid signature
169 # before we use it. Since dnf-download-signed currently doesn't
170 # check that the package is the one we asked for, this only
171 # ensures that the cpio archive is safe to extract. Then we
172 # check the digest on each needed file before using it. We're
173 # still correct if an attacker substitutes a different signed
174 # package in which the files we need have the same content.
177 # Ideally, we'd only require a signature if the package came
178 # from a repository with gpgcheck=1. Right now, I use no
179 # unsigned packages. If I build my own packages again, I can
180 # either sign them or just fix them manually if they reach this
# list(needPackages) yields the dict keys (nevra strings), passed as arguments to the downloader,
# which runs with packages_tmpdir as its working directory.
183 subprocess.check_call(['dnf-download-signed'] + list(needPackages), cwd=packages_tmpdir)
184 for nevra, neededPkg in needPackages.items():
# The downloaded file is looked up by nvra, not nevra.
185 packagePath = '%s/%s.rpm' % (packages_tmpdir, neededPkg.nvra)
186 extract_tmpdir = tempfile.mkdtemp(prefix='rpmconf-extract-%s' % nevra)
# Presumably the archive members are named './<path>', hence the '.' prefix -- confirm.
187 cpioNeedPaths = ['.' + p for p in neededPkg.paths] # go figure
# In the bash script, "$1" is packagePath and, after the shift, "$@" is the list of wanted
# member paths ('--' fills $0). Extraction happens inside extract_tmpdir.
188 subprocess.check_call(['/bin/bash', '-c', 'p="$1"; shift; rpm2cpio "$p" | cpio --extract --quiet --preserve-modification-time --make-directories "$@"', '--', packagePath] + cpioNeedPaths, cwd=extract_tmpdir)
189 print('- Extracted %s.' % nevra)
190 for livePath, (needDigest, downloadPath) in neededPkg.paths.items():
# Plain concatenation; assumes livePath is absolute (begins with '/') -- TODO confirm.
191 tmpPath = extract_tmpdir + livePath
192 tmpDigest = getFileDigest(tmpPath, needDigest)
193 if tmpDigest != needDigest:
194 print('%s: got digest %s, wanted %s' % (livePath, tmpDigest, needDigest), file=sys.stderr)
# NOTE(review): listing 195 is not visible; presumably the move below is skipped on a digest mismatch -- confirm.
196 # This is easiest in case it is cross-filesystem, etc. mv should preserve all attributes.
197 subprocess.check_call(['mv', '-f', tmpPath, downloadPath])
198 print('-- Installed %s.' % downloadPath)
199 shutil.rmtree(extract_tmpdir)
200 shutil.rmtree(packages_tmpdir)
202 print('Scanning for obsolete rpmnew files.')
203 for newPath in subprocess.check_output(makeConfFindCommand("-type f -name '*.rpmnew' -print"), shell=True).decode().splitlines():
204 livePath = newPath[:-len('.rpmnew')]
205 if livePath not in filesToMerge: # only rpmnew files will be recorded in filesToMerge yet
# NOTE(review): the statement that performs the deletion (listing 207-208) is not visible.
206 print('-- Deleting %s. UNTESTED' % newPath)
209 # Remove rpmbase files for config files that are no longer managed, to not leave cruft.
210 # This intentionally does not remove rpmsave-base files. ~ 2014-07-03
211 # Note: If the config file had been modified, RPM would move it to
212 # rpmsave on package removal (whether or not it was noreplace), so we'd
213 # stamp the base file before we get here. ~ 2017-11-12
214 print('Scanning for obsolete rpmbase files.')
215 for basePath in subprocess.check_output(makeConfFindCommand("-type f -name '*.rpmbase' -print"), shell=True).decode().splitlines():
216 livePath = basePath[:-len('.rpmbase')]
# filesDone holds every config path processed above; a base with no such entry is treated as obsolete.
217 if livePath not in filesDone:
# NOTE(review): the statement that performs the deletion (listing 219-220) is not visible.
218 print('-- Deleting %s.' % basePath)
221 # "sync vs. merge" terminology is inspired by Perforce. We'll want a
222 # better term for "sync" before releasing this to the public.
# NOTE(review): the branch structure around the next two messages (listing 223, 225-226, 228) is
# not visible; presumably the first is printed when syncOnly is true and the function returns -- confirm.
224 print('rpmconf sync complete.')
227 print('rpmconf sync complete. You can interrupt if you don\'t wish to merge now.')
229 print('Scanning for rpmsave files.')
230 for savePath in subprocess.check_output(makeConfFindCommand("-type f -name '*.rpmsave-*' -print"), shell=True).decode().splitlines():
# Only names ending in ".rpmsave-<digits>" are wanted; the glob above also matches "...-base" files.
# NOTE(review): the handling of a non-matching name (listing 232) is not visible.
231 m = re.search('^(.*)\.rpmsave-(\d+)$', savePath)
233 (livePath, stamp) = (m.group(1), int(m.group(2)))
234 filesToMerge[livePath][0].append(stamp)
236 # Nested function for the ability to return from a nested loop...
# mergeFile(livePath): merge every stamped rpmsave file, oldest stamp first, then any rpmnew file,
# into livePath. Its return value is added to unfinishedFiles by the caller below, so it is a number;
# NOTE(review): the return statements themselves are not visible in this listing.
237 def mergeFile(livePath):
238 print('- Merging %s.' % livePath)
239 (saveStamps, haveRpmnew) = filesToMerge[livePath]
240 saveStamps.sort() # mutates the original, that's OK
241 # TODO: If a package was uninstalled, we could have an rpmsave and rpmsave-base with no live.
242 # We want to alert the user that the configuration change is no longer having an effect. How?
243 # Currently kdiff3 comes up with a bunch of error dialogs and I have to manually intervene.
244 for i in range(len(saveStamps)):
# Each rpmsave file is merged into the next newer rpmsave file, and the newest one into the live file.
245 c_output = '%s.rpmsave-%d' % (livePath, saveStamps[i+1]) if i+1 < len(saveStamps) else livePath
246 b = '%s.rpmsave-%d' % (livePath, saveStamps[i])
# NOTE(review): listing 247-248 are not visible; `a` is presumably bound there (likely the
# matching "-base" file) and a `try:` opened -- confirm.
249 doMerge(a, b, c_output, c_output)
250 except subprocess.CalledProcessError:
251 print('- Leaving %s merge unfinished.' % livePath)
# NOTE(review): listing 252-253 are not visible; presumably an early return on failure -- confirm.
254 if os.path.lexists(a): os.unlink(a)
# NOTE(review): listing 255-256 are not visible; presumably `b` is removed and the rpmnew merge
# below is guarded by haveRpmnew inside a `try:` -- confirm.
257 doMerge(livePath + '.rpmbase', livePath, livePath + '.rpmnew', livePath)
258 except subprocess.CalledProcessError:
259 print('- Leaving %s merge unfinished.' % livePath)
# After an rpmnew merge, the rpmnew file replaces the old base via rename.
261 os.rename(livePath + '.rpmnew', livePath + '.rpmbase')
262 print('- Merged %s.' % livePath)
# NOTE(review): listing 263-265 are not visible; unfinishedFiles is presumably initialised there.
266 for livePath in filesToMerge:
267 unfinishedFiles += mergeFile(livePath)
269 if unfinishedFiles == 0:
270 print('rpmconf merge complete!')
# NOTE(review): listing 271 is not visible; presumably an `else:` -- confirm.
272 print('No more files to consider. %d files left unfinished.' % unfinishedFiles)
if __name__ == '__main__':
    # Command line: rpmconf-matt [--sync-only]
    #   no arguments -> sync, then merge
    #   --sync-only  -> sync and stop
    #   anything else is rejected with a message on stderr and a non-zero exit.
    # TODO: Adopt a real option-parsing library.
    args = sys.argv[1:]
    if args == []:
        rpmconf()
    elif args == ['--sync-only']:
        rpmconf(syncOnly=True)
    else:
        print('Unrecognized arguments.', file=sys.stderr)
        sys.exit(1)