#!/usr/bin/env python3
# rpmconf-matt [--sync-only]

# Known blockers to submission to Fedora: Basically removing assumptions specific to my setup.
# - Decide how to name it vs. existing "rpmconf" in Fedora
# - Make it configurable what part of the filesystem to scan. Is there a
#   default that's sufficiently safe for people to use on systems with untrusted
#   users?
# - Should save its own files in another tree rather than blacklisting
#   directories known to cause problems if it adds its files there.
# - Make diff program configurable / smarter default

# TODO: Move the "sync" part into a dnf plugin that runs after every transaction
# to make it more robust. Perhaps there can be an option to run the merge
# interactively. With tighter integration, we might be able to skip scanning the
# whole filesystem, but that would add one more kind of state that can get wrong.

# Python 3 conversion, 2017-11-12
# One could argue we should treat filenames and RPM names as bytes, but it's too
# much of a pain to fix all the resulting fallout now, and we don't expect
# adversarial names anyway. ~ Matt 2017-11-12
#
# Fedora 30 -> 32 upgrade: remove some .decode() calls (guess rpm-python changed
# some things from bytes to str) ~ Matt 2020-06-01

import collections
import hashlib
import os
import pwd, grp
import re
import shutil
import stat
import subprocess
import sys
import tempfile
import time

try:
    import rpm
except ImportError:
    # rpmconf() reports this before touching anything; the helpers that do not
    # need the RPM bindings stay importable.
    rpm = None

# Extension point directories whose readers can't handle additional *.rpm* files.
# /etc/skel/ is not actually causing a problem but leads to ugly persistent state.
# These are literal path prefixes (the last component may be partial, as in
# /etc/rpm/macros*).
# TODO: Find a better workaround.
_SKIP_PREFIXES = ('/etc/skel/', '/etc/rpm/macros', '/etc/logrotate.d/', '/etc/grub.d/')

# Read size used when hashing files.
_DIGEST_CHUNK_SIZE = 1 << 16


def getFileDigest(path, algoHint):
    """Return the hex digest of the file at `path`.

    The algorithm is guessed from the length of `algoHint`, a known hex digest
    of the kind we want to compare against: 32 characters selects MD5 and 64
    selects SHA-256.

    Raises ValueError if the length of `algoHint` is neither, and OSError if
    the file cannot be read.
    """
    # TODO: Get the digest algorithm from RPM??
    # File a bug to have rpmfiDigestAlgo exposed to Python?
    if len(algoHint) == 32:  # adobe packages
        algo = hashlib.md5
    elif len(algoHint) == 64:
        algo = hashlib.sha256
    else:
        raise ValueError('Failed to guess digest algorithm')
    digest = algo()
    with open(path, 'rb') as f:
        # Hash in chunks so the file is never held in memory whole.
        for chunk in iter(lambda: f.read(_DIGEST_CHUNK_SIZE), b''):
            digest.update(chunk)
    return digest.hexdigest()


def setAttributes(fe, path):
    """Apply the owner, group, permission bits and mtime of file entry `fe` to `path`.

    Raises KeyError if the user or group named by `fe` does not exist.
    """
    # C.f. https://github.com/rpm-software-management/rpm/blob/rpm-4.13.0-rc1/lib/fsm.c#L713
    # If user or group is not found, we'll get a KeyError.
    os.chown(path, pwd.getpwnam(fe.user).pw_uid, grp.getgrnam(fe.group).gr_gid)
    os.chmod(path, fe.mode & 0o7777)
    os.utime(path, (fe.mtime, fe.mtime))


def makeConfFindCommand(expr):
    """Return a shell command string running find with the tests/actions in `expr`.

    `expr` is appended verbatim, so it must already be quoted for the shell.
    """
    # Make a find command intended to catch any files ever managed by RPM,
    # without getting confused by trees such as /proc
    # or even malicious user-created files. Unfortunately, we can't just ask RPM
    # which files it previously managed. The following heuristic should work for
    # ml*: root filesystem (and avoid an error on /mnt/root, which -xdev doesn't
    # catch), and exclude world-writable dirs such as /var/tmp.
    # TODO: Make the search criteria more general.
    return r"find / -xdev \( -path /mnt -or -path /var/www/html -or -perm /002 \) -prune -or " + expr


def _findPaths(tests):
    """Return the list of paths selected by the find tests in `tests`.

    `tests` must not contain an action; -print0 is appended here so that the
    output is NUL-delimited. A path may contain a newline (or one of the other
    characters str.splitlines() splits on), and splitting on those would turn
    one crafted name into several unrelated paths.
    """
    output = subprocess.check_output(makeConfFindCommand(tests + ' -print0'), shell=True)
    return [os.fsdecode(raw) for raw in output.split(b'\0') if raw]


def doMerge(a, b, c, output):
    """Run the external three-way merge tool on base `a`, `b` and `c`, writing `output`.

    A missing base is passed as an empty argument. Raises
    subprocess.CalledProcessError if the tool exits with a nonzero status.
    """
    # TODO: Make diff program customizable.
    # FIXME: Stop leaving ".orig" files around. At least name them distinctively.
    # kdiff3 wrapper to work around issue with Qt apps running as root under Qubes ~ 2015-08-20
    args = ['rpmconf-matt-merge', output, a if os.path.lexists(a) else '', b, c]
    subprocess.check_call(args)


class NeededPackage(object):
    """A package that must be downloaded to recover original config file content."""

    def __init__(self, header):
        self.nvra = header.nvra
        self.paths = dict()  # live path -> (digest, path to download to)


def _stampSaveFiles():
    """Rename each *.rpmsave file, and its rpmbase file if any, to a timestamped name."""
    # First, rename any rpmsave files and corresponding rpmbase files before we
    # would overwrite the rpmbase files. We'll do this to any file type, even
    # though there will only ever be base files for regular files.
    print('Scanning for rpmsave files that need to be stamped.')
    saveStamp = str(int(time.time()))
    for savePath in _findPaths("-name '*.rpmsave'"):
        livePath = savePath[:-len('.rpmsave')]
        liveBasePath = livePath + '.rpmbase'
        stampedSavePath = livePath + '.rpmsave-' + saveStamp
        stampedSaveBasePath = stampedSavePath + '-base'
        print('-- Timestamping rpmsave file for %s.' % livePath)
        # XXX: Make sure we are not clobbering existing files?
        os.rename(savePath, stampedSavePath)
        if os.path.lexists(liveBasePath):
            os.rename(liveBasePath, stampedSaveBasePath)


def _scanConfigFiles(filesToMerge):
    """Walk the RPM database and make sure every config file has a usable base.

    Marks in `filesToMerge` the live paths that have an rpmnew file. Returns
    (filesDone, needPackages): filesDone maps live path -> (nevra, digest) and
    needPackages maps nevra -> NeededPackage for content that has to be
    downloaded.
    """
    print('Scanning for config files that need base files created.')
    filesDone = {}  # live path -> (nevra, digest)
    needPackages = {}  # nevra -> NeededPackage; no more defaultdict because NeededPackage needs header
    ts = rpm.ts()
    mi = ts.dbMatch()
    for header in mi:
        nevra = header.nevra
        for fe in rpm.files(header):
            # Only installed config files.
            if fe.state != rpm.RPMFILE_STATE_NORMAL: continue
            if not (fe.fflags & rpm.RPMFILE_CONFIG): continue
            # For a ghost, we have no base content to write. Probably best to let this be a two-way merge if the file becomes non-ghost later.
            if fe.fflags & rpm.RPMFILE_GHOST: continue
            # For now, we only handle regular files. Conflicts on config symlinks seem to be rare.
            if not stat.S_ISREG(fe.mode): continue
            if fe.caps != '': raise NotImplementedError('File capabilities are not implemented: %s' % fe.name)

            if fe.name.startswith(_SKIP_PREFIXES):
                continue

            # We need this check to avoid thrashing a conflicted base file.
            # A path already handled for another package is skipped either
            # way; we only complain when the two packages disagree.
            if fe.name in filesDone:
                (oldNevra, oldDigest) = filesDone[fe.name]
                if fe.digest != oldDigest:
                    print('Conflict at %s: have %s from %s, ignoring %s from %s' %
                          (fe.name, oldDigest, oldNevra, fe.digest, nevra),
                          file=sys.stderr)
                continue
            filesDone[fe.name] = (nevra, fe.digest)

            path_new = fe.name + '.rpmnew'
            download_path = None
            if os.path.lexists(path_new):
                filesToMerge[fe.name][1] = True
                # The live config file is not based on the current DB entry.
                # Hopefully we already have a base for the live config file; if not, there's nothing we can do about it now.
                # We do want to make sure the rpmnew file is correct.
                if getFileDigest(path_new, fe.digest) != fe.digest:
                    download_path = path_new
            else:
                path_base = fe.name + '.rpmbase'
                if not (os.path.lexists(path_base) and getFileDigest(path_base, fe.digest) == fe.digest):
                    if os.path.lexists(fe.name) and getFileDigest(fe.name, fe.digest) == fe.digest:
                        # The live file has the original content.
                        # Copy the content and set the original attributes manually.
                        path_tmp = fe.name + '.rpmbase-tmp'
                        shutil.copyfile(fe.name, path_tmp)
                        setAttributes(fe, path_tmp)
                        os.rename(path_tmp, path_base)
                        print('- %s: Copied %s from %s.' % (nevra, path_base, fe.name))
                    else:
                        download_path = path_base
            if download_path:
                if nevra not in needPackages:
                    needPackages[nevra] = NeededPackage(header)
                needPackages[nevra].paths[fe.name] = (fe.digest, download_path)
                print('- %s: Need to download %s.' % (nevra, download_path))
    return filesDone, needPackages


def _downloadNeeded(needPackages):
    """Download the packages in `needPackages` and install the files they are needed for.

    The temporary directories are removed even if a download, extraction or
    move fails.
    """
    print('Downloading %d packages.' % len(needPackages))
    with tempfile.TemporaryDirectory(prefix='rpmconf-packages') as packages_tmpdir:
        # Make sure the cpio archive is covered by a valid signature
        # before we use it. Since dnf-download-signed currently doesn't
        # check that the package is the one we asked for, this only
        # ensures that the cpio archive is safe to extract. Then we
        # check the digest on each needed file before using it. We're
        # still correct if an attacker substitutes a different signed
        # package in which the files we need have the same content.
        # ~ Matt 2019-05-18
        #
        # Ideally, we'd only require a signature if the package came
        # from a repository with gpgcheck=1. Right now, I use no
        # unsigned packages. If I build my own packages again, I can
        # either sign them or just fix them manually if they reach this
        # code.
        # ~ Matt 2017-11-11
        subprocess.check_call(['dnf-download-signed'] + list(needPackages), cwd=packages_tmpdir)
        for nevra, neededPkg in needPackages.items():
            packagePath = '%s/%s.rpm' % (packages_tmpdir, neededPkg.nvra)
            with tempfile.TemporaryDirectory(prefix='rpmconf-extract-%s' % nevra) as extract_tmpdir:
                cpioNeedPaths = ['.' + p for p in neededPkg.paths]  # go figure
                subprocess.check_call(['/bin/bash', '-c', 'p="$1"; shift; rpm2cpio "$p" | cpio --extract --quiet --preserve-modification-time --make-directories "$@"', '--', packagePath] + cpioNeedPaths, cwd=extract_tmpdir)
                print('- Extracted %s.' % nevra)
                for livePath, (needDigest, downloadPath) in neededPkg.paths.items():
                    tmpPath = extract_tmpdir + livePath
                    tmpDigest = getFileDigest(tmpPath, needDigest)
                    if tmpDigest != needDigest:
                        print('%s: got digest %s, wanted %s' % (livePath, tmpDigest, needDigest), file=sys.stderr)
                        continue
                    # This is easiest in case it is cross-filesystem, etc. mv should preserve all attributes.
                    subprocess.check_call(['mv', '-f', tmpPath, downloadPath])
                    print('-- Installed %s.' % downloadPath)


def _mergeFile(livePath, saveStamps, haveRpmnew):
    """Merge the pending rpmsave and rpmnew files for `livePath`.

    `saveStamps` is sorted in place. Returns 0 when every merge for the file
    finished and 1 when the merge tool failed, leaving the rest untouched.
    """
    print('- Merging %s.' % livePath)
    saveStamps.sort()  # mutates the original, that's OK
    # TODO: If a package was uninstalled, we could have an rpmsave and rpmsave-base with no live.
    # We want to alert the user that the configuration change is no longer having an effect. How?
    # Currently kdiff3 comes up with a bunch of error dialogs and I have to manually intervene.
    for i, stamp in enumerate(saveStamps):
        # Each saved version is merged into the next newer one; the newest
        # is merged into the live file.
        c_output = '%s.rpmsave-%d' % (livePath, saveStamps[i+1]) if i+1 < len(saveStamps) else livePath
        b = '%s.rpmsave-%d' % (livePath, stamp)
        a = b + '-base'
        try:
            doMerge(a, b, c_output, c_output)
        except subprocess.CalledProcessError:
            print('- Leaving %s merge unfinished.' % livePath)
            return 1
        os.unlink(b)
        if os.path.lexists(a): os.unlink(a)
    if haveRpmnew:
        try:
            doMerge(livePath + '.rpmbase', livePath, livePath + '.rpmnew', livePath)
        except subprocess.CalledProcessError:
            print('- Leaving %s merge unfinished.' % livePath)
            return 1
        # The rpmnew content is what the live file is based on from now on.
        os.rename(livePath + '.rpmnew', livePath + '.rpmbase')
    print('- Merged %s.' % livePath)
    return 0


def rpmconf(syncOnly=False):
    """Bring the rpmbase/rpmnew/rpmsave bookkeeping up to date, then merge.

    With `syncOnly` true, stop after the sync phase and skip the interactive
    merges. Exits with status 1 if the RPM Python bindings are missing or the
    process is not running as root.
    """
    if rpm is None:
        print('This tool needs the rpm Python bindings.', file=sys.stderr)
        sys.exit(1)
    if os.geteuid() != 0:
        print('This tool needs to run as root.', file=sys.stderr)
        sys.exit(1)

    _stampSaveFiles()

    filesToMerge = collections.defaultdict(lambda: [[], False])  # live path -> (list of rpmsave stamps, bool if rpmnew)

    filesDone, needPackages = _scanConfigFiles(filesToMerge)

    if needPackages:
        _downloadNeeded(needPackages)

    print('Scanning for obsolete rpmnew files.')
    for newPath in _findPaths("-type f -name '*.rpmnew'"):
        livePath = newPath[:-len('.rpmnew')]
        if livePath not in filesToMerge:  # only rpmnew files will be recorded in filesToMerge yet
            print('-- Deleting %s. UNTESTED' % newPath)
            #os.unlink(newPath)

    # Remove rpmbase files for config files that are no longer managed, to not leave cruft.
    # This intentionally does not remove rpmsave-base files. ~ 2014-07-03
    # Note: If the config file had been modified, RPM would move it to
    # rpmsave on package removal (whether or not it was noreplace), so we'd
    # stamp the base file before we get here. ~ 2017-11-12
    print('Scanning for obsolete rpmbase files.')
    for basePath in _findPaths("-type f -name '*.rpmbase'"):
        livePath = basePath[:-len('.rpmbase')]
        if livePath not in filesDone:
            print('-- Deleting %s.' % basePath)
            os.unlink(basePath)

    # "sync vs. merge" terminology is inspired by Perforce. We'll want a
    # better term for "sync" before releasing this to the public.
    if syncOnly:
        print('rpmconf sync complete.')
        return
    else:
        print('rpmconf sync complete. You can interrupt if you don\'t wish to merge now.')

    print('Scanning for rpmsave files.')
    for savePath in _findPaths("-type f -name '*.rpmsave-*'"):
        # The find pattern also matches the "-base" companions; only names
        # ending in a numeric stamp are saved versions.
        m = re.fullmatch(r'(.*)\.rpmsave-([0-9]+)', savePath, re.DOTALL)
        if not m: continue
        (livePath, stamp) = (m.group(1), int(m.group(2)))
        filesToMerge[livePath][0].append(stamp)

    unfinishedFiles = 0
    for livePath, (saveStamps, haveRpmnew) in filesToMerge.items():
        unfinishedFiles += _mergeFile(livePath, saveStamps, haveRpmnew)

    if unfinishedFiles == 0:
        print('rpmconf merge complete!')
    else:
        print('No more files to consider. %d files left unfinished.' % unfinishedFiles)


if __name__ == '__main__':
    # TODO: Adopt a real option-parsing library.
    args = sys.argv[1:]
    if args == []:
        rpmconf()
    elif args == ['--sync-only']:
        rpmconf(syncOnly=True)
    else:
        print('Unrecognized arguments.', file=sys.stderr)
        sys.exit(1)