Import the remaining utilities.
[utils/utils.git] / rpmconf-matt
diff --git a/rpmconf-matt b/rpmconf-matt
new file mode 100755 (executable)
index 0000000..a24f736
--- /dev/null
@@ -0,0 +1,283 @@
+#!/usr/bin/env python3
+# rpmconf-matt [--sync-only]
+
+# Known blockers to submission to Fedora: Basically removing assumptions specific to my setup.
+# - Decide how to name it vs. existing "rpmconf" in Fedora
+# - Make it configurable what part of the filesystem to scan.  Is there a
+#   default that's sufficiently safe for people to use on systems with untrusted
+#   users?
+# - Should save its own files in another tree rather than blacklisting
+#   directories known to cause problems if it adds its files there.
+# - Make diff program configurable / smarter default
+
+# TODO: Move the "sync" part into a dnf plugin that runs after every transaction
+# to make it more robust.  Perhaps there can be an option to run the merge
+# interactively.  With tighter integration, we might be able to skip scanning the
+# whole filesystem, but that would add one more kind of state that can go wrong.
+
+# Python 3 conversion, 2017-11-12
+# One could argue we should treat filenames and RPM names as bytes, but it's too
+# much of a pain to fix all the resulting fallout now, and we don't expect
+# adversarial names anyway. ~ Matt 2017-11-12
+#
+# Fedora 30 -> 32 upgrade: remove some .decode() calls (guess rpm-python changed
+# some things from bytes to str) ~ Matt 2020-06-01
+
+import collections
+import hashlib
+import os
+import pwd, grp
+import re
+import shutil
+import stat
+import subprocess
+import sys
+import tempfile
+import time
+
+import rpm
+
+def getFileDigest(path, algoHint):
+       """Return the hex digest of the file at `path`.
+
+       The hash algorithm is guessed from the length of `algoHint`, which is
+       expected to be a hex digest produced by the algorithm we should use
+       (callers pass the digest they are about to compare against).
+
+       Raises ValueError if the length of `algoHint` matches no known
+       algorithm (this includes an empty hint).  Errors from opening or
+       reading `path` propagate unchanged.
+       """
+       # TODO: Get the digest algorithm from RPM??
+       # File a bug to have rpmfiDigestAlgo exposed to Python?
+       # Hex digest length -> hashlib constructor.  The lengths are distinct,
+       # so the length alone identifies the algorithm among these.
+       algosByHexLength = {
+               32: hashlib.md5,  # adobe packages
+               40: hashlib.sha1,
+               56: hashlib.sha224,
+               64: hashlib.sha256,
+               96: hashlib.sha384,
+               128: hashlib.sha512,
+       }
+       algo = algosByHexLength.get(len(algoHint))
+       if algo is None:
+               raise ValueError('Failed to guess digest algorithm')
+       digest = algo()
+       with open(path, 'rb') as f:
+               # Hash in chunks so a large file is never held in memory at once.
+               for chunk in iter(lambda: f.read(1 << 20), b''):
+                       digest.update(chunk)
+       return digest.hexdigest()
+
+def setAttributes(fe, path):
+       """Apply the owner, group, permission bits and mtime recorded in `fe` to `path`.
+
+       `fe` must provide `user`, `group`, `mode` and `mtime` attributes.
+       Ownership is set first, then the mode, then both atime and mtime are
+       set to `fe.mtime`.
+
+       Raises KeyError if the user or group name is not found.
+       """
+       # C.f. https://github.com/rpm-software-management/rpm/blob/rpm-4.13.0-rc1/lib/fsm.c#L713
+       uid = pwd.getpwnam(fe.user).pw_uid
+       gid = grp.getgrnam(fe.group).gr_gid
+       os.chown(path, uid, gid)
+       # Keep only the permission bits; the file-type bits of the mode are dropped.
+       permissionBits = fe.mode & 0o7777
+       os.chmod(path, permissionBits)
+       times = (fe.mtime, fe.mtime)
+       os.utime(path, times)
+
+def makeConfFindCommand(expr):
+       """Return a shell `find` command line that evaluates `expr` over the root filesystem.
+
+       `expr` is appended verbatim after the pruning clause, so it must
+       already be valid (and shell-quoted) `find` expression syntax.
+       """
+       # The command is meant to catch any files ever managed by RPM without
+       # getting confused by trees such as /proc or even by malicious
+       # user-created files.  Unfortunately, we can't just ask RPM which files
+       # it previously managed.  The following heuristic should work for ml*:
+       # stay on the root filesystem (and avoid an error on /mnt/root, which
+       # -xdev doesn't catch), and exclude world-writable dirs such as /var/tmp.
+       # TODO: Make the search criteria more general.
+       prunedTrees = r"\( -path /mnt -or -path /var/www/html -or -perm /002 \)"
+       return "find / -xdev " + prunedTrees + " -prune -or " + expr
+
+def doMerge(a, b, c, output):
+       """Run the external `rpmconf-matt-merge` helper on `a`, `b` and `c`.
+
+       The helper is invoked as `rpmconf-matt-merge OUTPUT A B C`.  If `a`
+       does not exist (checked with lexists), an empty string is passed in its
+       place.
+
+       Raises subprocess.CalledProcessError if the helper exits non-zero.
+       """
+       # TODO: Make diff program customizable.
+       # FIXME: Stop leaving ".orig" files around.  At least name them distinctively.
+       # kdiff3 wrapper to work around issue with Qt apps running as root under Qubes ~ 2015-08-20
+       baseArg = a if os.path.lexists(a) else ''
+       subprocess.check_call(['rpmconf-matt-merge', output, baseArg, b, c])
+
+class NeededPackage:
+       """Record of the files we need to obtain from one package.
+
+       `nvra` is copied from the given header; `paths` starts out empty and
+       is filled in by the caller.
+       """
+       def __init__(self, header):
+               # live path -> (digest, path to download to)
+               self.paths = {}
+               self.nvra = header.nvra
+
+def rpmconf(syncOnly=False):
+       """Sync the rpmbase/rpmnew/rpmsave bookkeeping files, then optionally merge.
+
+       Sync phase:
+       - Rename every `*.rpmsave` file to `*.rpmsave-<timestamp>` and its
+         `.rpmbase` companion (if any) to `*.rpmsave-<timestamp>-base`.
+       - For every installed, non-ghost, regular config file in the RPM
+         database, make sure there is either a `.rpmnew` file or a `.rpmbase`
+         file whose digest matches the database.  A missing or wrong file is
+         copied from the live file when that matches, and otherwise extracted
+         from a freshly downloaded package.
+       - Report `.rpmnew` files that no longer correspond to a config file
+         (their deletion is currently commented out) and delete `.rpmbase`
+         files whose config file is no longer managed.
+
+       Merge phase (skipped when `syncOnly` is true): run doMerge() for each
+       stamped rpmsave file, oldest first, and for each rpmnew file recorded
+       during the sync.
+
+       Exits with status 1 if not running as root.  Raises NotImplementedError
+       for a config file with file capabilities.  Failures of the external
+       commands outside of the merge step propagate as
+       subprocess.CalledProcessError.
+       """
+       if os.geteuid() != 0:
+               print('This tool needs to run as root.', file=sys.stderr)
+               sys.exit(1)
+
+       # First, rename any rpmsave files and corresponding rpmbase files before we
+       # would overwrite the rpmbase files.  We'll do this to any file type, even
+       # though there will only ever be base files for regular files.
+       print('Scanning for rpmsave files that need to be stamped.')
+       saveStamp = str(int(time.time()))
+       for savePath in subprocess.check_output(makeConfFindCommand("-name '*.rpmsave' -print"), shell=True).decode().splitlines():
+               livePath = savePath[:-len('.rpmsave')]
+               liveBasePath = livePath + '.rpmbase'
+               stampedSavePath = livePath + '.rpmsave-' + saveStamp
+               stampedSaveBasePath = stampedSavePath + '-base'
+               print('-- Timestamping rpmsave file for %s.' % livePath)
+               # XXX: Make sure we are not clobbering existing files?
+               os.rename(savePath, stampedSavePath)
+               if os.path.lexists(liveBasePath):
+                       os.rename(liveBasePath, stampedSaveBasePath)
+
+       filesToMerge = collections.defaultdict(lambda: [[], False])  # live path -> (list of rpmsave stamps, bool if rpmnew)
+
+       print('Scanning for config files that need base files created.')
+       filesDone = {}  # live path -> (nevra, digest)
+       needPackages = {}  # nevra -> NeededPackage; no more defaultdict because NeededPackage needs header
+       ts = rpm.ts()
+       mi = ts.dbMatch()
+       for header in mi:
+               nevra = header.nevra
+               for fe in rpm.files(header):
+                       # Only installed config files.
+                       if fe.state != rpm.RPMFILE_STATE_NORMAL: continue
+                       if not (fe.fflags & rpm.RPMFILE_CONFIG): continue
+                       # For a ghost, we have no base content to write.  Probably best to let this be a two-way merge if the file becomes non-ghost later.
+                       if fe.fflags & rpm.RPMFILE_GHOST: continue
+                       # For now, we only handle regular files.  Conflicts on config symlinks seem to be rare.
+                       if not stat.S_ISREG(fe.mode): continue
+                       if fe.caps != '': raise NotImplementedError('File capabilities are not implemented: %s' % fe.name)
+
+                       # Extension point directories whose readers can't handle additional *.rpm* files.
+                       # /etc/skel/ is not actually causing a problem but leads to ugly persistent state.
+                       # TODO: Find a better workaround.
+                       # The dots are escaped so that they match only a literal '.'.
+                       if re.search(r'^(/etc/skel/|/etc/rpm/macros|/etc/logrotate\.d/|/etc/grub\.d/)', fe.name):
+                               continue
+
+                       # We need this check to avoid thrashing a conflicted base file.
+                       if fe.name in filesDone:
+                               (oldNevra, oldDigest) = filesDone[fe.name]
+                               if fe.digest != oldDigest:
+                                       print('Conflict at %s: have %s from %s, ignoring %s from %s' %
+                                               (fe.name, oldDigest, oldNevra, fe.digest, nevra),
+                                               file=sys.stderr)
+                               continue
+                       filesDone[fe.name] = (nevra, fe.digest)
+
+                       path_new = fe.name + '.rpmnew'
+                       download_path = None
+                       if os.path.lexists(path_new):
+                               filesToMerge[fe.name][1] = True
+                               # The live config file is not based on the current DB entry.
+                               # Hopefully we already have a base for the live config file; if not, there's nothing we can do about it now.
+                               # We do want to make sure the rpmnew file is correct.
+                               if getFileDigest(path_new, fe.digest) != fe.digest:
+                                       download_path = path_new
+                       else:
+                               path_base = fe.name + '.rpmbase'
+                               if not (os.path.lexists(path_base) and getFileDigest(path_base, fe.digest) == fe.digest):
+                                       if (os.path.lexists(fe.name) and getFileDigest(fe.name, fe.digest) == fe.digest):
+                                               # The live file has the original content.
+                                               # Copy the content and set the original attributes manually.
+                                               # Write to a temporary name and rename into place.
+                                               path_tmp = fe.name + '.rpmbase-tmp'
+                                               shutil.copyfile(fe.name, path_tmp)
+                                               setAttributes(fe, path_tmp)
+                                               os.rename(path_tmp, path_base)
+                                               print('- %s: Copied %s from %s.' % (nevra, path_base, fe.name))
+                                       else:
+                                               download_path = path_base
+                       if download_path:
+                               if nevra not in needPackages:
+                                       needPackages[nevra] = NeededPackage(header)
+                               needPackages[nevra].paths[fe.name] = (fe.digest, download_path)
+                               print('- %s: Need to download %s.' % (nevra, download_path))
+
+       if needPackages:
+               print('Downloading %d packages.' % len(needPackages))
+               # Make sure the cpio archive is covered by a valid signature
+               # before we use it.  Since dnf-download-signed currently doesn't
+               # check that the package is the one we asked for, this only
+               # ensures that the cpio archive is safe to extract.  Then we
+               # check the digest on each needed file before using it.  We're
+               # still correct if an attacker substitutes a different signed
+               # package in which the files we need have the same content.
+               # ~ Matt 2019-05-18
+               #
+               # Ideally, we'd only require a signature if the package came
+               # from a repository with gpgcheck=1.  Right now, I use no
+               # unsigned packages.  If I build my own packages again, I can
+               # either sign them or just fix them manually if they reach this
+               # code.
+               # ~ Matt 2017-11-11
+               #
+               # The temporary directories are context managers so that they are
+               # removed even if the download, an extraction or a digest
+               # computation raises.
+               with tempfile.TemporaryDirectory(prefix='rpmconf-packages') as packages_tmpdir:
+                       subprocess.check_call(['dnf-download-signed'] + list(needPackages), cwd=packages_tmpdir)
+                       for nevra, neededPkg in needPackages.items():
+                               packagePath = '%s/%s.rpm' % (packages_tmpdir, neededPkg.nvra)
+                               with tempfile.TemporaryDirectory(prefix='rpmconf-extract-%s' % nevra) as extract_tmpdir:
+                                       cpioNeedPaths = ['.' + p for p in neededPkg.paths]  # go figure
+                                       subprocess.check_call(['/bin/bash', '-c', 'p="$1"; shift; rpm2cpio "$p" | cpio --extract --quiet --preserve-modification-time --make-directories "$@"', '--', packagePath] + cpioNeedPaths, cwd=extract_tmpdir)
+                                       print('- Extracted %s.' % nevra)
+                                       for livePath, (needDigest, downloadPath) in neededPkg.paths.items():
+                                               tmpPath = extract_tmpdir + livePath
+                                               tmpDigest = getFileDigest(tmpPath, needDigest)
+                                               if tmpDigest != needDigest:
+                                                       print('%s: got digest %s, wanted %s' % (livePath, tmpDigest, needDigest), file=sys.stderr)
+                                                       continue
+                                               # This is easiest in case it is cross-filesystem, etc.  mv should preserve all attributes.
+                                               subprocess.check_call(['mv', '-f', tmpPath, downloadPath])
+                                               print('-- Installed %s.' % downloadPath)
+
+       print('Scanning for obsolete rpmnew files.')
+       for newPath in subprocess.check_output(makeConfFindCommand("-type f -name '*.rpmnew' -print"), shell=True).decode().splitlines():
+               livePath = newPath[:-len('.rpmnew')]
+               if livePath not in filesToMerge:  # only rpmnew files will be recorded in filesToMerge yet
+                       print('-- Deleting %s. UNTESTED' % newPath)
+                       #os.unlink(newPath)
+
+       # Remove rpmbase files for config files that are no longer managed, to not leave cruft.
+       # This intentionally does not remove rpmsave-base files. ~ 2014-07-03
+       # Note: If the config file had been modified, RPM would move it to
+       # rpmsave on package removal (whether or not it was noreplace), so we'd
+       # stamp the base file before we get here. ~ 2017-11-12
+       print('Scanning for obsolete rpmbase files.')
+       for basePath in subprocess.check_output(makeConfFindCommand("-type f -name '*.rpmbase' -print"), shell=True).decode().splitlines():
+               livePath = basePath[:-len('.rpmbase')]
+               if livePath not in filesDone:
+                       print('-- Deleting %s.' % basePath)
+                       os.unlink(basePath)
+
+       # "sync vs. merge" terminology is inspired by Perforce.  We'll want a
+       # better term for "sync" before releasing this to the public.
+       if syncOnly:
+               print('rpmconf sync complete.')
+               return
+       else:
+               print('rpmconf sync complete.  You can interrupt if you don\'t wish to merge now.')
+
+       print('Scanning for rpmsave files.')
+       for savePath in subprocess.check_output(makeConfFindCommand("-type f -name '*.rpmsave-*' -print"), shell=True).decode().splitlines():
+               # Raw string: '\.' and '\d' are regex escapes, not string escapes.
+               # The '$' anchor also skips the '-base' companions.
+               m = re.search(r'^(.*)\.rpmsave-(\d+)$', savePath)
+               if not m: continue
+               (livePath, stamp) = (m.group(1), int(m.group(2)))
+               filesToMerge[livePath][0].append(stamp)
+
+       # Nested function for the ability to return from a nested loop...
+       def mergeFile(livePath):
+               """Merge all pending changes into `livePath`; return 1 if left unfinished, else 0."""
+               print('- Merging %s.' % livePath)
+               (saveStamps, haveRpmnew) = filesToMerge[livePath]
+               saveStamps.sort()  # mutates the original, that's OK
+               # TODO: If a package was uninstalled, we could have an rpmsave and rpmsave-base with no live.
+               # We want to alert the user that the configuration change is no longer having an effect.  How?
+               # Currently kdiff3 comes up with a bunch of error dialogs and I have to manually intervene.
+               for i, stamp in enumerate(saveStamps):
+                       # Each rpmsave is merged into the next newer rpmsave; the newest one is merged into the live file.
+                       c_output = '%s.rpmsave-%d' % (livePath, saveStamps[i+1]) if i+1 < len(saveStamps) else livePath
+                       b = '%s.rpmsave-%d' % (livePath, stamp)
+                       a = b + '-base'
+                       try:
+                               doMerge(a, b, c_output, c_output)
+                       except subprocess.CalledProcessError:
+                               print('- Leaving %s merge unfinished.' % livePath)
+                               return 1
+                       os.unlink(b)
+                       if os.path.lexists(a): os.unlink(a)
+               if haveRpmnew:
+                       try:
+                               doMerge(livePath + '.rpmbase', livePath, livePath + '.rpmnew', livePath)
+                       except subprocess.CalledProcessError:
+                               print('- Leaving %s merge unfinished.' % livePath)
+                               return 1
+                       # The merged-in rpmnew content becomes the new base of the live file.
+                       os.rename(livePath + '.rpmnew', livePath + '.rpmbase')
+               print('- Merged %s.' % livePath)
+               return 0
+
+       unfinishedFiles = 0
+       for livePath in filesToMerge:
+               unfinishedFiles += mergeFile(livePath)
+
+       if unfinishedFiles == 0:
+               print('rpmconf merge complete!')
+       else:
+               print('No more files to consider.  %d files left unfinished.' % unfinishedFiles)
+
+if __name__ == '__main__':
+       # TODO: Adopt a real option-parsing library.
+       # Only two invocations are accepted: no arguments at all, or exactly
+       # the single argument --sync-only.
+       cliArgs = sys.argv[1:]
+       if cliArgs not in ([], ['--sync-only']):
+               print('Unrecognized arguments.', file=sys.stderr)
+               sys.exit(1)
+       rpmconf(syncOnly=bool(cliArgs))