| 1 | #!/usr/bin/env python3 |
| 2 | # rpmconf-matt [--sync-only] |
| 3 | |
| 4 | # Known blockers to submission to Fedora: Basically removing assumptions specific to my setup. |
| 5 | # - Decide how to name it vs. existing "rpmconf" in Fedora |
| 6 | # - Make it configurable what part of the filesystem to scan. Is there a |
| 7 | # default that's sufficiently safe for people to use on systems with untrusted |
| 8 | # users? |
| 9 | # - Should save its own files in another tree rather than blacklisting |
| 10 | # directories known to cause problems if it adds its files there. |
| 11 | # - Make diff program configurable / smarter default |
| 12 | |
| 13 | # TODO: Move the "sync" part into a dnf plugin that runs after every transaction |
| 14 | # to make it more robust. Perhaps there can be an option to run the merge |
| 15 | # interactively. With tighter integration, we might be able to skip scanning the |
# whole filesystem, but that would add one more kind of state that can go wrong.
| 17 | |
| 18 | # Python 3 conversion, 2017-11-12 |
| 19 | # One could argue we should treat filenames and RPM names as bytes, but it's too |
| 20 | # much of a pain to fix all the resulting fallout now, and we don't expect |
| 21 | # adversarial names anyway. ~ Matt 2017-11-12 |
| 22 | # |
| 23 | # Fedora 30 -> 32 upgrade: remove some .decode() calls (guess rpm-python changed |
| 24 | # some things from bytes to str) ~ Matt 2020-06-01 |
| 25 | |
| 26 | import collections |
| 27 | import hashlib |
| 28 | import os |
| 29 | import pwd, grp |
| 30 | import re |
| 31 | import shutil |
| 32 | import stat |
| 33 | import subprocess |
| 34 | import sys |
| 35 | import tempfile |
| 36 | import time |
| 37 | |
| 38 | import rpm |
| 39 | |
def getFileDigest(path, algoHint):
    """Return the hex digest of the content of the file at `path`.

    The hash algorithm is guessed from the length of `algoHint`, a hex digest
    string assumed to come from the algorithm we should use (callers pass the
    digest recorded by RPM for the file).

    Raises ValueError if no known algorithm produces a digest of that length.
    Errors from opening or reading the file propagate unchanged.
    """
    # TODO: Get the digest algorithm from RPM??
    # File a bug to have rpmfiDigestAlgo exposed to Python?
    algosByHexLength = {
        32: hashlib.md5,  # adobe packages
        40: hashlib.sha1,
        56: hashlib.sha224,
        64: hashlib.sha256,
        96: hashlib.sha384,
        128: hashlib.sha512,
    }
    try:
        algo = algosByHexLength[len(algoHint)]
    except KeyError:
        raise ValueError('Failed to guess digest algorithm') from None
    hasher = algo()
    with open(path, 'rb') as f:
        # Feed the hash in fixed-size chunks so a large file is never held in
        # memory all at once.
        for chunk in iter(lambda: f.read(1 << 20), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
| 51 | |
def setAttributes(fe, path):
    """Apply the owner, group, permission bits and mtime recorded in `fe` to `path`.

    `fe` must provide `user`, `group`, `mode` and `mtime` attributes.
    A user or group name that cannot be looked up results in a KeyError.
    """
    # C.f. https://github.com/rpm-software-management/rpm/blob/rpm-4.13.0-rc1/lib/fsm.c#L713
    uid = pwd.getpwnam(fe.user).pw_uid
    gid = grp.getgrnam(fe.group).gr_gid
    os.chown(path, uid, gid)
    # Keep only the permission/setuid/setgid/sticky bits; drop the file-type bits.
    os.chmod(path, fe.mode & 0o7777)
    # Both atime and mtime are set to the recorded mtime.
    os.utime(path, (fe.mtime, fe.mtime))
| 58 | |
def makeConfFindCommand(expr):
    """Return a shell command line that runs find(1) over the root filesystem with `expr`.

    The command is meant to catch any file ever managed by RPM without being
    confused by trees such as /proc or by malicious user-created files.
    Unfortunately we can't just ask RPM which files it previously managed, so
    this heuristic (which should work for ml*) is used instead:
      - stay on the root filesystem (-xdev);
      - prune /mnt (avoids an error on /mnt/root, which -xdev doesn't catch)
        and /var/www/html;
      - prune anything writable by "other", e.g. world-writable dirs such as
        /var/tmp.
    `expr` is appended verbatim as the find expression applied to what remains.
    """
    # TODO: Make the search criteria more general.
    pruned = r"\( -path /mnt -or -path /var/www/html -or -perm /002 \) -prune"
    return "find / -xdev " + pruned + " -or " + expr
| 68 | |
def doMerge(a, b, c, output):
    """Run the external merge wrapper on `a`, `b` and `c`, writing to `output`.

    The wrapper is invoked as: rpmconf-matt-merge OUTPUT A B C. If `a` does not
    exist on disk, an empty string is passed in its place.
    Raises subprocess.CalledProcessError if the wrapper exits with a nonzero status.
    """
    # TODO: Make diff program customizable.
    # FIXME: Stop leaving ".orig" files around.  At least name them distinctively.
    # kdiff3 wrapper to work around issue with Qt apps running as root under Qubes ~ 2015-08-20
    firstInput = a if os.path.lexists(a) else ''
    subprocess.check_call(['rpmconf-matt-merge', output, firstInput, b, c])
| 75 | |
class NeededPackage:
    """A package to download, plus the files we need to pull out of it."""

    def __init__(self, header):
        # Name-version-release.arch string taken from the RPM header.
        self.nvra = header.nvra
        # live path -> (digest, path to download to)
        self.paths = {}
| 80 | |
def _findConfPaths(expr):
    """Return the paths printed by the config-file find command for `expr`.

    `expr` is a find(1) expression fragment; it is handed to
    makeConfFindCommand and the resulting command line is run through the shell.
    Raises subprocess.CalledProcessError if find exits with a nonzero status.
    """
    output = subprocess.check_output(makeConfFindCommand(expr), shell=True)
    return output.decode().splitlines()


def rpmconf(syncOnly=False):
    """Bring rpmbase/rpmnew/rpmsave files in line with the RPM database, then merge.

    Sync phase:
      1. Rename every *.rpmsave file (and the matching *.rpmbase file, if one
         exists) to a timestamped name so later steps cannot overwrite it.
      2. For each installed, non-ghost, regular config file in the RPM
         database, make sure a correct .rpmbase file exists (or, when a .rpmnew
         file is present, that the .rpmnew content is correct).  The base is
         copied from the live file when that still has the packaged content;
         otherwise the owning package is queued for download.
      3. Download the queued packages, extract the needed files and install
         those whose digest matches the database.
      4. Report *.rpmnew files not accounted for by step 2 (deletion is
         currently disabled) and delete *.rpmbase files whose config file was
         not seen in step 2.

    Merge phase (skipped when `syncOnly` is true): for each config file with
    timestamped rpmsave files and/or an rpmnew file, run the merge tool.

    Prints a message and exits with status 1 when not running as root.
    Raises NotImplementedError for a config file with file capabilities, and
    propagates subprocess.CalledProcessError from the find/download/extract
    commands.
    """
    if os.geteuid() != 0:
        print('This tool needs to run as root.', file=sys.stderr)
        sys.exit(1)

    # First, rename any rpmsave files and corresponding rpmbase files before we
    # would overwrite the rpmbase files.  We'll do this to any file type, even
    # though there will only ever be base files for regular files.
    print('Scanning for rpmsave files that need to be stamped.')
    saveStamp = str(int(time.time()))
    for savePath in _findConfPaths("-name '*.rpmsave' -print"):
        livePath = savePath[:-len('.rpmsave')]
        liveBasePath = livePath + '.rpmbase'
        stampedSavePath = livePath + '.rpmsave-' + saveStamp
        stampedSaveBasePath = stampedSavePath + '-base'
        print('-- Timestamping rpmsave file for %s.' % livePath)
        # XXX: Make sure we are not clobbering existing files?
        os.rename(savePath, stampedSavePath)
        if os.path.lexists(liveBasePath):
            os.rename(liveBasePath, stampedSaveBasePath)

    # live path -> [list of rpmsave stamps, bool if rpmnew]
    filesToMerge = collections.defaultdict(lambda: [[], False])

    # Extension point directories whose readers can't handle additional *.rpm* files.
    # /etc/skel/ is not actually causing a problem but leads to ugly persistent state.
    # These are literal path prefixes.
    # TODO: Find a better workaround.
    skippedPrefixes = ('/etc/skel/', '/etc/rpm/macros', '/etc/logrotate.d/', '/etc/grub.d/')

    print('Scanning for config files that need base files created.')
    filesDone = {}  # live path -> (nevra, digest)
    needPackages = {}  # nevra -> NeededPackage; no more defaultdict because NeededPackage needs header
    ts = rpm.ts()
    mi = ts.dbMatch()
    for header in mi:
        nevra = header.nevra
        for fe in rpm.files(header):
            # Only installed config files.
            if fe.state != rpm.RPMFILE_STATE_NORMAL:
                continue
            if not (fe.fflags & rpm.RPMFILE_CONFIG):
                continue
            # For a ghost, we have no base content to write.  Probably best to
            # let this be a two-way merge if the file becomes non-ghost later.
            if fe.fflags & rpm.RPMFILE_GHOST:
                continue
            # For now, we only handle regular files.  Conflicts on config symlinks seem to be rare.
            if not stat.S_ISREG(fe.mode):
                continue
            if fe.caps != '':
                raise NotImplementedError('File capabilities are not implemented: %s' % fe.name)

            if fe.name.startswith(skippedPrefixes):
                continue

            # We need this check to avoid thrashing a conflicted base file.
            if fe.name in filesDone:
                (oldNevra, oldDigest) = filesDone[fe.name]
                if fe.digest != oldDigest:
                    print('Conflict at %s: have %s from %s, ignoring %s from %s' %
                          (fe.name, oldDigest, oldNevra, fe.digest, nevra),
                          file=sys.stderr)
                continue
            filesDone[fe.name] = (nevra, fe.digest)

            path_new = fe.name + '.rpmnew'
            download_path = None
            if os.path.lexists(path_new):
                filesToMerge[fe.name][1] = True
                # The live config file is not based on the current DB entry.
                # Hopefully we already have a base for the live config file; if
                # not, there's nothing we can do about it now.
                # We do want to make sure the rpmnew file is correct.
                if getFileDigest(path_new, fe.digest) != fe.digest:
                    download_path = path_new
            else:
                path_base = fe.name + '.rpmbase'
                if not (os.path.lexists(path_base) and getFileDigest(path_base, fe.digest) == fe.digest):
                    if os.path.lexists(fe.name) and getFileDigest(fe.name, fe.digest) == fe.digest:
                        # The live file has the original content.
                        # Copy the content and set the original attributes manually.
                        # Write to a temporary name and rename it into place so a
                        # partially written base file never sits at path_base.
                        path_tmp = fe.name + '.rpmbase-tmp'
                        shutil.copyfile(fe.name, path_tmp)
                        setAttributes(fe, path_tmp)
                        os.rename(path_tmp, path_base)
                        print('- %s: Copied %s from %s.' % (nevra, path_base, fe.name))
                    else:
                        download_path = path_base
            if download_path:
                if nevra not in needPackages:
                    needPackages[nevra] = NeededPackage(header)
                needPackages[nevra].paths[fe.name] = (fe.digest, download_path)
                print('- %s: Need to download %s.' % (nevra, download_path))

    if needPackages:
        print('Downloading %d packages.' % len(needPackages))
        # The context managers remove the temporary directories even when a
        # download or extraction step raises.
        with tempfile.TemporaryDirectory(prefix='rpmconf-packages') as packages_tmpdir:
            # Make sure the cpio archive is covered by a valid signature
            # before we use it.  Since dnf-download-signed currently doesn't
            # check that the package is the one we asked for, this only
            # ensures that the cpio archive is safe to extract.  Then we
            # check the digest on each needed file before using it.  We're
            # still correct if an attacker substitutes a different signed
            # package in which the files we need have the same content.
            # ~ Matt 2019-05-18
            #
            # Ideally, we'd only require a signature if the package came
            # from a repository with gpgcheck=1.  Right now, I use no
            # unsigned packages.  If I build my own packages again, I can
            # either sign them or just fix them manually if they reach this
            # code.
            # ~ Matt 2017-11-11
            subprocess.check_call(['dnf-download-signed'] + list(needPackages), cwd=packages_tmpdir)
            for nevra, neededPkg in needPackages.items():
                packagePath = '%s/%s.rpm' % (packages_tmpdir, neededPkg.nvra)
                with tempfile.TemporaryDirectory(prefix='rpmconf-extract-%s' % nevra) as extract_tmpdir:
                    cpioNeedPaths = ['.' + p for p in neededPkg.paths]  # go figure
                    subprocess.check_call(
                        ['/bin/bash', '-c',
                         'p="$1"; shift; rpm2cpio "$p" | cpio --extract --quiet --preserve-modification-time --make-directories "$@"',
                         '--', packagePath] + cpioNeedPaths,
                        cwd=extract_tmpdir)
                    print('- Extracted %s.' % nevra)
                    for livePath, (needDigest, downloadPath) in neededPkg.paths.items():
                        tmpPath = extract_tmpdir + livePath
                        tmpDigest = getFileDigest(tmpPath, needDigest)
                        if tmpDigest != needDigest:
                            print('%s: got digest %s, wanted %s' % (livePath, tmpDigest, needDigest), file=sys.stderr)
                            continue
                        # This is easiest in case it is cross-filesystem, etc.  mv should preserve all attributes.
                        subprocess.check_call(['mv', '-f', tmpPath, downloadPath])
                        print('-- Installed %s.' % downloadPath)

    print('Scanning for obsolete rpmnew files.')
    for newPath in _findConfPaths("-type f -name '*.rpmnew' -print"):
        livePath = newPath[:-len('.rpmnew')]
        if livePath not in filesToMerge:  # only rpmnew files will be recorded in filesToMerge yet
            print('-- Deleting %s.  UNTESTED' % newPath)
            #os.unlink(newPath)

    # Remove rpmbase files for config files that are no longer managed, to not leave cruft.
    # This intentionally does not remove rpmsave-base files. ~ 2014-07-03
    # Note: If the config file had been modified, RPM would move it to
    # rpmsave on package removal (whether or not it was noreplace), so we'd
    # stamp the base file before we get here. ~ 2017-11-12
    print('Scanning for obsolete rpmbase files.')
    for basePath in _findConfPaths("-type f -name '*.rpmbase' -print"):
        livePath = basePath[:-len('.rpmbase')]
        if livePath not in filesDone:
            print('-- Deleting %s.' % basePath)
            os.unlink(basePath)

    # "sync vs. merge" terminology is inspired by Perforce.  We'll want a
    # better term for "sync" before releasing this to the public.
    if syncOnly:
        print('rpmconf sync complete.')
        return
    else:
        print('rpmconf sync complete.  You can interrupt if you don\'t wish to merge now.')

    print('Scanning for rpmsave files.')
    # Raw string: "\." and "\d" are regex escapes, not Python string escapes.
    stampedSaveRe = re.compile(r'^(.*)\.rpmsave-(\d+)$')
    for savePath in _findConfPaths("-type f -name '*.rpmsave-*' -print"):
        m = stampedSaveRe.search(savePath)
        # Skips names such as "*.rpmsave-<stamp>-base", which the find pattern also matches.
        if not m:
            continue
        (livePath, stamp) = (m.group(1), int(m.group(2)))
        filesToMerge[livePath][0].append(stamp)

    # Nested function for the ability to return from a nested loop...
    def mergeFile(livePath):
        """Merge all pending changes into `livePath`; return 0 if finished, 1 if left unfinished."""
        print('- Merging %s.' % livePath)
        (saveStamps, haveRpmnew) = filesToMerge[livePath]
        saveStamps.sort()  # mutates the original, that's OK
        # TODO: If a package was uninstalled, we could have an rpmsave and rpmsave-base with no live.
        # We want to alert the user that the configuration change is no longer having an effect.  How?
        # Currently kdiff3 comes up with a bunch of error dialogs and I have to manually intervene.
        for i, stamp in enumerate(saveStamps):
            # Each rpmsave is merged forward into the next newer rpmsave; the
            # newest one is merged into the live file.
            if i + 1 < len(saveStamps):
                c_output = '%s.rpmsave-%d' % (livePath, saveStamps[i + 1])
            else:
                c_output = livePath
            b = '%s.rpmsave-%d' % (livePath, stamp)
            a = b + '-base'
            try:
                doMerge(a, b, c_output, c_output)
            except subprocess.CalledProcessError:
                print('- Leaving %s merge unfinished.' % livePath)
                return 1
            os.unlink(b)
            if os.path.lexists(a):
                os.unlink(a)
        if haveRpmnew:
            try:
                doMerge(livePath + '.rpmbase', livePath, livePath + '.rpmnew', livePath)
            except subprocess.CalledProcessError:
                print('- Leaving %s merge unfinished.' % livePath)
                return 1
            # The merged-in rpmnew content becomes the new base.
            os.rename(livePath + '.rpmnew', livePath + '.rpmbase')
        print('- Merged %s.' % livePath)
        return 0

    unfinishedFiles = 0
    for livePath in filesToMerge:
        unfinishedFiles += mergeFile(livePath)

    if unfinishedFiles == 0:
        print('rpmconf merge complete!')
    else:
        print('No more files to consider.  %d files left unfinished.' % unfinishedFiles)
| 273 | |
if __name__ == '__main__':
    # TODO: Adopt a real option-parsing library.
    # Accepted invocations: no arguments at all, or exactly "--sync-only".
    cliArgs = sys.argv[1:]
    if cliArgs not in ([], ['--sync-only']):
        print('Unrecognized arguments.', file=sys.stderr)
        sys.exit(1)
    if cliArgs:
        rpmconf(syncOnly=True)
    else:
        rpmconf()