web-logs/Makefile: Use only the previous day's logs to avoid overlap.
[utils/utils.git] / rpmconf-matt
CommitLineData
273c3903
MM
1#!/usr/bin/env python3
2# rpmconf-matt [--sync-only]
3
751c2a60
MM
4# SECURITY NOTICE: If your system is missing the original version (.rpmbase) of
5# a configuration file you have modified, rpmconf-matt will automatically get it
4f0f2877
MM
6# by downloading the original package with "dnf download", which does not
7# check the package's GPG signature even if dnf is normally configured to do so
8# (https://bugzilla.redhat.com/show_bug.cgi?id=1879791). rpmconf-matt checks the digest of
751c2a60
MM
9# the file before using it, but a fuzzed package might be able to compromise
10# your system during extraction. This is less of a problem if your repository
11# metadata is integrity protected (e.g., by SSL on the metalink or repomd)
4f0f2877
MM
12# because "dnf download" refuses to save a package whose digest does not match
13# the metadata.
751c2a60 14
273c3903
MM
15# Known blockers to submission to Fedora: Basically removing assumptions specific to my setup.
16# - Decide how to name it vs. existing "rpmconf" in Fedora
17# - Make it configurable what part of the filesystem to scan. Is there a
18# default that's sufficiently safe for people to use on systems with untrusted
19# users?
20# - Should save its own files in another tree rather than blacklisting
21# directories known to cause problems if it adds its files there.
22# - Make diff program configurable / smarter default
23
# TODO: Move the "sync" part into a dnf plugin that runs after every transaction
# to make it more robust. Perhaps there can be an option to run the merge
# interactively. With tighter integration, we might be able to skip scanning the
# whole filesystem, but that would add one more kind of state that can go wrong.
28
29# Python 3 conversion, 2017-11-12
30# One could argue we should treat filenames and RPM names as bytes, but it's too
31# much of a pain to fix all the resulting fallout now, and we don't expect
32# adversarial names anyway. ~ Matt 2017-11-12
33#
34# Fedora 30 -> 32 upgrade: remove some .decode() calls (guess rpm-python changed
35# some things from bytes to str) ~ Matt 2020-06-01
36
37import collections
38import hashlib
39import os
40import pwd, grp
41import re
42import shutil
43import stat
44import subprocess
45import sys
46import tempfile
47import time
48
49import rpm
50
def getFileDigest(path, algoHint):
    """Return the hex digest of the file at *path*.

    The hash algorithm is guessed from the length of *algoHint*, which is
    expected to be a hex digest produced by the wanted algorithm (callers in
    this file pass the digest recorded for the file in the RPM database).

    Raises ValueError, before the file is opened, if the length of *algoHint*
    does not correspond to a known algorithm (this includes an empty hint).
    """
    # TODO: Get the digest algorithm from RPM??
    # File a bug to have rpmfiDigestAlgo exposed to Python?
    algosByHexLength = {
        32: hashlib.md5,  # adobe packages
        40: hashlib.sha1,
        56: hashlib.sha224,
        64: hashlib.sha256,
        96: hashlib.sha384,
        128: hashlib.sha512,
    }
    try:
        algo = algosByHexLength[len(algoHint)]
    except KeyError:
        raise ValueError('Failed to guess digest algorithm') from None

    # Hash in fixed-size chunks so a large file is never held in memory whole.
    digest = algo()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest()
62
def setAttributes(fe, path):
    """Give *path* the owner, group, permission bits and mtime recorded in *fe*.

    *fe* must provide ``user``, ``group``, ``mode`` and ``mtime`` attributes
    (the caller in this file passes an entry obtained from ``rpm.files``).
    Both the access time and the modification time are set to ``fe.mtime``.
    """
    # C.f. https://github.com/rpm-software-management/rpm/blob/rpm-4.13.0-rc1/lib/fsm.c#L713
    # If user or group is not found, we'll get a KeyError.
    uid = pwd.getpwnam(fe.user).pw_uid
    gid = grp.getgrnam(fe.group).gr_gid
    os.chown(path, uid, gid)

    # Keep only the permission, setuid/setgid and sticky bits of the mode.
    permissionBits = fe.mode & 0o7777
    os.chmod(path, permissionBits)

    timestamps = (fe.mtime, fe.mtime)
    os.utime(path, timestamps)
69
def makeConfFindCommand(expr):
    """Return a shell command string that runs ``find`` over the root filesystem.

    *expr* is appended verbatim as the find expression evaluated for every
    path that was not pruned.
    """
    # Make a find command intended to catch any files ever managed by RPM,
    # without getting confused by trees such as /proc
    # or even malicious user-created files. Unfortunately, we can't just ask RPM
    # which files it previously managed. The following heuristic should work for
    # ml*: root filesystem (and avoid an error on /mnt/root, which -xdev doesn't
    # catch), and exclude world-writable dirs such as /var/tmp.
    # TODO: Make the search criteria more general.
    prunedTrees = r"\( -path /mnt -or -path /var/www/html -or -perm /002 \)"
    return "find / -xdev " + prunedTrees + " -prune -or " + expr
79
def doMerge(a, b, c, output):
    """Run the external ``rpmconf-matt-merge`` program on three inputs.

    The program receives, in order: *output*, *a*, *b*, *c*. If *a* does not
    exist on disk, an empty string is passed in its place.

    Raises subprocess.CalledProcessError if the program exits non-zero.
    """
    # TODO: Make diff program customizable.
    # FIXME: Stop leaving ".orig" files around. At least name them distinctively.
    # kdiff3 wrapper to work around issue with Qt apps running as root under Qubes ~ 2015-08-20
    if os.path.lexists(a):
        baseArg = a
    else:
        baseArg = ''
    subprocess.check_call(['rpmconf-matt-merge', output, baseArg, b, c])
86
class NeededPackage:
    """Record of one package to download and the files wanted from it."""

    def __init__(self, header):
        """Start an empty record for the package described by *header*.

        Only ``header.nvra`` is read.
        """
        # live path -> (digest, path to download to)
        self.paths = {}
        self.nvra = header.nvra
91
def rpmconf(syncOnly=False):
    """Synchronize .rpmbase files with the RPM database, then optionally merge.

    Sync phase (always run):
      1. Rename every ``*.rpmsave`` file (and its ``.rpmbase`` companion, if
         any) to a timestamped name.
      2. For every installed, non-ghost, regular config file in the RPM
         database, make sure a pristine copy exists: either ``.rpmnew`` (when
         one is present) or ``.rpmbase``. Missing or wrong copies are taken
         from the live file if it is unmodified, otherwise extracted from the
         package fetched with ``dnf download``.
      3. Report ``.rpmnew`` files that no longer correspond to a config file
         (deletion is currently disabled) and delete such ``.rpmbase`` files.

    Merge phase (skipped when *syncOnly* is true): run a three-way merge via
    doMerge for every timestamped rpmsave file and every rpmnew file.

    Exits the process with status 1 if not run as root. Returns None.
    """
    if os.geteuid() != 0:
        print('This tool needs to run as root.', file=sys.stderr)
        sys.exit(1)

    def findPaths(expr):
        # Run the filesystem scan with the given find expression and return
        # the printed paths as a list of str.
        return subprocess.check_output(makeConfFindCommand(expr), shell=True).decode().splitlines()

    # First, rename any rpmsave files and corresponding rpmbase files before we
    # would overwrite the rpmbase files. We'll do this to any file type, even
    # though there will only ever be base files for regular files.
    print('Scanning for rpmsave files that need to be stamped.')
    saveStamp = str(int(time.time()))
    for savePath in findPaths("-name '*.rpmsave' -print"):
        livePath = savePath[:-len('.rpmsave')]
        liveBasePath = livePath + '.rpmbase'
        stampedSavePath = livePath + '.rpmsave-' + saveStamp
        stampedSaveBasePath = stampedSavePath + '-base'
        print('-- Timestamping rpmsave file for %s.' % livePath)
        # XXX: Make sure we are not clobbering existing files?
        os.rename(savePath, stampedSavePath)
        if os.path.lexists(liveBasePath):
            os.rename(liveBasePath, stampedSaveBasePath)

    filesToMerge = collections.defaultdict(lambda: [[], False])  # live path -> (list of rpmsave stamps, bool if rpmnew)

    # Extension point directories whose readers can't handle additional *.rpm* files.
    # /etc/skel/ is not actually causing a problem but leads to ugly persistent state.
    # TODO: Find a better workaround.
    # Matched as literal path prefixes (the former regex left its dots unescaped).
    skippedPrefixes = ('/etc/skel/', '/etc/rpm/macros', '/etc/logrotate.d/', '/etc/grub.d/')

    print('Scanning for config files that need base files created.')
    filesDone = {}  # live path -> (nevra, digest)
    needPackages = {}  # nevra -> NeededPackage; no more defaultdict because NeededPackage needs header
    ts = rpm.ts()
    mi = ts.dbMatch()
    for header in mi:
        nevra = header.nevra
        for fe in rpm.files(header):
            # Only installed config files.
            if fe.state != rpm.RPMFILE_STATE_NORMAL: continue
            if not (fe.fflags & rpm.RPMFILE_CONFIG): continue
            # For a ghost, we have no base content to write. Probably best to let this be a two-way merge if the file becomes non-ghost later.
            if fe.fflags & rpm.RPMFILE_GHOST: continue
            # For now, we only handle regular files. Conflicts on config symlinks seem to be rare.
            if not stat.S_ISREG(fe.mode): continue
            if fe.caps != '': raise NotImplementedError('File capabilities are not implemented: %s' % fe.name)

            if fe.name.startswith(skippedPrefixes):
                continue

            # We need this check to avoid thrashing a conflicted base file.
            if fe.name in filesDone:
                (oldNevra, oldDigest) = filesDone[fe.name]
                if fe.digest != oldDigest:
                    print('Conflict at %s: have %s from %s, ignoring %s from %s' %
                          (fe.name, oldDigest, oldNevra, fe.digest, nevra),
                          file=sys.stderr)
                # NOTE(review): the nesting of this `continue` could not be
                # recovered from the extracted source; it is assumed to skip
                # every path already handled, conflicting or not. Confirm.
                continue
            filesDone[fe.name] = (nevra, fe.digest)

            path_new = fe.name + '.rpmnew'
            download_path = None
            if os.path.lexists(path_new):
                filesToMerge[fe.name][1] = True
                # The live config file is not based on the current DB entry.
                # Hopefully we already have a base for the live config file; if not, there's nothing we can do about it now.
                # We do want to make sure the rpmnew file is correct.
                if getFileDigest(path_new, fe.digest) != fe.digest:
                    download_path = path_new
            else:
                path_base = fe.name + '.rpmbase'
                if not (os.path.lexists(path_base) and getFileDigest(path_base, fe.digest) == fe.digest):
                    if os.path.lexists(fe.name) and getFileDigest(fe.name, fe.digest) == fe.digest:
                        # The live file has the original content.
                        # Copy the content and set the original attributes manually.
                        path_tmp = fe.name + '.rpmbase-tmp'
                        shutil.copyfile(fe.name, path_tmp)
                        setAttributes(fe, path_tmp)
                        os.rename(path_tmp, path_base)
                        print('- %s: Copied %s from %s.' % (nevra, path_base, fe.name))
                    else:
                        download_path = path_base
            if download_path:
                if nevra not in needPackages:
                    needPackages[nevra] = NeededPackage(header)
                needPackages[nevra].paths[fe.name] = (fe.digest, download_path)
                print('- %s: Need to download %s.' % (nevra, download_path))

    if needPackages:
        print('Downloading %d packages.' % len(needPackages))
        packages_tmpdir = tempfile.mkdtemp(prefix='rpmconf-packages')
        # try/finally so the temporary trees are removed even when a download,
        # an extraction or a move fails.
        try:
            subprocess.check_call(['dnf', 'download'] + list(needPackages), cwd=packages_tmpdir)
            for nevra, neededPkg in needPackages.items():
                packagePath = '%s/%s.rpm' % (packages_tmpdir, neededPkg.nvra)
                extract_tmpdir = tempfile.mkdtemp(prefix='rpmconf-extract-%s' % nevra)
                try:
                    cpioNeedPaths = ['.' + p for p in neededPkg.paths]  # go figure
                    subprocess.check_call(['/bin/bash', '-c', 'p="$1"; shift; rpm2cpio "$p" | cpio --extract --quiet --preserve-modification-time --make-directories "$@"', '--', packagePath] + cpioNeedPaths, cwd=extract_tmpdir)
                    print('- Extracted %s.' % nevra)
                    for livePath, (needDigest, downloadPath) in neededPkg.paths.items():
                        tmpPath = extract_tmpdir + livePath
                        tmpDigest = getFileDigest(tmpPath, needDigest)
                        if tmpDigest != needDigest:
                            print('%s: got digest %s, wanted %s' % (livePath, tmpDigest, needDigest), file=sys.stderr)
                            continue
                        # This is easiest in case it is cross-filesystem, etc. mv should preserve all attributes.
                        subprocess.check_call(['mv', '-f', tmpPath, downloadPath])
                        print('-- Installed %s.' % downloadPath)
                finally:
                    shutil.rmtree(extract_tmpdir)
        finally:
            shutil.rmtree(packages_tmpdir)

    print('Scanning for obsolete rpmnew files.')
    for newPath in findPaths("-type f -name '*.rpmnew' -print"):
        livePath = newPath[:-len('.rpmnew')]
        if livePath not in filesToMerge:  # only rpmnew files will be recorded in filesToMerge yet
            # Report only: the actual deletion is deliberately left disabled
            # until this code path has been tested.
            print('-- Deleting %s. UNTESTED' % newPath)
            #os.unlink(newPath)

    # Remove rpmbase files for config files that are no longer managed, to not leave cruft.
    # This intentionally does not remove rpmsave-base files. ~ 2014-07-03
    # Note: If the config file had been modified, RPM would move it to
    # rpmsave on package removal (whether or not it was noreplace), so we'd
    # stamp the base file before we get here. ~ 2017-11-12
    print('Scanning for obsolete rpmbase files.')
    for basePath in findPaths("-type f -name '*.rpmbase' -print"):
        livePath = basePath[:-len('.rpmbase')]
        if livePath not in filesDone:
            print('-- Deleting %s.' % basePath)
            os.unlink(basePath)

    # "sync vs. merge" terminology is inspired by Perforce. We'll want a
    # better term for "sync" before releasing this to the public.
    if syncOnly:
        print('rpmconf sync complete.')
        return
    else:
        print('rpmconf sync complete. You can interrupt if you don\'t wish to merge now.')

    print('Scanning for rpmsave files.')
    # The find pattern also matches the "-base" companions; the regex below
    # keeps only names that end in a purely numeric stamp.
    stampedSaveRe = re.compile(r'^(.*)\.rpmsave-(\d+)$')
    for savePath in findPaths("-type f -name '*.rpmsave-*' -print"):
        m = stampedSaveRe.search(savePath)
        if not m: continue
        (livePath, stamp) = (m.group(1), int(m.group(2)))
        filesToMerge[livePath][0].append(stamp)

    # Nested function for the ability to return from a nested loop...
    def mergeFile(livePath):
        # Merge all pending rpmsave/rpmnew files for livePath.
        # Returns 0 when everything merged, 1 when a merge was left unfinished.
        print('- Merging %s.' % livePath)
        (saveStamps, haveRpmnew) = filesToMerge[livePath]
        saveStamps.sort()  # mutates the original, that's OK
        # TODO: If a package was uninstalled, we could have an rpmsave and rpmsave-base with no live.
        # We want to alert the user that the configuration change is no longer having an effect. How?
        # Currently kdiff3 comes up with a bunch of error dialogs and I have to manually intervene.
        for i, stamp in enumerate(saveStamps):
            # Each saved version is merged into the next newer saved version;
            # the newest one is merged into the live file.
            if i + 1 < len(saveStamps):
                c_output = '%s.rpmsave-%d' % (livePath, saveStamps[i + 1])
            else:
                c_output = livePath
            b = '%s.rpmsave-%d' % (livePath, stamp)
            a = b + '-base'
            try:
                doMerge(a, b, c_output, c_output)
            except subprocess.CalledProcessError:
                print('- Leaving %s merge unfinished.' % livePath)
                return 1
            os.unlink(b)
            if os.path.lexists(a): os.unlink(a)
        if haveRpmnew:
            try:
                doMerge(livePath + '.rpmbase', livePath, livePath + '.rpmnew', livePath)
            except subprocess.CalledProcessError:
                print('- Leaving %s merge unfinished.' % livePath)
                return 1
            # The merged live file is now based on the rpmnew content.
            os.rename(livePath + '.rpmnew', livePath + '.rpmbase')
        print('- Merged %s.' % livePath)
        return 0

    unfinishedFiles = 0
    for livePath in filesToMerge:
        unfinishedFiles += mergeFile(livePath)

    if unfinishedFiles == 0:
        print('rpmconf merge complete!')
    else:
        print('No more files to consider. %d files left unfinished.' % unfinishedFiles)
269
if __name__ == '__main__':
    # Command line: either no arguments (sync, then merge) or exactly
    # "--sync-only" (sync and stop). Anything else is rejected with status 1.
    # TODO: Adopt a real option-parsing library.
    args = sys.argv[1:]
    syncOnlyRequested = (args == ['--sync-only'])
    if args and not syncOnlyRequested:
        print('Unrecognized arguments.', file=sys.stderr)
        sys.exit(1)
    rpmconf(syncOnly=syncOnlyRequested)