Improved the rsyncsums script to have a --check mode.
[rsync/rsync-patches.git] / checksum4mirrors.diff
CommitLineData
213d4328
WD
1Optimize the ability of a mirror to send checksums.
2
3This adds a sender optimization feature that allows a cache of checksums
4to be used when the client specifies the --checksum option. The checksum
5files (.rsyncsums) must be created by some other process (see the perl
6script in the support dir for one way).
7
8This feature is intended for mirrors whose files are created once and then
9never changed. Each .rsyncsums entry carries only minimal sanity-check
10information (size and mtime), so that the sum files can be shared with your
11mirror network.
12
13To use this patch, run these commands for a successful build:
14
15 patch -p1 <patches/checksum4mirrors.diff
16 ./configure (optional if already run)
17 make
18
19--- old/flist.c
20+++ new/flist.c
21@@ -117,6 +117,7 @@ static char empty_sum[MAX_DIGEST_LEN];
22 static int flist_count_offset; /* for --delete --progress */
23 static int dir_count = 0;
24 static int high_hlink_ndx;
25+static struct file_list *checksum_flist = NULL;
26
27 static void clean_flist(struct file_list *flist, int strip_root);
28 static void output_flist(struct file_list *flist);
29@@ -304,6 +305,186 @@ static void flist_done_allocating(struct
30 flist->pool_boundary = ptr;
31 }
32
33+/* Append one parsed .rsyncsums entry to checksum_flist. The len count is the length of the basename + 1 for the null. Returns 1 if an entry was added, 0 if it was skipped. */
34+static int add_checksum(const char *dirname, const char *basename, int len,
35+ OFF_T file_length, time_t mtime, const char *sum)
36+{
37+ struct file_struct *file;
38+ int alloc_len, extra_len;
39+ char *bp;
40+
41+ if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0) /* never list the sums file itself */
42+ return 0;
43+ if (file_length == 0) /* zero-length files get no entry */
44+ return 0;
45+
46+ extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT) /* one more extra slot when the length does not fit in 32 bits */
47+ * EXTRA_LEN;
48+#if EXTRA_ROUNDING > 0
49+ if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
50+ extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
51+#endif
52+ alloc_len = FILE_STRUCT_LEN + extra_len + len; /* layout: extras, then the struct, then the basename */
53+ bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum");
54+
55+ memset(bp, 0, extra_len + FILE_STRUCT_LEN); /* zero the extras and the struct; the name is copied in below */
56+ bp += extra_len;
57+ file = (struct file_struct *)bp;
58+ bp += FILE_STRUCT_LEN;
59+
60+ memcpy(bp, basename, len); /* len includes the terminating null */
61+
62+ file->mode = S_IFREG;
63+ file->modtime = mtime;
64+ file->len32 = (uint32)file_length;
65+ if (file_length > 0xFFFFFFFFu) {
66+ file->flags |= FLAG_LENGTH64;
67+ OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32); /* high 32 bits of the length */
68+ }
69+ file->dirname = dirname; /* the pointer is stored, not copied */
70+ bp = F_SUM(file);
71+ memcpy(bp, sum, checksum_len);
72+
73+ flist_expand(checksum_flist, 1);
74+ checksum_flist->files[checksum_flist->used++] = file;
75+
76+ checksum_flist->sorted = checksum_flist->files;
77+
78+ return 1;
79+}
80+
81+/* Load the .rsyncsums file found in dirname into checksum_flist (a NULL dirname just empties the list). The dirname value must remain unchanged during the lifespan of the
82+ * created checksum_flist object because we use it directly. */
83+static void read_checksums(const char *dirname)
84+{
85+ char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
86+ OFF_T file_length;
87+ time_t mtime;
88+ int len, dlen, i;
89+ char *cp;
90+ FILE *fp;
91+
92+ if (checksum_flist) {
93+ /* Reset the pool memory and empty the file-list array. */
94+ pool_free_old(checksum_flist->file_pool,
95+ pool_boundary(checksum_flist->file_pool, 0));
96+ checksum_flist->used = 0;
97+ } else
98+ checksum_flist = flist_new(FLIST_TEMP, "read_checksums");
99+
100+ checksum_flist->low = 0;
101+ checksum_flist->high = -1;
102+
103+ if (!dirname)
104+ return;
105+
106+ dlen = strlcpy(fbuf, dirname, sizeof fbuf);
107+ if (dlen + 1 + (int)sizeof ".rsyncsums" > (int)sizeof fbuf) /* need room for '/' + ".rsyncsums" + null, or the name would be truncated or left unterminated */
108+ return;
109+ if (dlen)
110+ fbuf[dlen++] = '/';
111+ else
112+ dirname = NULL; /* an empty dirname is recorded as NULL in the entries */
113+ strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen);
114+ if (!(fp = fopen(fbuf, "r")))
115+ return;
116+
117+ while (fgets(line, sizeof line, fp)) { /* a malformed line (any "break" below) ends parsing of the file */
118+ cp = line;
119+ if (protocol_version >= 30) { /* step over the first sum column; the second one is parsed below */
120+ char *alt_sum = cp;
121+ if (*cp == '=')
122+ while (*++cp == '=') {}
123+ else
124+ while (isXDigit(cp)) cp++;
125+ if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
126+ break;
127+ while (*++cp == ' ') {}
128+ }
129+
130+ if (*cp == '=') { /* the sum we need is given as '=' filler: nothing usable on this line */
131+ continue;
132+ } else {
133+ for (i = 0; i < checksum_len*2; i++, cp++) { /* decode the hex digits into sum[] */
134+ int x;
135+ if (isXDigit(cp)) {
136+ if (isDigit(cp))
137+ x = *cp - '0';
138+ else
139+ x = (*cp & 0xF) + 9; /* 'a'-'f' or 'A'-'F' -> 10-15 */
140+ } else {
141+ cp = ""; /* makes the ' ' test below fail */
142+ break;
143+ }
144+ if (i & 1)
145+ sum[i/2] |= x;
146+ else
147+ sum[i/2] = x << 4;
148+ }
149+ }
150+ if (*cp != ' ')
151+ break;
152+ while (*++cp == ' ') {}
153+
154+ if (protocol_version < 30) { /* step over the second sum column; the first one was parsed above */
155+ char *alt_sum = cp;
156+ if (*cp == '=')
157+ while (*++cp == '=') {}
158+ else
159+ while (isXDigit(cp)) cp++;
160+ if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
161+ break;
162+ while (*++cp == ' ') {}
163+ }
164+
165+ file_length = 0;
166+ while (isDigit(cp))
167+ file_length = file_length * 10 + *cp++ - '0';
168+ if (*cp != ' ')
169+ break;
170+ while (*++cp == ' ') {}
171+
172+ mtime = 0;
173+ while (isDigit(cp))
174+ mtime = mtime * 10 + *cp++ - '0';
175+ if (*cp != ' ')
176+ break;
177+ while (*++cp == ' ') {}
178+
179+ /* Ignore ctime. */
180+ while (isDigit(cp))
181+ cp++;
182+ if (*cp != ' ')
183+ break;
184+ while (*++cp == ' ') {}
185+
186+ /* Ignore inode. */
187+ while (isDigit(cp))
188+ cp++;
189+ if (*cp != ' ')
190+ break;
191+ while (*++cp == ' ') {}
192+
193+ len = strlen(cp); /* the rest of the line is the file name */
194+ while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
195+ len--;
196+ if (!len)
197+ break;
198+ cp[len++] = '\0'; /* len now counts the null */
199+ if (strchr(cp, '/')) /* entries must be plain basenames */
200+ break;
201+ if (len > MAXPATHLEN)
202+ continue;
203+
204+ strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
205+
206+ add_checksum(dirname, cp, len, file_length, mtime, sum);
207+ }
208+ fclose(fp);
209+
210+ clean_flist(checksum_flist, 0);
211+}
212+
213 int push_pathname(const char *dir, int len)
214 {
215 if (dir == pathname)
216@@ -989,7 +1170,7 @@ struct file_struct *make_file(const char
217 STRUCT_STAT *stp, int flags, int filter_level)
218 {
219 static char *lastdir;
220- static int lastdir_len = -1;
221+ static int lastdir_len = -2;
222 struct file_struct *file;
223 char thisname[MAXPATHLEN];
224 char linkname[MAXPATHLEN];
225@@ -1119,9 +1300,16 @@ struct file_struct *make_file(const char
226 memcpy(lastdir, thisname, len);
227 lastdir[len] = '\0';
228 lastdir_len = len;
229+ if (always_checksum && am_sender && flist)
230+ read_checksums(lastdir);
231 }
232- } else
233+ } else {
234 basename = thisname;
235+ if (always_checksum && am_sender && flist && lastdir_len == -2) {
236+ lastdir_len = -1;
237+ read_checksums("");
238+ }
239+ }
240 basename_len = strlen(basename) + 1; /* count the '\0' */
241
242 #ifdef SUPPORT_LINKS
243@@ -1197,11 +1385,21 @@ struct file_struct *make_file(const char
244 }
245 #endif
246
247- if (always_checksum && am_sender && S_ISREG(st.st_mode))
248- file_checksum(thisname, tmp_sum, st.st_size);
249-
250 F_PATHNAME(file) = pathname;
251
252+ if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
253+ int j;
254+ if (flist && (j = flist_find(checksum_flist, file)) >= 0) {
255+ struct file_struct *fp = checksum_flist->sorted[j];
256+ if (F_LENGTH(fp) == st.st_size
257+ && fp->modtime == st.st_mtime)
258+ memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN);
259+ else
260+ file_checksum(thisname, tmp_sum, st.st_size);
261+ } else
262+ file_checksum(thisname, tmp_sum, st.st_size);
263+ }
264+
265 /* This code is only used by the receiver when it is building
266 * a list of files for a delete pass. */
267 if (keep_dirlinks && linkname_len && flist) {
6e9495c7
WD
268@@ -2051,7 +2249,11 @@ struct file_list *send_file_list(int f,
269 * file-list to check if this is a 1-file xfer. */
270 send_extra_file_list(f, 1);
271 }
272- }
273+ } else
274+ flist_eof = 1;
275+
276+ if (checksum_updating && always_checksum && flist_eof)
277+ read_checksums(NULL);
278
279 return flist;
280 }
213d4328
WD
281--- old/ifuncs.h
282+++ new/ifuncs.h
283@@ -64,6 +64,12 @@ isDigit(const char *ptr)
284 }
285
286 static inline int
287+isXDigit(const char *ptr)
288+{
289+ return isxdigit(*(unsigned char *)ptr); /* read the byte as unsigned char so a negative char value is never passed to isxdigit() */
290+}
291+
292+static inline int
293 isPrint(const char *ptr)
294 {
295 return isprint(*(unsigned char *)ptr);
213d4328
WD
296--- old/support/rsyncsums
297+++ new/support/rsyncsums
d1a75c9f 298@@ -0,0 +1,203 @@
213d4328
WD
299+#!/usr/bin/perl -w
300+use strict;
301+
302+use Getopt::Long;
303+use Cwd qw(abs_path cwd);
304+use Digest::MD4;
305+use Digest::MD5;
306+
307+our $SUMS_FILE = '.rsyncsums'; # name of the per-directory checksum cache file
308+
309+&Getopt::Long::Configure('bundling'); # let single-letter options be combined, e.g. -rv
310+&usage if !&GetOptions(
213d4328 311+ 'recurse|r' => \( my $recurse_opt ),
d1a75c9f
WD
312+ 'simple-cmp|s' => \( my $ignore_ctime_and_inode ),
313+ 'check|c' => \( my $check_opt ),
213d4328
WD
314+ 'verbose|v+' => \( my $verbosity = 0 ),
315+ 'help|h' => \( my $help_opt ),
316+);
317+&usage if $help_opt;
318+
319+my $start_dir = cwd(); # used later to print directories relative to where we started
320+
321+my @dirs = @ARGV;
322+@dirs = '.' unless @dirs; # default to the current directory
323+foreach (@dirs) {
324+ $_ = abs_path($_); # absolute paths, since the main loop chdir()s into each directory
325+}
326+
327+$| = 1; # autoflush the selected output handle so progress text appears promptly
328+
d1a75c9f
WD
329+my $exit_code = 0; # set to 1 when --check reports a MISSING or FAILED file
330+
213d4328
WD
331+my $md4 = Digest::MD4->new;
332+my $md5 = Digest::MD5->new;
333+
334+while (@dirs) { # one pass per directory; when recursing, its subdirs are queued at the end of the pass
335+ my $dir = shift @dirs;
336+
337+ if (!chdir($dir)) {
338+ warn "Unable to chdir to $dir: $!\n";
339+ next;
340+ }
341+ if (!opendir(DP, '.')) {
342+ warn "Unable to opendir $dir: $!\n";
343+ next;
344+ }
345+
d1a75c9f
WD
346+ my $reldir = $dir;
347+ $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
213d4328 348+ if ($verbosity) {
213d4328 349+ print "$reldir ... ";
d1a75c9f 350+ print "\n" if $check_opt;
213d4328
WD
351+ }
352+
213d4328 353+ my %cache; # file name => [ found-flag, md4, md5, size, mtime, ctime, inode ]
d1a75c9f
WD
354+ my $f_cnt = 0; # number of entries currently held from the sums file
355+ if (open(FP, '<', $SUMS_FILE)) {
356+ while (<FP>) {
357+ chomp;
358+ my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
359+ $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ]; # slot 0 is set to 1 once the file is seen on disk
360+ $f_cnt++;
361+ }
362+ close FP;
363+ }
364+
213d4328 365+ my @subdirs;
d1a75c9f
WD
366+ my $d_cnt = 0; # number of non-empty regular files found in this directory
367+ my $update_cnt = 0;
213d4328
WD
368+ while (defined(my $fn = readdir(DP))) {
369+ next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
370+ if (-d _) {
371+ push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
372+ next;
373+ }
374+ next unless -f _;
375+
376+ my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
d1a75c9f
WD
377+ my $ref = $cache{$fn};
378+ if ($size == 0) { # empty files are never listed; drop any stale entry
379+ if (defined $ref) {
380+ delete $cache{$fn};
381+ $f_cnt--;
382+ if (!$check_opt && !$update_cnt++) {
383+ print "UPDATING\n" if $verbosity;
384+ }
385+ }
386+ next;
213d4328 387+ }
d1a75c9f 388+ $d_cnt++;
213d4328 389+
d1a75c9f 390+ if (!$check_opt) { # update mode: skip files whose cached entry still matches
213d4328 391+ if (defined $ref) {
d1a75c9f
WD
392+ $$ref[0] = 1;
393+ if ($$ref[3] == $size
394+ && $$ref[4] == $mtime
395+ && ($ignore_ctime_and_inode || ($$ref[5] == ($ctime & 0xFFFFFFFF) && $$ref[6] == ($inode & 0xFFFFFFFF))) # cached ctime/inode are 32-bit masked, so mask the live values too
396+ && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) {
397+ next;
213d4328 398+ }
d1a75c9f
WD
399+ }
400+ if (!$update_cnt++) {
401+ print "UPDATING\n" if $verbosity;
213d4328
WD
402+ }
403+ }
213d4328 404+
d1a75c9f
WD
405+ if (!open(IN, '<', $fn)) { # 3-arg open: the name is never parsed as a mode or pipe
406+ print STDERR "Unable to read $fn: $!\n";
407+ if (defined $ref) {
213d4328 408+ delete $cache{$fn};
d1a75c9f 409+ $f_cnt--;
213d4328 410+ }
d1a75c9f
WD
411+ next;
412+ }
213d4328 413+
d1a75c9f
WD
414+ my($sum4, $sum5);
415+ while (1) { # re-read until the file's stat info is unchanged across the read
416+ while (sysread(IN, $_, 64*1024)) {
417+ $md4->add($_);
418+ $md5->add($_);
213d4328 419+ }
d1a75c9f
WD
420+ $sum4 = $md4->hexdigest;
421+ $sum5 = $md5->hexdigest;
422+ print " $sum4 $sum5" if $verbosity > 2;
423+ print " $fn" if $verbosity > 1;
424+ my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
425+ last if $size == $size2 && $mtime == $mtime2
426+ && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2));
427+ $size = $size2;
428+ $mtime = $mtime2;
429+ $ctime = $ctime2;
430+ $inode = $inode2;
431+ sysseek(IN, 0, 0);
432+ print " REREADING\n" if $verbosity > 1;
433+ }
213d4328 434+
d1a75c9f
WD
435+ close IN;
436+
437+ if ($check_opt) {
438+ my $dif;
439+ if (!defined $ref) {
440+ $dif = 'MISSING';
441+ } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) {
442+ $dif = 'FAILED';
443+ } else {
444+ print " OK\n" if $verbosity > 1;
445+ next;
446+ }
447+ if ($verbosity < 2) {
448+ print $verbosity ? ' ' : "$reldir/";
449+ print $fn;
213d4328 450+ }
d1a75c9f
WD
451+ print " $dif\n";
452+ $exit_code = 1;
453+ } else {
454+ print "\n" if $verbosity > 1;
455+ $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
456+ }
457+ }
458+
459+ closedir DP;
213d4328 460+
d1a75c9f
WD
461+ unshift(@dirs, sort @subdirs) if $recurse_opt;
462+
463+ if ($check_opt) {
464+ ;
465+ } elsif ($d_cnt == 0) {
466+ if ($f_cnt) {
467+ print "(removed $SUMS_FILE) " if $verbosity;
468+ unlink($SUMS_FILE);
213d4328 469+ }
d1a75c9f
WD
470+ print "empty\n" if $verbosity;
471+ } elsif ($update_cnt || $d_cnt != $f_cnt) { # something changed, or the file counts disagree: rewrite the sums file
472+ print "UPDATING\n" if $verbosity && !$update_cnt;
473+ open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
213d4328 474+
213d4328
WD
475+ foreach my $fn (sort keys %cache) {
476+ my $ref = $cache{$fn};
d1a75c9f
WD
477+ my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref;
478+ next unless $found; # entries whose file was not seen this pass are dropped
213d4328
WD
479+ printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
480+ }
d1a75c9f 481+ close FP;
213d4328
WD
482+ } else {
483+ print "ok\n" if $verbosity;
484+ }
213d4328
WD
485+}
486+
d1a75c9f
WD
487+exit $exit_code;
488+
213d4328
WD
489+sub usage # Emit the option summary via die, so it goes to STDERR and the script ends with an error status.
490+{
491+ die <<EOT;
492+Usage: rsyncsums [OPTIONS] [DIRS]
493+
494+Options:
495+ -r, --recurse Update $SUMS_FILE files in subdirectories too.
496+ -s, --simple-cmp Ignore ctime and inode values when comparing identicality.
d1a75c9f 497+ -c, --check Check if the checksums are right (doesn't update).
213d4328
WD
498+ -v, --verbose Mention what we're doing. Repeat for more info.
499+ -h, --help Display this help message.
500+EOT
501+}
501+}