The patches for 3.0.0pre9.
[rsync/rsync-patches.git] / checksum-reading.diff
CommitLineData
213d4328
WD
1Optimize the ability of a mirror to send checksums.
2
3This adds a sender optimization feature that allows a cache of checksums
4to be used when the client specifies the --checksum option. The checksum
5files (.rsyncsums) must be created by some other process (see the perl
6script in the support dir for one way).
7
8This feature is intended for mirrors whose files are created once and then
9never changed. The .rsyncsums file carries only a minimal amount of
10sanity-check information (size and mtime), so the sum files can be shared
11across your mirror network.
12
13To use this patch, run these commands for a successful build:
14
cc3e685d 15 patch -p1 <patches/checksum-reading.diff
213d4328
WD
16 ./configure (optional if already run)
17 make
18
cc3e685d
WD
19diff --git a/flist.c b/flist.c
20--- a/flist.c
21+++ b/flist.c
f2863bc0
WD
22@@ -121,6 +121,7 @@ static char tmp_sum[MAX_DIGEST_LEN];
23 static char empty_sum[MAX_DIGEST_LEN];
213d4328
WD
24 static int flist_count_offset; /* for --delete --progress */
25 static int dir_count = 0;
213d4328
WD
26+static struct file_list *checksum_flist = NULL;
27
f2863bc0 28 static void flist_sort_and_clean(struct file_list *flist, int strip_root);
213d4328 29 static void output_flist(struct file_list *flist);
f2863bc0 30@@ -313,6 +314,186 @@ static void flist_done_allocating(struct file_list *flist)
213d4328
WD
31 flist->pool_boundary = ptr;
32 }
33
34+/* The len count is the length of the basename + 1 for the null. */
35+static int add_checksum(const char *dirname, const char *basename, int len,
36+ OFF_T file_length, time_t mtime, const char *sum)
37+{
38+ struct file_struct *file;
39+ int alloc_len, extra_len;
40+ char *bp;
41+
42+ if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0)
43+ return 0;
44+ if (file_length == 0)
45+ return 0;
46+
47+ extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT)
48+ * EXTRA_LEN;
49+#if EXTRA_ROUNDING > 0
50+ if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
51+ extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
52+#endif
53+ alloc_len = FILE_STRUCT_LEN + extra_len + len;
54+ bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum");
55+
56+ memset(bp, 0, extra_len + FILE_STRUCT_LEN);
57+ bp += extra_len;
58+ file = (struct file_struct *)bp;
59+ bp += FILE_STRUCT_LEN;
60+
61+ memcpy(bp, basename, len);
62+
63+ file->mode = S_IFREG;
64+ file->modtime = mtime;
65+ file->len32 = (uint32)file_length;
66+ if (file_length > 0xFFFFFFFFu) {
67+ file->flags |= FLAG_LENGTH64;
68+ OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
69+ }
70+ file->dirname = dirname;
71+ bp = F_SUM(file);
72+ memcpy(bp, sum, checksum_len);
73+
74+ flist_expand(checksum_flist, 1);
75+ checksum_flist->files[checksum_flist->used++] = file;
76+
77+ checksum_flist->sorted = checksum_flist->files;
78+
79+ return 1;
80+}
81+
82+/* The dirname value must remain unchanged during the lifespan of the
83+ * created checksum_flist object because we use it directly. */
84+static void read_checksums(const char *dirname)
85+{
86+ char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
87+ OFF_T file_length;
88+ time_t mtime;
89+ int len, dlen, i;
90+ char *cp;
91+ FILE *fp;
92+
93+ if (checksum_flist) {
94+ /* Reset the pool memory and empty the file-list array. */
95+ pool_free_old(checksum_flist->file_pool,
96+ pool_boundary(checksum_flist->file_pool, 0));
97+ checksum_flist->used = 0;
98+ } else
99+ checksum_flist = flist_new(FLIST_TEMP, "read_checksums");
100+
101+ checksum_flist->low = 0;
102+ checksum_flist->high = -1;
103+
104+ if (!dirname)
105+ return;
106+
107+ dlen = strlcpy(fbuf, dirname, sizeof fbuf);
108+ if (dlen >= (int)sizeof fbuf)
109+ return;
110+ if (dlen)
111+ fbuf[dlen++] = '/';
112+ else
113+ dirname = NULL;
114+ strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen);
115+ if (!(fp = fopen(fbuf, "r")))
116+ return;
117+
118+ while (fgets(line, sizeof line, fp)) {
119+ cp = line;
120+ if (protocol_version >= 30) {
121+ char *alt_sum = cp;
122+ if (*cp == '=')
123+ while (*++cp == '=') {}
124+ else
125+ while (isXDigit(cp)) cp++;
126+ if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
127+ break;
128+ while (*++cp == ' ') {}
129+ }
130+
131+ if (*cp == '=') {
132+ continue;
133+ } else {
134+ for (i = 0; i < checksum_len*2; i++, cp++) {
135+ int x;
136+ if (isXDigit(cp)) {
137+ if (isDigit(cp))
138+ x = *cp - '0';
139+ else
140+ x = (*cp & 0xF) + 9;
141+ } else {
142+ cp = "";
143+ break;
144+ }
145+ if (i & 1)
146+ sum[i/2] |= x;
147+ else
148+ sum[i/2] = x << 4;
149+ }
150+ }
151+ if (*cp != ' ')
152+ break;
153+ while (*++cp == ' ') {}
154+
155+ if (protocol_version < 30) {
156+ char *alt_sum = cp;
157+ if (*cp == '=')
158+ while (*++cp == '=') {}
159+ else
160+ while (isXDigit(cp)) cp++;
161+ if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
162+ break;
163+ while (*++cp == ' ') {}
164+ }
165+
166+ file_length = 0;
167+ while (isDigit(cp))
168+ file_length = file_length * 10 + *cp++ - '0';
169+ if (*cp != ' ')
170+ break;
171+ while (*++cp == ' ') {}
172+
173+ mtime = 0;
174+ while (isDigit(cp))
175+ mtime = mtime * 10 + *cp++ - '0';
176+ if (*cp != ' ')
177+ break;
178+ while (*++cp == ' ') {}
179+
180+ /* Ignore ctime. */
181+ while (isDigit(cp))
182+ cp++;
183+ if (*cp != ' ')
184+ break;
185+ while (*++cp == ' ') {}
186+
187+ /* Ignore inode. */
188+ while (isDigit(cp))
189+ cp++;
190+ if (*cp != ' ')
191+ break;
192+ while (*++cp == ' ') {}
193+
194+ len = strlen(cp);
195+ while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
196+ len--;
197+ if (!len)
198+ break;
199+ cp[len++] = '\0'; /* len now counts the null */
200+ if (strchr(cp, '/'))
201+ break;
202+ if (len > MAXPATHLEN)
203+ continue;
204+
205+ strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
206+
207+ add_checksum(dirname, cp, len, file_length, mtime, sum);
208+ }
209+ fclose(fp);
210+
211+ clean_flist(checksum_flist, 0);
212+}
213+
214 int push_pathname(const char *dir, int len)
215 {
216 if (dir == pathname)
f2863bc0 217@@ -1003,7 +1184,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
213d4328
WD
218 STRUCT_STAT *stp, int flags, int filter_level)
219 {
220 static char *lastdir;
221- static int lastdir_len = -1;
222+ static int lastdir_len = -2;
223 struct file_struct *file;
224 char thisname[MAXPATHLEN];
225 char linkname[MAXPATHLEN];
a5e6228a 226@@ -1136,9 +1317,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
213d4328
WD
227 memcpy(lastdir, thisname, len);
228 lastdir[len] = '\0';
229 lastdir_len = len;
230+ if (always_checksum && am_sender && flist)
231+ read_checksums(lastdir);
232 }
233- } else
234+ } else {
235 basename = thisname;
236+ if (always_checksum && am_sender && flist && lastdir_len == -2) {
237+ lastdir_len = -1;
238+ read_checksums("");
239+ }
240+ }
241 basename_len = strlen(basename) + 1; /* count the '\0' */
242
243 #ifdef SUPPORT_LINKS
a5e6228a 244@@ -1214,11 +1402,21 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
213d4328
WD
245 }
246 #endif
247
248- if (always_checksum && am_sender && S_ISREG(st.st_mode))
249- file_checksum(thisname, tmp_sum, st.st_size);
250-
251 F_PATHNAME(file) = pathname;
252
253+ if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
254+ int j;
255+ if (flist && (j = flist_find(checksum_flist, file)) >= 0) {
256+ struct file_struct *fp = checksum_flist->sorted[j];
257+ if (F_LENGTH(fp) == st.st_size
258+ && fp->modtime == st.st_mtime)
259+ memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN);
260+ else
261+ file_checksum(thisname, tmp_sum, st.st_size);
262+ } else
263+ file_checksum(thisname, tmp_sum, st.st_size);
264+ }
265+
266 /* This code is only used by the receiver when it is building
267 * a list of files for a delete pass. */
268 if (keep_dirlinks && linkname_len && flist) {
a5e6228a 269@@ -2074,7 +2272,11 @@ struct file_list *send_file_list(int f, int argc, char *argv[])
6e9495c7
WD
270 * file-list to check if this is a 1-file xfer. */
271 send_extra_file_list(f, 1);
272 }
273- }
274+ } else
275+ flist_eof = 1;
276+
277+ if (checksum_updating && always_checksum && flist_eof)
278+ read_checksums(NULL);
279
280 return flist;
281 }
cc3e685d
WD
282diff --git a/ifuncs.h b/ifuncs.h
283--- a/ifuncs.h
284+++ b/ifuncs.h
213d4328
WD
285@@ -64,6 +64,12 @@ isDigit(const char *ptr)
286 }
287
288 static inline int
289+isXDigit(const char *ptr)
290+{
291+ return isxdigit(*(unsigned char *)ptr);
292+}
293+
294+static inline int
295 isPrint(const char *ptr)
296 {
297 return isprint(*(unsigned char *)ptr);
cc3e685d
WD
298diff --git a/support/rsyncsums b/support/rsyncsums
299new file mode 100644
300--- /dev/null
301+++ b/support/rsyncsums
d1a75c9f 302@@ -0,0 +1,203 @@
213d4328
WD
303+#!/usr/bin/perl -w
304+use strict;
305+
306+use Getopt::Long;
307+use Cwd qw(abs_path cwd);
308+use Digest::MD4;
309+use Digest::MD5;
310+
311+our $SUMS_FILE = '.rsyncsums';
312+
313+&Getopt::Long::Configure('bundling');
314+&usage if !&GetOptions(
213d4328 315+ 'recurse|r' => \( my $recurse_opt ),
d1a75c9f
WD
316+ 'simple-cmp|s' => \( my $ignore_ctime_and_inode ),
317+ 'check|c' => \( my $check_opt ),
213d4328
WD
318+ 'verbose|v+' => \( my $verbosity = 0 ),
319+ 'help|h' => \( my $help_opt ),
320+);
321+&usage if $help_opt;
322+
323+my $start_dir = cwd();
324+
325+my @dirs = @ARGV;
326+@dirs = '.' unless @dirs;
327+foreach (@dirs) {
328+ $_ = abs_path($_);
329+}
330+
331+$| = 1;
332+
d1a75c9f
WD
333+my $exit_code = 0;
334+
213d4328
WD
335+my $md4 = Digest::MD4->new;
336+my $md5 = Digest::MD5->new;
337+
338+while (@dirs) {
339+ my $dir = shift @dirs;
340+
341+ if (!chdir($dir)) {
342+ warn "Unable to chdir to $dir: $!\n";
343+ next;
344+ }
345+ if (!opendir(DP, '.')) {
346+ warn "Unable to opendir $dir: $!\n";
347+ next;
348+ }
349+
d1a75c9f
WD
350+ my $reldir = $dir;
351+ $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
213d4328 352+ if ($verbosity) {
213d4328 353+ print "$reldir ... ";
d1a75c9f 354+ print "\n" if $check_opt;
213d4328
WD
355+ }
356+
213d4328 357+ my %cache;
d1a75c9f
WD
358+ my $f_cnt = 0;
359+ if (open(FP, '<', $SUMS_FILE)) {
360+ while (<FP>) {
361+ chomp;
362+ my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
363+ $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
364+ $f_cnt++;
365+ }
366+ close FP;
367+ }
368+
213d4328 369+ my @subdirs;
d1a75c9f
WD
370+ my $d_cnt = 0;
371+ my $update_cnt = 0;
213d4328
WD
372+ while (defined(my $fn = readdir(DP))) {
373+ next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
374+ if (-d _) {
375+ push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
376+ next;
377+ }
378+ next unless -f _;
379+
380+ my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
d1a75c9f
WD
381+ my $ref = $cache{$fn};
382+ if ($size == 0) {
383+ if (defined $ref) {
384+ delete $cache{$fn};
385+ $f_cnt--;
386+ if (!$check_opt && !$update_cnt++) {
387+ print "UPDATING\n" if $verbosity;
388+ }
389+ }
390+ next;
213d4328 391+ }
d1a75c9f 392+ $d_cnt++;
213d4328 393+
d1a75c9f 394+ if (!$check_opt) {
213d4328 395+ if (defined $ref) {
d1a75c9f
WD
396+ $$ref[0] = 1;
397+ if ($$ref[3] == $size
398+ && $$ref[4] == $mtime
399+ && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode))
400+ && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) {
401+ next;
213d4328 402+ }
d1a75c9f
WD
403+ }
404+ if (!$update_cnt++) {
405+ print "UPDATING\n" if $verbosity;
213d4328
WD
406+ }
407+ }
213d4328 408+
d1a75c9f
WD
409+ if (!open(IN, $fn)) {
410+ print STDERR "Unable to read $fn: $!\n";
411+ if (defined $ref) {
213d4328 412+ delete $cache{$fn};
d1a75c9f 413+ $f_cnt--;
213d4328 414+ }
d1a75c9f
WD
415+ next;
416+ }
213d4328 417+
d1a75c9f
WD
418+ my($sum4, $sum5);
419+ while (1) {
420+ while (sysread(IN, $_, 64*1024)) {
421+ $md4->add($_);
422+ $md5->add($_);
213d4328 423+ }
d1a75c9f
WD
424+ $sum4 = $md4->hexdigest;
425+ $sum5 = $md5->hexdigest;
426+ print " $sum4 $sum5" if $verbosity > 2;
427+ print " $fn" if $verbosity > 1;
428+ my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
429+ last if $size == $size2 && $mtime == $mtime2
430+ && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2));
431+ $size = $size2;
432+ $mtime = $mtime2;
433+ $ctime = $ctime2;
434+ $inode = $inode2;
435+ sysseek(IN, 0, 0);
436+ print " REREADING\n" if $verbosity > 1;
437+ }
213d4328 438+
d1a75c9f
WD
439+ close IN;
440+
441+ if ($check_opt) {
442+ my $dif;
443+ if (!defined $ref) {
444+ $dif = 'MISSING';
445+ } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) {
446+ $dif = 'FAILED';
447+ } else {
448+ print " OK\n" if $verbosity > 1;
449+ next;
450+ }
451+ if ($verbosity < 2) {
452+ print $verbosity ? ' ' : "$reldir/";
453+ print $fn;
213d4328 454+ }
d1a75c9f
WD
455+ print " $dif\n";
456+ $exit_code = 1;
457+ } else {
458+ print "\n" if $verbosity > 1;
459+ $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
460+ }
461+ }
462+
463+ closedir DP;
213d4328 464+
d1a75c9f
WD
465+ unshift(@dirs, sort @subdirs) if $recurse_opt;
466+
467+ if ($check_opt) {
468+ ;
469+ } elsif ($d_cnt == 0) {
470+ if ($f_cnt) {
471+ print "(removed $SUMS_FILE) " if $verbosity;
472+ unlink($SUMS_FILE);
213d4328 473+ }
d1a75c9f
WD
474+ print "empty\n" if $verbosity;
475+ } elsif ($update_cnt || $d_cnt != $f_cnt) {
476+ print "UPDATING\n" if $verbosity && !$update_cnt;
477+ open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
213d4328 478+
213d4328
WD
479+ foreach my $fn (sort keys %cache) {
480+ my $ref = $cache{$fn};
d1a75c9f
WD
481+ my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref;
482+ next unless $found;
213d4328
WD
483+ printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
484+ }
d1a75c9f 485+ close FP;
213d4328
WD
486+ } else {
487+ print "ok\n" if $verbosity;
488+ }
213d4328
WD
489+}
490+
d1a75c9f
WD
491+exit $exit_code;
492+
213d4328
WD
493+sub usage
494+{
495+ die <<EOT;
496+Usage: rsyncsums [OPTIONS] [DIRS]
497+
498+Options:
499+ -r, --recurse Update $SUMS_FILE files in subdirectories too.
500+ -s, --simple-cmp Ignore ctime and inode values when comparing identicality.
d1a75c9f 501+ -c, --check Check if the checksums are right (doesn't update).
213d4328
WD
502+ -v, --verbose Mention what we're doing. Repeat for more info.
503+ -h, --help Display this help message.
504+EOT
505+}