The patches for 3.0.0pre10.
[rsync/rsync-patches.git] / checksum-reading.diff
CommitLineData
213d4328
WD
1Optimize the ability of a mirror to send checksums.
2
3This adds a sender optimization feature that allows a cache of checksums
4to be used when the client specifies the --checksum option. The checksum
5files (.rsyncsums) must be created by some other process (see the perl
6script in the support dir for one way).
7
8This option should be used by mirrors that contain files that get created and
9not changed. There is a minimal amount of sanity-check information in the
10.rsyncsums file (size and mtime) so that the sum files can be shared with your
11mirror network.
12
13To use this patch, run these commands for a successful build:
14
cc3e685d 15 patch -p1 <patches/checksum-reading.diff
213d4328
WD
16 ./configure (optional if already run)
17 make
18
cc3e685d
WD
19diff --git a/flist.c b/flist.c
20--- a/flist.c
21+++ b/flist.c
f2863bc0
WD
22@@ -121,6 +121,7 @@ static char tmp_sum[MAX_DIGEST_LEN];
23 static char empty_sum[MAX_DIGEST_LEN];
213d4328
WD
24 static int flist_count_offset; /* for --delete --progress */
25 static int dir_count = 0;
213d4328
WD
26+static struct file_list *checksum_flist = NULL;
27
f2863bc0 28 static void flist_sort_and_clean(struct file_list *flist, int strip_root);
213d4328 29 static void output_flist(struct file_list *flist);
f2863bc0 30@@ -313,6 +314,186 @@ static void flist_done_allocating(struct file_list *flist)
213d4328
WD
31 flist->pool_boundary = ptr;
32 }
33
34+/* The len count is the length of the basename + 1 for the null. */
35+static int add_checksum(const char *dirname, const char *basename, int len,
36+ OFF_T file_length, time_t mtime, const char *sum)
37+{
38+ struct file_struct *file;
39+ int alloc_len, extra_len;
40+ char *bp;
41+
42+ if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0)
43+ return 0;
44+ if (file_length == 0)
45+ return 0;
46+
47+ extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT)
48+ * EXTRA_LEN;
49+#if EXTRA_ROUNDING > 0
50+ if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
51+ extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
52+#endif
53+ alloc_len = FILE_STRUCT_LEN + extra_len + len;
54+ bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum");
55+
56+ memset(bp, 0, extra_len + FILE_STRUCT_LEN);
57+ bp += extra_len;
58+ file = (struct file_struct *)bp;
59+ bp += FILE_STRUCT_LEN;
60+
61+ memcpy(bp, basename, len);
62+
63+ file->mode = S_IFREG;
64+ file->modtime = mtime;
65+ file->len32 = (uint32)file_length;
66+ if (file_length > 0xFFFFFFFFu) {
67+ file->flags |= FLAG_LENGTH64;
68+ OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
69+ }
70+ file->dirname = dirname;
71+ bp = F_SUM(file);
72+ memcpy(bp, sum, checksum_len);
73+
74+ flist_expand(checksum_flist, 1);
75+ checksum_flist->files[checksum_flist->used++] = file;
76+
77+ checksum_flist->sorted = checksum_flist->files;
78+
79+ return 1;
80+}
81+
82+/* The dirname value must remain unchanged during the lifespan of the
83+ * created checksum_flist object because we use it directly. */
84+static void read_checksums(const char *dirname)
85+{
86+ char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
87+ OFF_T file_length;
88+ time_t mtime;
89+ int len, dlen, i;
90+ char *cp;
91+ FILE *fp;
92+
93+ if (checksum_flist) {
94+ /* Reset the pool memory and empty the file-list array. */
95+ pool_free_old(checksum_flist->file_pool,
96+ pool_boundary(checksum_flist->file_pool, 0));
97+ checksum_flist->used = 0;
98+ } else
99+ checksum_flist = flist_new(FLIST_TEMP, "read_checksums");
100+
101+ checksum_flist->low = 0;
102+ checksum_flist->high = -1;
103+
104+ if (!dirname)
105+ return;
106+
107+ dlen = strlcpy(fbuf, dirname, sizeof fbuf);
108+ if (dlen >= (int)sizeof fbuf)
109+ return;
110+ if (dlen)
111+ fbuf[dlen++] = '/';
112+ else
113+ dirname = NULL;
114+ strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen);
115+ if (!(fp = fopen(fbuf, "r")))
116+ return;
117+
118+ while (fgets(line, sizeof line, fp)) {
119+ cp = line;
120+ if (protocol_version >= 30) {
121+ char *alt_sum = cp;
122+ if (*cp == '=')
123+ while (*++cp == '=') {}
124+ else
125+ while (isXDigit(cp)) cp++;
126+ if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
127+ break;
128+ while (*++cp == ' ') {}
129+ }
130+
131+ if (*cp == '=') {
132+ continue;
133+ } else {
134+ for (i = 0; i < checksum_len*2; i++, cp++) {
135+ int x;
136+ if (isXDigit(cp)) {
137+ if (isDigit(cp))
138+ x = *cp - '0';
139+ else
140+ x = (*cp & 0xF) + 9;
141+ } else {
142+ cp = "";
143+ break;
144+ }
145+ if (i & 1)
146+ sum[i/2] |= x;
147+ else
148+ sum[i/2] = x << 4;
149+ }
150+ }
151+ if (*cp != ' ')
152+ break;
153+ while (*++cp == ' ') {}
154+
155+ if (protocol_version < 30) {
156+ char *alt_sum = cp;
157+ if (*cp == '=')
158+ while (*++cp == '=') {}
159+ else
160+ while (isXDigit(cp)) cp++;
161+ if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
162+ break;
163+ while (*++cp == ' ') {}
164+ }
165+
166+ file_length = 0;
167+ while (isDigit(cp))
168+ file_length = file_length * 10 + *cp++ - '0';
169+ if (*cp != ' ')
170+ break;
171+ while (*++cp == ' ') {}
172+
173+ mtime = 0;
174+ while (isDigit(cp))
175+ mtime = mtime * 10 + *cp++ - '0';
176+ if (*cp != ' ')
177+ break;
178+ while (*++cp == ' ') {}
179+
180+ /* Ignore ctime. */
181+ while (isDigit(cp))
182+ cp++;
183+ if (*cp != ' ')
184+ break;
185+ while (*++cp == ' ') {}
186+
187+ /* Ignore inode. */
188+ while (isDigit(cp))
189+ cp++;
190+ if (*cp != ' ')
191+ break;
192+ while (*++cp == ' ') {}
193+
194+ len = strlen(cp);
195+ while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
196+ len--;
197+ if (!len)
198+ break;
199+ cp[len++] = '\0'; /* len now counts the null */
200+ if (strchr(cp, '/'))
201+ break;
202+ if (len > MAXPATHLEN)
203+ continue;
204+
205+ strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
206+
207+ add_checksum(dirname, cp, len, file_length, mtime, sum);
208+ }
209+ fclose(fp);
210+
85096e5e 211+ flist_sort_and_clean(checksum_flist, 0);
213d4328
WD
212+}
213+
214 int push_pathname(const char *dir, int len)
215 {
216 if (dir == pathname)
f2863bc0 217@@ -1003,7 +1184,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
213d4328
WD
218 STRUCT_STAT *stp, int flags, int filter_level)
219 {
220 static char *lastdir;
221- static int lastdir_len = -1;
222+ static int lastdir_len = -2;
223 struct file_struct *file;
224 char thisname[MAXPATHLEN];
225 char linkname[MAXPATHLEN];
a5e6228a 226@@ -1136,9 +1317,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
213d4328
WD
227 memcpy(lastdir, thisname, len);
228 lastdir[len] = '\0';
229 lastdir_len = len;
230+ if (always_checksum && am_sender && flist)
231+ read_checksums(lastdir);
232 }
233- } else
234+ } else {
235 basename = thisname;
236+ if (always_checksum && am_sender && flist && lastdir_len == -2) {
237+ lastdir_len = -1;
238+ read_checksums("");
239+ }
240+ }
241 basename_len = strlen(basename) + 1; /* count the '\0' */
242
243 #ifdef SUPPORT_LINKS
a5e6228a 244@@ -1214,11 +1402,21 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
213d4328
WD
245 }
246 #endif
247
248- if (always_checksum && am_sender && S_ISREG(st.st_mode))
249- file_checksum(thisname, tmp_sum, st.st_size);
250-
251 F_PATHNAME(file) = pathname;
252
253+ if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
254+ int j;
255+ if (flist && (j = flist_find(checksum_flist, file)) >= 0) {
256+ struct file_struct *fp = checksum_flist->sorted[j];
257+ if (F_LENGTH(fp) == st.st_size
258+ && fp->modtime == st.st_mtime)
259+ memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN);
260+ else
261+ file_checksum(thisname, tmp_sum, st.st_size);
262+ } else
263+ file_checksum(thisname, tmp_sum, st.st_size);
264+ }
265+
266 /* This code is only used by the receiver when it is building
267 * a list of files for a delete pass. */
268 if (keep_dirlinks && linkname_len && flist) {
85096e5e 269@@ -2074,7 +2272,8 @@ struct file_list *send_file_list(int f, int argc, char *argv[])
6e9495c7
WD
270 * file-list to check if this is a 1-file xfer. */
271 send_extra_file_list(f, 1);
272 }
273- }
274+ } else
275+ flist_eof = 1;
6e9495c7
WD
276
277 return flist;
278 }
cc3e685d
WD
279diff --git a/ifuncs.h b/ifuncs.h
280--- a/ifuncs.h
281+++ b/ifuncs.h
213d4328
WD
282@@ -64,6 +64,12 @@ isDigit(const char *ptr)
283 }
284
285 static inline int
286+isXDigit(const char *ptr)
287+{
288+ return isxdigit(*(unsigned char *)ptr);
289+}
290+
291+static inline int
292 isPrint(const char *ptr)
293 {
294 return isprint(*(unsigned char *)ptr);
cc3e685d
WD
295diff --git a/support/rsyncsums b/support/rsyncsums
296new file mode 100644
297--- /dev/null
298+++ b/support/rsyncsums
d1a75c9f 299@@ -0,0 +1,203 @@
213d4328
WD
300+#!/usr/bin/perl -w
301+use strict;
302+
303+use Getopt::Long;
304+use Cwd qw(abs_path cwd);
305+use Digest::MD4;
306+use Digest::MD5;
307+
308+our $SUMS_FILE = '.rsyncsums';
309+
310+&Getopt::Long::Configure('bundling');
311+&usage if !&GetOptions(
213d4328 312+ 'recurse|r' => \( my $recurse_opt ),
d1a75c9f
WD
313+ 'simple-cmp|s' => \( my $ignore_ctime_and_inode ),
314+ 'check|c' => \( my $check_opt ),
213d4328
WD
315+ 'verbose|v+' => \( my $verbosity = 0 ),
316+ 'help|h' => \( my $help_opt ),
317+);
318+&usage if $help_opt;
319+
320+my $start_dir = cwd();
321+
322+my @dirs = @ARGV;
323+@dirs = '.' unless @dirs;
324+foreach (@dirs) {
325+ $_ = abs_path($_);
326+}
327+
328+$| = 1;
329+
d1a75c9f
WD
330+my $exit_code = 0;
331+
213d4328
WD
332+my $md4 = Digest::MD4->new;
333+my $md5 = Digest::MD5->new;
334+
335+while (@dirs) {
336+ my $dir = shift @dirs;
337+
338+ if (!chdir($dir)) {
339+ warn "Unable to chdir to $dir: $!\n";
340+ next;
341+ }
342+ if (!opendir(DP, '.')) {
343+ warn "Unable to opendir $dir: $!\n";
344+ next;
345+ }
346+
d1a75c9f
WD
347+ my $reldir = $dir;
348+ $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
213d4328 349+ if ($verbosity) {
213d4328 350+ print "$reldir ... ";
d1a75c9f 351+ print "\n" if $check_opt;
213d4328
WD
352+ }
353+
213d4328 354+ my %cache;
d1a75c9f
WD
355+ my $f_cnt = 0;
356+ if (open(FP, '<', $SUMS_FILE)) {
357+ while (<FP>) {
358+ chomp;
359+ my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
360+ $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
361+ $f_cnt++;
362+ }
363+ close FP;
364+ }
365+
213d4328 366+ my @subdirs;
d1a75c9f
WD
367+ my $d_cnt = 0;
368+ my $update_cnt = 0;
213d4328
WD
369+ while (defined(my $fn = readdir(DP))) {
370+ next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
371+ if (-d _) {
372+ push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
373+ next;
374+ }
375+ next unless -f _;
376+
377+ my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
d1a75c9f
WD
378+ my $ref = $cache{$fn};
379+ if ($size == 0) {
380+ if (defined $ref) {
381+ delete $cache{$fn};
382+ $f_cnt--;
383+ if (!$check_opt && !$update_cnt++) {
384+ print "UPDATING\n" if $verbosity;
385+ }
386+ }
387+ next;
213d4328 388+ }
d1a75c9f 389+ $d_cnt++;
213d4328 390+
d1a75c9f 391+ if (!$check_opt) {
213d4328 392+ if (defined $ref) {
d1a75c9f
WD
393+ $$ref[0] = 1;
394+ if ($$ref[3] == $size
395+ && $$ref[4] == $mtime
396+ && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode))
397+ && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) {
398+ next;
213d4328 399+ }
d1a75c9f
WD
400+ }
401+ if (!$update_cnt++) {
402+ print "UPDATING\n" if $verbosity;
213d4328
WD
403+ }
404+ }
213d4328 405+
d1a75c9f
WD
406+ if (!open(IN, $fn)) {
407+ print STDERR "Unable to read $fn: $!\n";
408+ if (defined $ref) {
213d4328 409+ delete $cache{$fn};
d1a75c9f 410+ $f_cnt--;
213d4328 411+ }
d1a75c9f
WD
412+ next;
413+ }
213d4328 414+
d1a75c9f
WD
415+ my($sum4, $sum5);
416+ while (1) {
417+ while (sysread(IN, $_, 64*1024)) {
418+ $md4->add($_);
419+ $md5->add($_);
213d4328 420+ }
d1a75c9f
WD
421+ $sum4 = $md4->hexdigest;
422+ $sum5 = $md5->hexdigest;
423+ print " $sum4 $sum5" if $verbosity > 2;
424+ print " $fn" if $verbosity > 1;
425+ my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
426+ last if $size == $size2 && $mtime == $mtime2
427+ && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2));
428+ $size = $size2;
429+ $mtime = $mtime2;
430+ $ctime = $ctime2;
431+ $inode = $inode2;
432+ sysseek(IN, 0, 0);
433+ print " REREADING\n" if $verbosity > 1;
434+ }
213d4328 435+
d1a75c9f
WD
436+ close IN;
437+
438+ if ($check_opt) {
439+ my $dif;
440+ if (!defined $ref) {
441+ $dif = 'MISSING';
442+ } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) {
443+ $dif = 'FAILED';
444+ } else {
445+ print " OK\n" if $verbosity > 1;
446+ next;
447+ }
448+ if ($verbosity < 2) {
449+ print $verbosity ? ' ' : "$reldir/";
450+ print $fn;
213d4328 451+ }
d1a75c9f
WD
452+ print " $dif\n";
453+ $exit_code = 1;
454+ } else {
455+ print "\n" if $verbosity > 1;
456+ $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
457+ }
458+ }
459+
460+ closedir DP;
213d4328 461+
d1a75c9f
WD
462+ unshift(@dirs, sort @subdirs) if $recurse_opt;
463+
464+ if ($check_opt) {
465+ ;
466+ } elsif ($d_cnt == 0) {
467+ if ($f_cnt) {
468+ print "(removed $SUMS_FILE) " if $verbosity;
469+ unlink($SUMS_FILE);
213d4328 470+ }
d1a75c9f
WD
471+ print "empty\n" if $verbosity;
472+ } elsif ($update_cnt || $d_cnt != $f_cnt) {
473+ print "UPDATING\n" if $verbosity && !$update_cnt;
474+ open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
213d4328 475+
213d4328
WD
476+ foreach my $fn (sort keys %cache) {
477+ my $ref = $cache{$fn};
d1a75c9f
WD
478+ my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref;
479+ next unless $found;
213d4328
WD
480+ printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
481+ }
d1a75c9f 482+ close FP;
213d4328
WD
483+ } else {
484+ print "ok\n" if $verbosity;
485+ }
213d4328
WD
486+}
487+
d1a75c9f
WD
488+exit $exit_code;
489+
213d4328
WD
490+sub usage
491+{
492+ die <<EOT;
493+Usage: rsyncsums [OPTIONS] [DIRS]
494+
495+Options:
496+ -r, --recurse Update $SUMS_FILE files in subdirectories too.
497+ -s, --simple-cmp Ignore ctime and inode values when comparing identicality.
d1a75c9f 498+ -c, --check Check if the checksums are right (doesn't update).
213d4328
WD
499+ -v, --verbose Mention what we're doing. Repeat for more info.
500+ -h, --help Display this help message.
501+EOT
502+}