1 Optimize the ability of a mirror to send checksums.
3 This adds a sender optimization feature that allows a cache of checksums
4 to be used when the client specifies the --checksum option. The checksum
5 files (.rsyncsums) must be created by some other process (see the Perl
6 script in the support directory for one way).
8 This option should be used by mirrors whose files are created once and then
9 never changed. There is a minimal amount of sanity-check information in the
10 .rsyncsums file (size and mtime) so that the sum files can be shared with your
13 To use this patch, run these commands for a successful build:
15 patch -p1 <patches/checksum-reading.diff
16 ./configure (optional if already run)
19 diff --git a/flist.c b/flist.c
22 @@ -121,6 +121,7 @@ static char tmp_sum[MAX_DIGEST_LEN];
23 static char empty_sum[MAX_DIGEST_LEN];
24 static int flist_count_offset; /* for --delete --progress */
25 static int dir_count = 0;
26 +static struct file_list *checksum_flist = NULL;
28 static void flist_sort_and_clean(struct file_list *flist, int strip_root);
29 static void output_flist(struct file_list *flist);
30 @@ -313,6 +314,186 @@ static void flist_done_allocating(struct file_list *flist)
31 flist->pool_boundary = ptr;
34 +/* The len value is the length of basename plus 1 for the terminating null byte. */
35 +static int add_checksum(const char *dirname, const char *basename, int len,
36 + OFF_T file_length, time_t mtime, const char *sum)
38 + struct file_struct *file;
39 + int alloc_len, extra_len;
42 + if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0)
44 + if (file_length == 0)
47 + extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT)
49 +#if EXTRA_ROUNDING > 0
50 + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
51 + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
53 + alloc_len = FILE_STRUCT_LEN + extra_len + len;
54 + bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum");
56 + memset(bp, 0, extra_len + FILE_STRUCT_LEN);
58 + file = (struct file_struct *)bp;
59 + bp += FILE_STRUCT_LEN;
61 + memcpy(bp, basename, len);
63 + file->mode = S_IFREG;
64 + file->modtime = mtime;
65 + file->len32 = (uint32)file_length;
66 + if (file_length > 0xFFFFFFFFu) {
67 + file->flags |= FLAG_LENGTH64;
68 + OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
70 + file->dirname = dirname;
72 + memcpy(bp, sum, checksum_len);
74 + flist_expand(checksum_flist, 1);
75 + checksum_flist->files[checksum_flist->used++] = file;
77 + checksum_flist->sorted = checksum_flist->files;
82 +/* The dirname value must remain unchanged during the lifespan of the
83 + * created checksum_flist object because we use it directly. */
84 +static void read_checksums(const char *dirname)
86 + char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
93 + if (checksum_flist) {
94 + /* Reset the pool memory and empty the file-list array. */
95 + pool_free_old(checksum_flist->file_pool,
96 + pool_boundary(checksum_flist->file_pool, 0));
97 + checksum_flist->used = 0;
99 + checksum_flist = flist_new(FLIST_TEMP, "read_checksums");
101 + checksum_flist->low = 0;
102 + checksum_flist->high = -1;
107 + dlen = strlcpy(fbuf, dirname, sizeof fbuf);
108 + if (dlen >= (int)sizeof fbuf)
111 + fbuf[dlen++] = '/';
114 + strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen);
115 + if (!(fp = fopen(fbuf, "r")))
118 + while (fgets(line, sizeof line, fp)) {
120 + if (protocol_version >= 30) {
121 + char *alt_sum = cp;
123 + while (*++cp == '=') {}
125 + while (isXDigit(cp)) cp++;
126 + if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
128 + while (*++cp == ' ') {}
134 + for (i = 0; i < checksum_len*2; i++, cp++) {
136 + if (isXDigit(cp)) {
140 + x = (*cp & 0xF) + 9;
153 + while (*++cp == ' ') {}
155 + if (protocol_version < 30) {
156 + char *alt_sum = cp;
158 + while (*++cp == '=') {}
160 + while (isXDigit(cp)) cp++;
161 + if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
163 + while (*++cp == ' ') {}
167 + while (isDigit(cp))
168 + file_length = file_length * 10 + *cp++ - '0';
171 + while (*++cp == ' ') {}
174 + while (isDigit(cp))
175 + mtime = mtime * 10 + *cp++ - '0';
178 + while (*++cp == ' ') {}
180 + /* Ignore ctime. */
181 + while (isDigit(cp))
185 + while (*++cp == ' ') {}
187 + /* Ignore inode. */
188 + while (isDigit(cp))
192 + while (*++cp == ' ') {}
195 + while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
199 + cp[len++] = '\0'; /* len now counts the null */
200 + if (strchr(cp, '/'))
202 + if (len > MAXPATHLEN)
205 + strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
207 + add_checksum(dirname, cp, len, file_length, mtime, sum);
211 + flist_sort_and_clean(checksum_flist, 0);
214 int push_pathname(const char *dir, int len)
217 @@ -1003,7 +1184,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
218 STRUCT_STAT *stp, int flags, int filter_level)
220 static char *lastdir;
221 - static int lastdir_len = -1;
222 + static int lastdir_len = -2;
223 struct file_struct *file;
224 char thisname[MAXPATHLEN];
225 char linkname[MAXPATHLEN];
226 @@ -1136,9 +1317,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
227 memcpy(lastdir, thisname, len);
230 + if (always_checksum && am_sender && flist)
231 + read_checksums(lastdir);
236 + if (always_checksum && am_sender && flist && lastdir_len == -2) {
238 + read_checksums("");
241 basename_len = strlen(basename) + 1; /* count the '\0' */
244 @@ -1214,11 +1402,21 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
248 - if (always_checksum && am_sender && S_ISREG(st.st_mode))
249 - file_checksum(thisname, tmp_sum, st.st_size);
251 F_PATHNAME(file) = pathname;
253 + if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
255 + if (flist && (j = flist_find(checksum_flist, file)) >= 0) {
256 + struct file_struct *fp = checksum_flist->sorted[j];
257 + if (F_LENGTH(fp) == st.st_size
258 + && fp->modtime == st.st_mtime)
259 + memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN);
261 + file_checksum(thisname, tmp_sum, st.st_size);
263 + file_checksum(thisname, tmp_sum, st.st_size);
266 /* This code is only used by the receiver when it is building
267 * a list of files for a delete pass. */
268 if (keep_dirlinks && linkname_len && flist) {
269 @@ -2074,7 +2272,8 @@ struct file_list *send_file_list(int f, int argc, char *argv[])
270 * file-list to check if this is a 1-file xfer. */
271 send_extra_file_list(f, 1);
279 diff --git a/ifuncs.h b/ifuncs.h
282 @@ -64,6 +64,12 @@ isDigit(const char *ptr)
286 +isXDigit(const char *ptr)
288 + return isxdigit(*(unsigned char *)ptr);
292 isPrint(const char *ptr)
294 return isprint(*(unsigned char *)ptr);
295 diff --git a/support/rsyncsums b/support/rsyncsums
298 +++ b/support/rsyncsums
304 +use Cwd qw(abs_path cwd);
308 +our $SUMS_FILE = '.rsyncsums';
310 +&Getopt::Long::Configure('bundling');
311 +&usage if !&GetOptions(
312 + 'recurse|r' => \( my $recurse_opt ),
313 + 'simple-cmp|s' => \( my $ignore_ctime_and_inode ),
314 + 'check|c' => \( my $check_opt ),
315 + 'verbose|v+' => \( my $verbosity = 0 ),
316 + 'help|h' => \( my $help_opt ),
318 +&usage if $help_opt;
320 +my $start_dir = cwd();
323 +@dirs = '.' unless @dirs;
332 +my $md4 = Digest::MD4->new;
333 +my $md5 = Digest::MD5->new;
336 + my $dir = shift @dirs;
338 + if (!chdir($dir)) {
339 + warn "Unable to chdir to $dir: $!\n";
342 + if (!opendir(DP, '.')) {
343 + warn "Unable to opendir $dir: $!\n";
348 + $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
350 + print "$reldir ... ";
351 + print "\n" if $check_opt;
356 + if (open(FP, '<', $SUMS_FILE)) {
359 + my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
360 + $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
368 + my $update_cnt = 0;
369 + while (defined(my $fn = readdir(DP))) {
370 + next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
372 + push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
377 + my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
378 + my $ref = $cache{$fn};
380 + if (defined $ref) {
381 + delete $cache{$fn};
383 + if (!$check_opt && !$update_cnt++) {
384 + print "UPDATING\n" if $verbosity;
392 + if (defined $ref) {
394 + if ($$ref[3] == $size
395 + && $$ref[4] == $mtime
396 + && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode))
397 + && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) {
401 + if (!$update_cnt++) {
402 + print "UPDATING\n" if $verbosity;
406 + if (!open(IN, $fn)) {
407 + print STDERR "Unable to read $fn: $!\n";
408 + if (defined $ref) {
409 + delete $cache{$fn};
417 + while (sysread(IN, $_, 64*1024)) {
421 + $sum4 = $md4->hexdigest;
422 + $sum5 = $md5->hexdigest;
423 + print " $sum4 $sum5" if $verbosity > 2;
424 + print " $fn" if $verbosity > 1;
425 + my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
426 + last if $size == $size2 && $mtime == $mtime2
427 + && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2));
433 + print " REREADING\n" if $verbosity > 1;
440 + if (!defined $ref) {
442 + } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) {
445 + print " OK\n" if $verbosity > 1;
448 + if ($verbosity < 2) {
449 + print $verbosity ? ' ' : "$reldir/";
455 + print "\n" if $verbosity > 1;
456 + $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
462 + unshift(@dirs, sort @subdirs) if $recurse_opt;
466 + } elsif ($d_cnt == 0) {
468 + print "(removed $SUMS_FILE) " if $verbosity;
469 + unlink($SUMS_FILE);
471 + print "empty\n" if $verbosity;
472 + } elsif ($update_cnt || $d_cnt != $f_cnt) {
473 + print "UPDATING\n" if $verbosity && !$update_cnt;
474 + open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
476 + foreach my $fn (sort keys %cache) {
477 + my $ref = $cache{$fn};
478 + my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref;
479 + next unless $found;
480 + printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
484 + print "ok\n" if $verbosity;
493 +Usage: rsyncsums [OPTIONS] [DIRS]
496 + -r, --recurse Update $SUMS_FILE files in subdirectories too.
497 + -s, --simple-cmp Ignore ctime and inode values when comparing identicality.
498 + -c, --check Check if the checksums are right (doesn't update).
499 + -v, --verbose Mention what we're doing. Repeat for more info.
500 + -h, --help Display this help message.