1 Optimize the ability of a mirror to send checksums.
3 This adds a sender optimization feature that allows a cache of checksums
4 to be used when the client specifies the --checksum option. The checksum
5 files (.rsyncsums) must be created by some other process (see the Perl
6 script in the support dir for one way to do this).
8 This option should be used by mirrors that contain files that get created and
9 not changed. There is a minimal amount of sanity-check information in the
10 .rsyncsums file (size and mtime) so that the sum files can be shared with your
13 To use this patch, run these commands for a successful build:
15 patch -p1 <patches/checksum4mirrors.diff
16 ./configure (optional if already run)
21 @@ -117,6 +117,7 @@ static char empty_sum[MAX_DIGEST_LEN];
22 static int flist_count_offset; /* for --delete --progress */
23 static int dir_count = 0;
24 static int high_hlink_ndx;
25 +static struct file_list *checksum_flist = NULL;
27 static void clean_flist(struct file_list *flist, int strip_root);
28 static void output_flist(struct file_list *flist);
29 @@ -304,6 +305,186 @@ static void flist_done_allocating(struct
30 flist->pool_boundary = ptr;
33 +/* The len count is the length of the basename + 1 for the null. */
34 +static int add_checksum(const char *dirname, const char *basename, int len,
35 + OFF_T file_length, time_t mtime, const char *sum)
37 + struct file_struct *file;
38 + int alloc_len, extra_len;
41 + if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0)
43 + if (file_length == 0)
46 + extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT)
48 +#if EXTRA_ROUNDING > 0
49 + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
50 + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
52 + alloc_len = FILE_STRUCT_LEN + extra_len + len;
53 + bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum");
55 + memset(bp, 0, extra_len + FILE_STRUCT_LEN);
57 + file = (struct file_struct *)bp;
58 + bp += FILE_STRUCT_LEN;
60 + memcpy(bp, basename, len);
62 + file->mode = S_IFREG;
63 + file->modtime = mtime;
64 + file->len32 = (uint32)file_length;
65 + if (file_length > 0xFFFFFFFFu) {
66 + file->flags |= FLAG_LENGTH64;
67 + OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
69 + file->dirname = dirname;
71 + memcpy(bp, sum, checksum_len);
73 + flist_expand(checksum_flist, 1);
74 + checksum_flist->files[checksum_flist->used++] = file;
76 + checksum_flist->sorted = checksum_flist->files;
81 +/* The dirname value must remain unchanged during the lifespan of the
82 + * created checksum_flist object because we use it directly. */
83 +static void read_checksums(const char *dirname)
85 + char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
92 + if (checksum_flist) {
93 + /* Reset the pool memory and empty the file-list array. */
94 + pool_free_old(checksum_flist->file_pool,
95 + pool_boundary(checksum_flist->file_pool, 0));
96 + checksum_flist->used = 0;
98 + checksum_flist = flist_new(FLIST_TEMP, "read_checksums");
100 + checksum_flist->low = 0;
101 + checksum_flist->high = -1;
106 + dlen = strlcpy(fbuf, dirname, sizeof fbuf);
107 + if (dlen >= (int)sizeof fbuf)
110 + fbuf[dlen++] = '/';
113 + strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen);
114 + if (!(fp = fopen(fbuf, "r")))
117 + while (fgets(line, sizeof line, fp)) {
119 + if (protocol_version >= 30) {
120 + char *alt_sum = cp;
122 + while (*++cp == '=') {}
124 + while (isXDigit(cp)) cp++;
125 + if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
127 + while (*++cp == ' ') {}
133 + for (i = 0; i < checksum_len*2; i++, cp++) {
135 + if (isXDigit(cp)) {
139 + x = (*cp & 0xF) + 9;
152 + while (*++cp == ' ') {}
154 + if (protocol_version < 30) {
155 + char *alt_sum = cp;
157 + while (*++cp == '=') {}
159 + while (isXDigit(cp)) cp++;
160 + if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
162 + while (*++cp == ' ') {}
166 + while (isDigit(cp))
167 + file_length = file_length * 10 + *cp++ - '0';
170 + while (*++cp == ' ') {}
173 + while (isDigit(cp))
174 + mtime = mtime * 10 + *cp++ - '0';
177 + while (*++cp == ' ') {}
179 + /* Ignore ctime. */
180 + while (isDigit(cp))
184 + while (*++cp == ' ') {}
186 + /* Ignore inode. */
187 + while (isDigit(cp))
191 + while (*++cp == ' ') {}
194 + while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
198 + cp[len++] = '\0'; /* len now counts the null */
199 + if (strchr(cp, '/'))
201 + if (len > MAXPATHLEN)
204 + strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
206 + add_checksum(dirname, cp, len, file_length, mtime, sum);
210 + clean_flist(checksum_flist, 0);
213 int push_pathname(const char *dir, int len)
216 @@ -989,7 +1170,7 @@ struct file_struct *make_file(const char
217 STRUCT_STAT *stp, int flags, int filter_level)
219 static char *lastdir;
220 - static int lastdir_len = -1;
221 + static int lastdir_len = -2;
222 struct file_struct *file;
223 char thisname[MAXPATHLEN];
224 char linkname[MAXPATHLEN];
225 @@ -1119,9 +1300,16 @@ struct file_struct *make_file(const char
226 memcpy(lastdir, thisname, len);
229 + if (always_checksum && am_sender && flist)
230 + read_checksums(lastdir);
235 + if (always_checksum && am_sender && flist && lastdir_len == -2) {
237 + read_checksums("");
240 basename_len = strlen(basename) + 1; /* count the '\0' */
243 @@ -1197,11 +1385,21 @@ struct file_struct *make_file(const char
247 - if (always_checksum && am_sender && S_ISREG(st.st_mode))
248 - file_checksum(thisname, tmp_sum, st.st_size);
250 F_PATHNAME(file) = pathname;
252 + if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
254 + if (flist && (j = flist_find(checksum_flist, file)) >= 0) {
255 + struct file_struct *fp = checksum_flist->sorted[j];
256 + if (F_LENGTH(fp) == st.st_size
257 + && fp->modtime == st.st_mtime)
258 + memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN);
260 + file_checksum(thisname, tmp_sum, st.st_size);
262 + file_checksum(thisname, tmp_sum, st.st_size);
265 /* This code is only used by the receiver when it is building
266 * a list of files for a delete pass. */
267 if (keep_dirlinks && linkname_len && flist) {
268 @@ -2051,7 +2249,11 @@ struct file_list *send_file_list(int f,
269 * file-list to check if this is a 1-file xfer. */
270 send_extra_file_list(f, 1);
276 + if (checksum_updating && always_checksum && flist_eof)
277 + read_checksums(NULL);
283 @@ -64,6 +64,12 @@ isDigit(const char *ptr)
287 +isXDigit(const char *ptr)
289 + return isxdigit(*(unsigned char *)ptr);
293 isPrint(const char *ptr)
295 return isprint(*(unsigned char *)ptr);
296 --- old/support/rsyncsums
297 +++ new/support/rsyncsums
303 +use Cwd qw(abs_path cwd);
307 +our $SUMS_FILE = '.rsyncsums';
309 +&Getopt::Long::Configure('bundling');
310 +&usage if !&GetOptions(
311 + 'recurse|r' => \( my $recurse_opt ),
312 + 'simple-cmp|s' => \( my $ignore_ctime_and_inode ),
313 + 'check|c' => \( my $check_opt ),
314 + 'verbose|v+' => \( my $verbosity = 0 ),
315 + 'help|h' => \( my $help_opt ),
317 +&usage if $help_opt;
319 +my $start_dir = cwd();
322 +@dirs = '.' unless @dirs;
331 +my $md4 = Digest::MD4->new;
332 +my $md5 = Digest::MD5->new;
335 + my $dir = shift @dirs;
337 + if (!chdir($dir)) {
338 + warn "Unable to chdir to $dir: $!\n";
341 + if (!opendir(DP, '.')) {
342 + warn "Unable to opendir $dir: $!\n";
347 + $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
349 + print "$reldir ... ";
350 + print "\n" if $check_opt;
355 + if (open(FP, '<', $SUMS_FILE)) {
358 + my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
359 + $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
367 + my $update_cnt = 0;
368 + while (defined(my $fn = readdir(DP))) {
369 + next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
371 + push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
376 + my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
377 + my $ref = $cache{$fn};
379 + if (defined $ref) {
380 + delete $cache{$fn};
382 + if (!$check_opt && !$update_cnt++) {
383 + print "UPDATING\n" if $verbosity;
391 + if (defined $ref) {
393 + if ($$ref[3] == $size
394 + && $$ref[4] == $mtime
395 + && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode))
396 + && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) {
400 + if (!$update_cnt++) {
401 + print "UPDATING\n" if $verbosity;
405 + if (!open(IN, $fn)) {
406 + print STDERR "Unable to read $fn: $!\n";
407 + if (defined $ref) {
408 + delete $cache{$fn};
416 + while (sysread(IN, $_, 64*1024)) {
420 + $sum4 = $md4->hexdigest;
421 + $sum5 = $md5->hexdigest;
422 + print " $sum4 $sum5" if $verbosity > 2;
423 + print " $fn" if $verbosity > 1;
424 + my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
425 + last if $size == $size2 && $mtime == $mtime2
426 + && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2));
432 + print " REREADING\n" if $verbosity > 1;
439 + if (!defined $ref) {
441 + } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) {
444 + print " OK\n" if $verbosity > 1;
447 + if ($verbosity < 2) {
448 + print $verbosity ? ' ' : "$reldir/";
454 + print "\n" if $verbosity > 1;
455 + $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
461 + unshift(@dirs, sort @subdirs) if $recurse_opt;
465 + } elsif ($d_cnt == 0) {
467 + print "(removed $SUMS_FILE) " if $verbosity;
468 + unlink($SUMS_FILE);
470 + print "empty\n" if $verbosity;
471 + } elsif ($update_cnt || $d_cnt != $f_cnt) {
472 + print "UPDATING\n" if $verbosity && !$update_cnt;
473 + open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
475 + foreach my $fn (sort keys %cache) {
476 + my $ref = $cache{$fn};
477 + my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref;
478 + next unless $found;
479 + printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
483 + print "ok\n" if $verbosity;
492 +Usage: rsyncsums [OPTIONS] [DIRS]
495 + -r, --recurse Update $SUMS_FILE files in subdirectories too.
496 + -s, --simple-cmp Ignore ctime and inode values when comparing identicality.
497 + -c, --check Check if the checksums are right (doesn't update).
498 + -v, --verbose Mention what we're doing. Repeat for more info.
499 + -h, --help Display this help message.