| 1 | Optimize the ability of a mirror to send checksums. |
| 2 | |
| 3 | This adds a sender optimization feature that allows a cache of checksums |
| 4 | to be used when the client specifies the --checksum option. The checksum |
| 5 | files (.rsyncsums) must be created by some other process (see the Perl |
| 6 | script support/rsyncsums, added by this patch, for one way). |
| 7 | |
| 8 | This option should be used by mirrors that contain files that get created and |
| 9 | not changed. There is a minimal amount of sanity-check information in the |
| 10 | .rsyncsums file (size and mtime) so that the sum files can be shared with your |
| 11 | mirror network. |
| 12 | |
| 13 | To use this patch, run these commands for a successful build: |
| 14 | |
| 15 | patch -p1 <patches/checksum4mirrors.diff |
| 16 | ./configure (optional if already run) |
| 17 | make |
| 18 | |
| 19 | --- old/flist.c |
| 20 | +++ new/flist.c |
| 21 | @@ -117,6 +117,7 @@ static char empty_sum[MAX_DIGEST_LEN]; |
| 22 | static int flist_count_offset; /* for --delete --progress */ |
| 23 | static int dir_count = 0; |
| 24 | static int high_hlink_ndx; |
| 25 | +static struct file_list *checksum_flist = NULL; |
| 26 | |
| 27 | static void clean_flist(struct file_list *flist, int strip_root); |
| 28 | static void output_flist(struct file_list *flist); |
| 29 | @@ -304,6 +305,186 @@ static void flist_done_allocating(struct |
| 30 | flist->pool_boundary = ptr; |
| 31 | } |
| 32 | |
| 33 | +/* Append a regular-file entry (size, mtime, sum) for basename to checksum_flist; returns 1 if added, 0 if skipped. The len count is the length of the basename + 1 for the null. */ |
| 34 | +static int add_checksum(const char *dirname, const char *basename, int len, |
| 35 | + OFF_T file_length, time_t mtime, const char *sum) |
| 36 | +{ |
| 37 | + struct file_struct *file; |
| 38 | + int alloc_len, extra_len; |
| 39 | + char *bp; |
| 40 | + |
| 41 | + if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0) |
| 42 | + return 0; /* never cache an entry for the sums file itself */ |
| 43 | + if (file_length == 0) |
| 44 | + return 0; /* zero-length files are not cached */ |
| 45 | + |
| 46 | + extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT) |
| 47 | + * EXTRA_LEN; /* a length above 32 bits takes one more extra slot */ |
| 48 | +#if EXTRA_ROUNDING > 0 |
| 49 | + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN)) |
| 50 | + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN; |
| 51 | +#endif |
| 52 | + alloc_len = FILE_STRUCT_LEN + extra_len + len; |
| 53 | + bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum"); |
| 54 | + |
| 55 | + memset(bp, 0, extra_len + FILE_STRUCT_LEN); |
| 56 | + bp += extra_len; /* the extras sit in front of the struct */ |
| 57 | + file = (struct file_struct *)bp; |
| 58 | + bp += FILE_STRUCT_LEN; /* the basename is stored right after the struct */ |
| 59 | + |
| 60 | + memcpy(bp, basename, len); |
| 61 | + |
| 62 | + file->mode = S_IFREG; |
| 63 | + file->modtime = mtime; |
| 64 | + file->len32 = (uint32)file_length; /* low 32 bits of the length */ |
| 65 | + if (file_length > 0xFFFFFFFFu) { |
| 66 | + file->flags |= FLAG_LENGTH64; |
| 67 | + OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32); /* high 32 bits */ |
| 68 | + } |
| 69 | + file->dirname = dirname; /* pointer is stored, not copied */ |
| 70 | + bp = F_SUM(file); |
| 71 | + memcpy(bp, sum, checksum_len); /* only checksum_len bytes of sum are kept */ |
| 72 | + |
| 73 | + flist_expand(checksum_flist, 1); |
| 74 | + checksum_flist->files[checksum_flist->used++] = file; |
| 75 | + |
| 76 | + checksum_flist->sorted = checksum_flist->files; /* sorted aliases files; re-set after every flist_expand() */ |
| 77 | + |
| 78 | + return 1; |
| 79 | +} |
| 80 | + |
| 81 | +/* Reset checksum_flist and reload it from dirname/.rsyncsums (if readable). The dirname value must |
| 82 | + * remain unchanged during the lifespan of the checksum_flist object because we use it directly. */ |
| 83 | +static void read_checksums(const char *dirname) |
| 84 | +{ |
| 85 | + char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN]; |
| 86 | + OFF_T file_length; |
| 87 | + time_t mtime; |
| 88 | + int len, dlen, i; |
| 89 | + char *cp; |
| 90 | + FILE *fp; |
| 91 | + |
| 92 | + if (checksum_flist) { |
| 93 | + /* Reset the pool memory and empty the file-list array. */ |
| 94 | + pool_free_old(checksum_flist->file_pool, |
| 95 | + pool_boundary(checksum_flist->file_pool, 0)); |
| 96 | + checksum_flist->used = 0; |
| 97 | + } else |
| 98 | + checksum_flist = flist_new(FLIST_TEMP, "read_checksums"); |
| 99 | + |
| 100 | + checksum_flist->low = 0; |
| 101 | + checksum_flist->high = -1; |
| 102 | + |
| 103 | + if (!dirname) |
| 104 | + return; /* a NULL dirname just empties the cache */ |
| 105 | + |
| 106 | + dlen = strlcpy(fbuf, dirname, sizeof fbuf); |
| 107 | + if (dlen + 1 + (int)sizeof ".rsyncsums" > (int)sizeof fbuf) |
| 108 | + return; /* "/.rsyncsums" plus its null must fit, or fbuf would end up truncated or unterminated */ |
| 109 | + if (dlen) |
| 110 | + fbuf[dlen++] = '/'; |
| 111 | + else |
| 112 | + dirname = NULL; /* empty dirname: entries are stored with a NULL dirname */ |
| 113 | + strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen); |
| 114 | + if (!(fp = fopen(fbuf, "r"))) |
| 115 | + return; /* no readable sums file: the cache stays empty */ |
| 116 | + /* Line format: MD4-hex MD5-hex size mtime ctime inode basename. A malformed line ends the read. */ |
| 117 | + while (fgets(line, sizeof line, fp)) { |
| 118 | + cp = line; |
| 119 | + if (protocol_version >= 30) { /* skip the leading MD4 column; the next column is decoded */ |
| 120 | + char *alt_sum = cp; |
| 121 | + if (*cp == '=') |
| 122 | + while (*++cp == '=') {} |
| 123 | + else |
| 124 | + while (isXDigit(cp)) cp++; |
| 125 | + if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ') |
| 126 | + break; |
| 127 | + while (*++cp == ' ') {} |
| 128 | + } |
| 129 | + |
| 130 | + if (*cp == '=') { |
| 131 | + continue; /* the wanted sum is an '=' filler: nothing to cache for this line */ |
| 132 | + } else { |
| 133 | + for (i = 0; i < checksum_len*2; i++, cp++) { /* decode hex pairs into sum[] */ |
| 134 | + int x; |
| 135 | + if (isXDigit(cp)) { |
| 136 | + if (isDigit(cp)) |
| 137 | + x = *cp - '0'; |
| 138 | + else |
| 139 | + x = (*cp & 0xF) + 9; /* 'a'-'f' or 'A'-'F' -> 10-15 */ |
| 140 | + } else { |
| 141 | + cp = ""; /* makes the ' ' check below fail, so we stop reading */ |
| 142 | + break; |
| 143 | + } |
| 144 | + if (i & 1) |
| 145 | + sum[i/2] |= x; |
| 146 | + else |
| 147 | + sum[i/2] = x << 4; |
| 148 | + } |
| 149 | + } |
| 150 | + if (*cp != ' ') |
| 151 | + break; |
| 152 | + while (*++cp == ' ') {} |
| 153 | + |
| 154 | + if (protocol_version < 30) { /* the first column was decoded; skip the MD5 column */ |
| 155 | + char *alt_sum = cp; |
| 156 | + if (*cp == '=') |
| 157 | + while (*++cp == '=') {} |
| 158 | + else |
| 159 | + while (isXDigit(cp)) cp++; |
| 160 | + if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ') |
| 161 | + break; |
| 162 | + while (*++cp == ' ') {} |
| 163 | + } |
| 164 | + |
| 165 | + file_length = 0; |
| 166 | + while (isDigit(cp)) |
| 167 | + file_length = file_length * 10 + *cp++ - '0'; |
| 168 | + if (*cp != ' ') |
| 169 | + break; |
| 170 | + while (*++cp == ' ') {} |
| 171 | + |
| 172 | + mtime = 0; |
| 173 | + while (isDigit(cp)) |
| 174 | + mtime = mtime * 10 + *cp++ - '0'; |
| 175 | + if (*cp != ' ') |
| 176 | + break; |
| 177 | + while (*++cp == ' ') {} |
| 178 | + |
| 179 | + /* Ignore ctime. */ |
| 180 | + while (isDigit(cp)) |
| 181 | + cp++; |
| 182 | + if (*cp != ' ') |
| 183 | + break; |
| 184 | + while (*++cp == ' ') {} |
| 185 | + |
| 186 | + /* Ignore inode. */ |
| 187 | + while (isDigit(cp)) |
| 188 | + cp++; |
| 189 | + if (*cp != ' ') |
| 190 | + break; |
| 191 | + while (*++cp == ' ') {} |
| 192 | + |
| 193 | + len = strlen(cp); /* the rest of the line is the basename */ |
| 194 | + while (len && (cp[len-1] == '\n' || cp[len-1] == '\r')) |
| 195 | + len--; |
| 196 | + if (!len) |
| 197 | + break; |
| 198 | + cp[len++] = '\0'; /* len now counts the null */ |
| 199 | + if (strchr(cp, '/')) |
| 200 | + break; /* only plain basenames are accepted */ |
| 201 | + if (len > MAXPATHLEN) |
| 202 | + continue; |
| 203 | + |
| 204 | + strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen); /* fbuf is not read again in this function */ |
| 205 | + |
| 206 | + add_checksum(dirname, cp, len, file_length, mtime, sum); |
| 207 | + } |
| 208 | + fclose(fp); |
| 209 | + |
| 210 | + clean_flist(checksum_flist, 0); /* NOTE(review): presumably sorts the list for flist_find() -- confirm */ |
| 211 | +} |
| 212 | + |
| 213 | int push_pathname(const char *dir, int len) |
| 214 | { |
| 215 | if (dir == pathname) |
| 216 | @@ -989,7 +1170,7 @@ struct file_struct *make_file(const char |
| 217 | STRUCT_STAT *stp, int flags, int filter_level) |
| 218 | { |
| 219 | static char *lastdir; |
| 220 | - static int lastdir_len = -1; |
| 221 | + static int lastdir_len = -2; |
| 222 | struct file_struct *file; |
| 223 | char thisname[MAXPATHLEN]; |
| 224 | char linkname[MAXPATHLEN]; |
| 225 | @@ -1119,9 +1300,16 @@ struct file_struct *make_file(const char |
| 226 | memcpy(lastdir, thisname, len); |
| 227 | lastdir[len] = '\0'; |
| 228 | lastdir_len = len; |
| 229 | + if (always_checksum && am_sender && flist) |
| 230 | + read_checksums(lastdir); |
| 231 | } |
| 232 | - } else |
| 233 | + } else { |
| 234 | basename = thisname; |
| 235 | + if (always_checksum && am_sender && flist && lastdir_len == -2) { |
| 236 | + lastdir_len = -1; |
| 237 | + read_checksums(""); |
| 238 | + } |
| 239 | + } |
| 240 | basename_len = strlen(basename) + 1; /* count the '\0' */ |
| 241 | |
| 242 | #ifdef SUPPORT_LINKS |
| 243 | @@ -1197,11 +1385,21 @@ struct file_struct *make_file(const char |
| 244 | } |
| 245 | #endif |
| 246 | |
| 247 | - if (always_checksum && am_sender && S_ISREG(st.st_mode)) |
| 248 | - file_checksum(thisname, tmp_sum, st.st_size); |
| 249 | - |
| 250 | F_PATHNAME(file) = pathname; |
| 251 | |
| 252 | + if (always_checksum && am_sender && S_ISREG(st.st_mode)) { |
| 253 | + int j; |
| 254 | + if (flist && (j = flist_find(checksum_flist, file)) >= 0) { |
| 255 | + struct file_struct *fp = checksum_flist->sorted[j]; |
| 256 | + if (F_LENGTH(fp) == st.st_size |
| 257 | + && fp->modtime == st.st_mtime) |
| 258 | + memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN); |
| 259 | + else |
| 260 | + file_checksum(thisname, tmp_sum, st.st_size); |
| 261 | + } else |
| 262 | + file_checksum(thisname, tmp_sum, st.st_size); |
| 263 | + } |
| 264 | + |
| 265 | /* This code is only used by the receiver when it is building |
| 266 | * a list of files for a delete pass. */ |
| 267 | if (keep_dirlinks && linkname_len && flist) { |
| 268 | --- old/ifuncs.h |
| 269 | +++ new/ifuncs.h |
| 270 | @@ -64,6 +64,12 @@ isDigit(const char *ptr) |
| 271 | } |
| 272 | |
| 273 | static inline int |
| 274 | +isXDigit(const char *ptr) |
| 275 | +{ |
| 276 | + return isxdigit(*(unsigned char *)ptr); /* tests the char at ptr, read as unsigned char as <ctype.h> requires */ |
| 277 | +} |
| 278 | + |
| 279 | +static inline int |
| 280 | isPrint(const char *ptr) |
| 281 | { |
| 282 | return isprint(*(unsigned char *)ptr); |
| 283 | --- old/patches/checksum-updating.diff |
| 284 | +++ new/patches/checksum-updating.diff |
| 285 | @@ -476,15 +476,6 @@ To use this patch, run these commands fo |
| 286 | |
| 287 | return flist; |
| 288 | } |
| 289 | -@@ -2320,7 +2673,7 @@ void flist_free(struct file_list *flist) |
| 290 | - |
| 291 | - if (!flist->prev || !flist_cnt) |
| 292 | - pool_destroy(flist->file_pool); |
| 293 | -- else |
| 294 | -+ else if (flist->pool_boundary) |
| 295 | - pool_free_old(flist->file_pool, flist->pool_boundary); |
| 296 | - |
| 297 | - if (flist->sorted && flist->sorted != flist->files) |
| 298 | --- old/ifuncs.h |
| 299 | +++ new/ifuncs.h |
| 300 | @@ -64,6 +64,12 @@ isDigit(const char *ptr) |
| 301 | --- old/support/rsyncsums |
| 302 | +++ new/support/rsyncsums |
| 303 | @@ -0,0 +1,183 @@ |
| 304 | +#!/usr/bin/perl -w |
| 305 | +use strict; |
| 306 | +# Create or refresh a .rsyncsums file (MD4 + MD5 of every non-empty regular file) in each given dir. |
| 307 | +use Getopt::Long; |
| 308 | +use Cwd qw(abs_path cwd); |
| 309 | +use Digest::MD4; |
| 310 | +use Digest::MD5; |
| 311 | + |
| 312 | +our $SUMS_FILE = '.rsyncsums'; |
| 313 | + |
| 314 | +&Getopt::Long::Configure('bundling'); |
| 315 | +&usage if !&GetOptions( |
| 316 | + 'simple-cmp|s' => \( my $ignore_ctime_and_inode ), |
| 317 | + 'recurse|r' => \( my $recurse_opt ), |
| 318 | + 'verbose|v+' => \( my $verbosity = 0 ), |
| 319 | + 'help|h' => \( my $help_opt ), |
| 320 | +); |
| 321 | +&usage if $help_opt; |
| 322 | + |
| 323 | +my $start_dir = cwd(); |
| 324 | + |
| 325 | +my @dirs = @ARGV; |
| 326 | +@dirs = '.' unless @dirs; |
| 327 | +foreach (@dirs) { |
| 328 | + $_ = abs_path($_); |
| 329 | +} |
| 330 | + |
| 331 | +$| = 1; |
| 332 | + |
| 333 | +my $md4 = Digest::MD4->new; |
| 334 | +my $md5 = Digest::MD5->new; |
| 335 | + |
| 336 | +while (@dirs) { |
| 337 | + my $dir = shift @dirs; |
| 338 | + |
| 339 | + if (!chdir($dir)) { |
| 340 | + warn "Unable to chdir to $dir: $!\n"; |
| 341 | + next; |
| 342 | + } |
| 343 | + if (!opendir(DP, '.')) { |
| 344 | + warn "Unable to opendir $dir: $!\n"; |
| 345 | + next; |
| 346 | + } |
| 347 | + |
| 348 | + if ($verbosity) { |
| 349 | + my $reldir = $dir; # shown relative to the start dir; \Q..\E keeps regex chars in that path literal |
| 350 | + $reldir =~ s#^\Q$start_dir\E(/|$)# $1 ? '' : '.' #eo; |
| 351 | + print "$reldir ... "; |
| 352 | + } |
| 353 | + |
| 354 | + my $sums_file_exists = -e $SUMS_FILE; |
| 355 | + my %cache; |
| 356 | + my @subdirs; |
| 357 | + my $cnt = 0; # files still lacking a reusable sum; forced to -1 when a rewrite is required |
| 358 | + while (defined(my $fn = readdir(DP))) { |
| 359 | + next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn; |
| 360 | + if (-d _) { |
| 361 | + push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/; |
| 362 | + next; |
| 363 | + } |
| 364 | + next unless -f _; |
| 365 | + |
| 366 | + my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1]; |
| 367 | + next if $size == 0; |
| 368 | + # Cache entry: [ size, mtime, ctime, inode, md4, md5 ]; ctime and inode are kept as 32-bit values. |
| 369 | + $cache{$fn} = [ $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ]; |
| 370 | + $cnt++; |
| 371 | + } |
| 372 | + |
| 373 | + closedir DP; |
| 374 | + |
| 375 | + unshift(@dirs, sort @subdirs) if $recurse_opt; |
| 376 | + |
| 377 | + if (!$cnt) { |
| 378 | + if ($sums_file_exists) { |
| 379 | + print "(removed $SUMS_FILE) " if $verbosity; |
| 380 | + unlink($SUMS_FILE); |
| 381 | + } |
| 382 | + print "empty\n" if $verbosity; |
| 383 | + next; |
| 384 | + } |
| 385 | + |
| 386 | + if (open(FP, '+<', $SUMS_FILE)) { |
| 387 | + while (<FP>) { # reuse the sums of every old line whose file is unchanged |
| 388 | + chomp; |
| 389 | + my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7); |
| 390 | + my $ref = $cache{$fn}; |
| 391 | + if (defined $ref) { |
| 392 | + if ($ignore_ctime_and_inode) { |
| 393 | + $ctime = $$ref[2]; |
| 394 | + $inode = $$ref[3]; |
| 395 | + } |
| 396 | + if ($$ref[0] == $size |
| 397 | + && $$ref[1] == $mtime |
| 398 | + && $$ref[2] == $ctime |
| 399 | + && $$ref[3] == $inode |
| 400 | + && $sum4 !~ /=/ && $sum5 !~ /=/) { |
| 401 | + $$ref[4] = $sum4; |
| 402 | + $$ref[5] = $sum5; |
| 403 | + $cnt--; |
| 404 | + } else { |
| 405 | + $$ref[4] = $$ref[5] = undef; |
| 406 | + } |
| 407 | + } else { |
| 408 | + $cnt = -1; # Force rewrite due to removed line. |
| 409 | + } |
| 410 | + } |
| 411 | + } else { |
| 412 | + open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n"; |
| 413 | + $cnt = -1; |
| 414 | + } |
| 415 | + |
| 416 | + if ($cnt) { |
| 417 | + print "UPDATING\n" if $verbosity; |
| 418 | + while (my($fn, $ref) = each %cache) { |
| 419 | + next if defined $$ref[4] && defined $$ref[5]; # both sums were reused from the old file |
| 420 | + if (!open(IN, $fn)) { |
| 421 | + print STDERR "Unable to read $fn: $!\n"; |
| 422 | + delete $cache{$fn}; |
| 423 | + next; |
| 424 | + } |
| 425 | + |
| 426 | + my($size,$mtime,$ctime,$inode) = (stat(IN))[7,9,10,1]; |
| 427 | + if ($size == 0) { |
| 428 | + close IN; |
| 429 | + delete $cache{$fn}; next; # emptied since the scan: drop it so no sum-less line gets written |
| 430 | + } |
| 431 | + |
| 432 | + my($sum4, $sum5); |
| 433 | + while (1) { # re-read until the stat info is the same before and after summing |
| 434 | + while (sysread(IN, $_, 64*1024)) { |
| 435 | + $md4->add($_); |
| 436 | + $md5->add($_); |
| 437 | + } |
| 438 | + $sum4 = $md4->hexdigest; |
| 439 | + $sum5 = $md5->hexdigest; |
| 440 | + print " $sum4 $sum5" if $verbosity > 2; |
| 441 | + print " $fn\n" if $verbosity > 1; |
| 442 | + my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1]; |
| 443 | + if ($ignore_ctime_and_inode) { |
| 444 | + $ctime = $ctime2; |
| 445 | + $inode = $inode2; |
| 446 | + } |
| 447 | + last if $size == $size2 && $mtime == $mtime2 |
| 448 | + && $ctime == $ctime2 && $inode == $inode2; |
| 449 | + $size = $size2; |
| 450 | + $mtime = $mtime2; |
| 451 | + $ctime = $ctime2; |
| 452 | + $inode = $inode2; |
| 453 | + sysseek(IN, 0, 0); |
| 454 | + } |
| 455 | + |
| 456 | + close IN; |
| 457 | + # Mask ctime/inode exactly as the scan does, so the written values compare equal on the next run. |
| 458 | + $cache{$fn} = [ $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF, $sum4, $sum5 ]; |
| 459 | + } |
| 460 | + |
| 461 | + seek(FP, 0, 0); |
| 462 | + foreach my $fn (sort keys %cache) { |
| 463 | + my $ref = $cache{$fn}; |
| 464 | + my($size, $mtime, $ctime, $inode, $sum4, $sum5) = @$ref; |
| 465 | + printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn; |
| 466 | + } |
| 467 | + truncate(FP, tell(FP)); |
| 468 | + } else { |
| 469 | + print "ok\n" if $verbosity; |
| 470 | + } |
| 471 | + |
| 472 | + close FP; |
| 473 | +} |
| 474 | + |
| 475 | +sub usage # Show the option summary via die, which ends the script. |
| 476 | +{ |
| 477 | + die <<EOT; |
| 478 | +Usage: rsyncsums [OPTIONS] [DIRS] |
| 479 | + |
| 480 | +Options: |
| 481 | + -r, --recurse Update $SUMS_FILE files in subdirectories too. |
| 482 | + -s, --simple-cmp Ignore ctime and inode values when comparing identicality. |
| 483 | + -v, --verbose Mention what we're doing. Repeat for more info. |
| 484 | + -h, --help Display this help message. |
| 485 | +EOT |
| 486 | +} |