Commit | Line | Data |
---|---|---|
213d4328 WD |
1 | Optimize the ability of a mirror to send checksums. |
2 | ||
3 | This adds a sender optimization feature that allows a cache of checksums | |
4 | to be used when the client specifies the --checksum option. The checksum | |
5 | files (.rsyncsums) must be created by some other process (see the Perl | |
6 | script support/rsyncsums for one way to do this). | |
7 | ||
8 | This patch is meant for mirrors whose files are created once and then never | |
9 | changed. Only a minimal amount of sanity-check information from the | |
10 | .rsyncsums file is used (size and mtime) so that the sum files can be shared | |
11 | with your mirror network. | |
12 | ||
13 | To use this patch, run these commands for a successful build: | |
14 | ||
15 | patch -p1 <patches/checksum4mirrors.diff | |
16 | ./configure (optional if already run) | |
17 | make | |
18 | ||
19 | --- old/flist.c | |
20 | +++ new/flist.c | |
21 | @@ -117,6 +117,7 @@ static char empty_sum[MAX_DIGEST_LEN]; | |
22 | static int flist_count_offset; /* for --delete --progress */ | |
23 | static int dir_count = 0; | |
24 | static int high_hlink_ndx; | |
25 | +static struct file_list *checksum_flist = NULL; | |
26 | ||
27 | static void clean_flist(struct file_list *flist, int strip_root); | |
28 | static void output_flist(struct file_list *flist); | |
29 | @@ -304,6 +305,186 @@ static void flist_done_allocating(struct | |
30 | flist->pool_boundary = ptr; | |
31 | } | |
32 | ||
33 | +/* The len count is the length of the basename + 1 for the null. */ | |
34 | +static int add_checksum(const char *dirname, const char *basename, int len, | |
35 | + OFF_T file_length, time_t mtime, const char *sum) | |
36 | +{ | |
37 | + struct file_struct *file; | |
38 | + int alloc_len, extra_len; | |
39 | + char *bp; | |
40 | + | |
41 | + if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0) | |
42 | + return 0; | |
43 | + if (file_length == 0) | |
44 | + return 0; | |
45 | + | |
46 | + extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT) | |
47 | + * EXTRA_LEN; | |
48 | +#if EXTRA_ROUNDING > 0 | |
49 | + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN)) | |
50 | + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN; | |
51 | +#endif | |
52 | + alloc_len = FILE_STRUCT_LEN + extra_len + len; | |
53 | + bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum"); | |
54 | + | |
55 | + memset(bp, 0, extra_len + FILE_STRUCT_LEN); | |
56 | + bp += extra_len; | |
57 | + file = (struct file_struct *)bp; | |
58 | + bp += FILE_STRUCT_LEN; | |
59 | + | |
60 | + memcpy(bp, basename, len); | |
61 | + | |
62 | + file->mode = S_IFREG; | |
63 | + file->modtime = mtime; | |
64 | + file->len32 = (uint32)file_length; | |
65 | + if (file_length > 0xFFFFFFFFu) { | |
66 | + file->flags |= FLAG_LENGTH64; | |
67 | + OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32); | |
68 | + } | |
69 | + file->dirname = dirname; | |
70 | + bp = F_SUM(file); | |
71 | + memcpy(bp, sum, checksum_len); | |
72 | + | |
73 | + flist_expand(checksum_flist, 1); | |
74 | + checksum_flist->files[checksum_flist->used++] = file; | |
75 | + | |
76 | + checksum_flist->sorted = checksum_flist->files; | |
77 | + | |
78 | + return 1; | |
79 | +} | |
80 | + | |
81 | +/* The dirname value must remain unchanged during the lifespan of the | |
82 | + * created checksum_flist object because we use it directly. */ | |
83 | +static void read_checksums(const char *dirname) | |
84 | +{ | |
85 | + char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN]; | |
86 | + OFF_T file_length; | |
87 | + time_t mtime; | |
88 | + int len, dlen, i; | |
89 | + char *cp; | |
90 | + FILE *fp; | |
91 | + | |
92 | + if (checksum_flist) { | |
93 | + /* Reset the pool memory and empty the file-list array. */ | |
94 | + pool_free_old(checksum_flist->file_pool, | |
95 | + pool_boundary(checksum_flist->file_pool, 0)); | |
96 | + checksum_flist->used = 0; | |
97 | + } else | |
98 | + checksum_flist = flist_new(FLIST_TEMP, "read_checksums"); | |
99 | + | |
100 | + checksum_flist->low = 0; | |
101 | + checksum_flist->high = -1; | |
102 | + | |
103 | + if (!dirname) | |
104 | + return; | |
105 | + | |
106 | + dlen = strlcpy(fbuf, dirname, sizeof fbuf); | |
107 | + if (dlen >= (int)sizeof fbuf) | |
108 | + return; | |
109 | + if (dlen) | |
110 | + fbuf[dlen++] = '/'; | |
111 | + else | |
112 | + dirname = NULL; | |
113 | + strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen); | |
114 | + if (!(fp = fopen(fbuf, "r"))) | |
115 | + return; | |
116 | + | |
117 | + while (fgets(line, sizeof line, fp)) { | |
118 | + cp = line; | |
119 | + if (protocol_version >= 30) { | |
120 | + char *alt_sum = cp; | |
121 | + if (*cp == '=') | |
122 | + while (*++cp == '=') {} | |
123 | + else | |
124 | + while (isXDigit(cp)) cp++; | |
125 | + if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ') | |
126 | + break; | |
127 | + while (*++cp == ' ') {} | |
128 | + } | |
129 | + | |
130 | + if (*cp == '=') { | |
131 | + continue; | |
132 | + } else { | |
133 | + for (i = 0; i < checksum_len*2; i++, cp++) { | |
134 | + int x; | |
135 | + if (isXDigit(cp)) { | |
136 | + if (isDigit(cp)) | |
137 | + x = *cp - '0'; | |
138 | + else | |
139 | + x = (*cp & 0xF) + 9; | |
140 | + } else { | |
141 | + cp = ""; | |
142 | + break; | |
143 | + } | |
144 | + if (i & 1) | |
145 | + sum[i/2] |= x; | |
146 | + else | |
147 | + sum[i/2] = x << 4; | |
148 | + } | |
149 | + } | |
150 | + if (*cp != ' ') | |
151 | + break; | |
152 | + while (*++cp == ' ') {} | |
153 | + | |
154 | + if (protocol_version < 30) { | |
155 | + char *alt_sum = cp; | |
156 | + if (*cp == '=') | |
157 | + while (*++cp == '=') {} | |
158 | + else | |
159 | + while (isXDigit(cp)) cp++; | |
160 | + if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ') | |
161 | + break; | |
162 | + while (*++cp == ' ') {} | |
163 | + } | |
164 | + | |
165 | + file_length = 0; | |
166 | + while (isDigit(cp)) | |
167 | + file_length = file_length * 10 + *cp++ - '0'; | |
168 | + if (*cp != ' ') | |
169 | + break; | |
170 | + while (*++cp == ' ') {} | |
171 | + | |
172 | + mtime = 0; | |
173 | + while (isDigit(cp)) | |
174 | + mtime = mtime * 10 + *cp++ - '0'; | |
175 | + if (*cp != ' ') | |
176 | + break; | |
177 | + while (*++cp == ' ') {} | |
178 | + | |
179 | + /* Ignore ctime. */ | |
180 | + while (isDigit(cp)) | |
181 | + cp++; | |
182 | + if (*cp != ' ') | |
183 | + break; | |
184 | + while (*++cp == ' ') {} | |
185 | + | |
186 | + /* Ignore inode. */ | |
187 | + while (isDigit(cp)) | |
188 | + cp++; | |
189 | + if (*cp != ' ') | |
190 | + break; | |
191 | + while (*++cp == ' ') {} | |
192 | + | |
193 | + len = strlen(cp); | |
194 | + while (len && (cp[len-1] == '\n' || cp[len-1] == '\r')) | |
195 | + len--; | |
196 | + if (!len) | |
197 | + break; | |
198 | + cp[len++] = '\0'; /* len now counts the null */ | |
199 | + if (strchr(cp, '/')) | |
200 | + break; | |
201 | + if (len > MAXPATHLEN) | |
202 | + continue; | |
203 | + | |
204 | + strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen); | |
205 | + | |
206 | + add_checksum(dirname, cp, len, file_length, mtime, sum); | |
207 | + } | |
208 | + fclose(fp); | |
209 | + | |
210 | + clean_flist(checksum_flist, 0); | |
211 | +} | |
212 | + | |
213 | int push_pathname(const char *dir, int len) | |
214 | { | |
215 | if (dir == pathname) | |
216 | @@ -989,7 +1170,7 @@ struct file_struct *make_file(const char | |
217 | STRUCT_STAT *stp, int flags, int filter_level) | |
218 | { | |
219 | static char *lastdir; | |
220 | - static int lastdir_len = -1; | |
221 | + static int lastdir_len = -2; | |
222 | struct file_struct *file; | |
223 | char thisname[MAXPATHLEN]; | |
224 | char linkname[MAXPATHLEN]; | |
225 | @@ -1119,9 +1300,16 @@ struct file_struct *make_file(const char | |
226 | memcpy(lastdir, thisname, len); | |
227 | lastdir[len] = '\0'; | |
228 | lastdir_len = len; | |
229 | + if (always_checksum && am_sender && flist) | |
230 | + read_checksums(lastdir); | |
231 | } | |
232 | - } else | |
233 | + } else { | |
234 | basename = thisname; | |
235 | + if (always_checksum && am_sender && flist && lastdir_len == -2) { | |
236 | + lastdir_len = -1; | |
237 | + read_checksums(""); | |
238 | + } | |
239 | + } | |
240 | basename_len = strlen(basename) + 1; /* count the '\0' */ | |
241 | ||
242 | #ifdef SUPPORT_LINKS | |
243 | @@ -1197,11 +1385,21 @@ struct file_struct *make_file(const char | |
244 | } | |
245 | #endif | |
246 | ||
247 | - if (always_checksum && am_sender && S_ISREG(st.st_mode)) | |
248 | - file_checksum(thisname, tmp_sum, st.st_size); | |
249 | - | |
250 | F_PATHNAME(file) = pathname; | |
251 | ||
252 | + if (always_checksum && am_sender && S_ISREG(st.st_mode)) { | |
253 | + int j; | |
254 | + if (flist && (j = flist_find(checksum_flist, file)) >= 0) { | |
255 | + struct file_struct *fp = checksum_flist->sorted[j]; | |
256 | + if (F_LENGTH(fp) == st.st_size | |
257 | + && fp->modtime == st.st_mtime) | |
258 | + memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN); | |
259 | + else | |
260 | + file_checksum(thisname, tmp_sum, st.st_size); | |
261 | + } else | |
262 | + file_checksum(thisname, tmp_sum, st.st_size); | |
263 | + } | |
264 | + | |
265 | /* This code is only used by the receiver when it is building | |
266 | * a list of files for a delete pass. */ | |
267 | if (keep_dirlinks && linkname_len && flist) { | |
6e9495c7 WD |
268 | @@ -2051,7 +2249,11 @@ struct file_list *send_file_list(int f, |
269 | * file-list to check if this is a 1-file xfer. */ | |
270 | send_extra_file_list(f, 1); | |
271 | } | |
272 | - } | |
273 | + } else | |
274 | + flist_eof = 1; | |
275 | + | |
276 | + if (checksum_updating && always_checksum && flist_eof) | |
277 | + read_checksums(NULL); | |
278 | ||
279 | return flist; | |
280 | } | |
213d4328 WD |
281 | --- old/ifuncs.h |
282 | +++ new/ifuncs.h | |
283 | @@ -64,6 +64,12 @@ isDigit(const char *ptr) | |
284 | } | |
285 | ||
286 | static inline int | |
287 | +isXDigit(const char *ptr) | |
288 | +{ | |
289 | + return isxdigit(*(unsigned char *)ptr); | |
290 | +} | |
291 | + | |
292 | +static inline int | |
293 | isPrint(const char *ptr) | |
294 | { | |
295 | return isprint(*(unsigned char *)ptr); | |
213d4328 WD |
296 | --- old/support/rsyncsums |
297 | +++ new/support/rsyncsums | |
d1a75c9f | 298 | @@ -0,0 +1,203 @@ |
213d4328 WD |
299 | +#!/usr/bin/perl -w |
300 | +use strict; | |
301 | + | |
302 | +use Getopt::Long; | |
303 | +use Cwd qw(abs_path cwd); | |
304 | +use Digest::MD4; | |
305 | +use Digest::MD5; | |
306 | + | |
307 | +our $SUMS_FILE = '.rsyncsums'; | |
308 | + | |
309 | +&Getopt::Long::Configure('bundling'); | |
310 | +&usage if !&GetOptions( | |
213d4328 | 311 | + 'recurse|r' => \( my $recurse_opt ), |
d1a75c9f WD |
312 | + 'simple-cmp|s' => \( my $ignore_ctime_and_inode ), |
313 | + 'check|c' => \( my $check_opt ), | |
213d4328 WD |
314 | + 'verbose|v+' => \( my $verbosity = 0 ), |
315 | + 'help|h' => \( my $help_opt ), | |
316 | +); | |
317 | +&usage if $help_opt; | |
318 | + | |
319 | +my $start_dir = cwd(); | |
320 | + | |
321 | +my @dirs = @ARGV; | |
322 | +@dirs = '.' unless @dirs; | |
323 | +foreach (@dirs) { | |
324 | + $_ = abs_path($_); | |
325 | +} | |
326 | + | |
327 | +$| = 1; | |
328 | + | |
d1a75c9f WD |
329 | +my $exit_code = 0; |
330 | + | |
213d4328 WD |
331 | +my $md4 = Digest::MD4->new; |
332 | +my $md5 = Digest::MD5->new; | |
333 | + | |
334 | +while (@dirs) { | |
335 | + my $dir = shift @dirs; | |
336 | + | |
337 | + if (!chdir($dir)) { | |
338 | + warn "Unable to chdir to $dir: $!\n"; | |
339 | + next; | |
340 | + } | |
341 | + if (!opendir(DP, '.')) { | |
342 | + warn "Unable to opendir $dir: $!\n"; | |
343 | + next; | |
344 | + } | |
345 | + | |
d1a75c9f WD |
346 | + my $reldir = $dir; |
347 | + $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo; | |
213d4328 | 348 | + if ($verbosity) { |
213d4328 | 349 | + print "$reldir ... "; |
d1a75c9f | 350 | + print "\n" if $check_opt; |
213d4328 WD |
351 | + } |
352 | + | |
213d4328 | 353 | + my %cache; |
d1a75c9f WD |
354 | + my $f_cnt = 0; |
355 | + if (open(FP, '<', $SUMS_FILE)) { | |
356 | + while (<FP>) { | |
357 | + chomp; | |
358 | + my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7); | |
359 | + $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ]; | |
360 | + $f_cnt++; | |
361 | + } | |
362 | + close FP; | |
363 | + } | |
364 | + | |
213d4328 | 365 | + my @subdirs; |
d1a75c9f WD |
366 | + my $d_cnt = 0; |
367 | + my $update_cnt = 0; | |
213d4328 WD |
368 | + while (defined(my $fn = readdir(DP))) { |
369 | + next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn; | |
370 | + if (-d _) { | |
371 | + push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/; | |
372 | + next; | |
373 | + } | |
374 | + next unless -f _; | |
375 | + | |
376 | + my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1]; | |
d1a75c9f WD |
377 | + my $ref = $cache{$fn}; |
378 | + if ($size == 0) { | |
379 | + if (defined $ref) { | |
380 | + delete $cache{$fn}; | |
381 | + $f_cnt--; | |
382 | + if (!$check_opt && !$update_cnt++) { | |
383 | + print "UPDATING\n" if $verbosity; | |
384 | + } | |
385 | + } | |
386 | + next; | |
213d4328 | 387 | + } |
d1a75c9f | 388 | + $d_cnt++; |
213d4328 | 389 | + |
d1a75c9f | 390 | + if (!$check_opt) { |
213d4328 | 391 | + if (defined $ref) { |
d1a75c9f WD |
392 | + $$ref[0] = 1; |
393 | + if ($$ref[3] == $size | |
394 | + && $$ref[4] == $mtime | |
395 | + && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode)) | |
396 | + && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) { | |
397 | + next; | |
213d4328 | 398 | + } |
d1a75c9f WD |
399 | + } |
400 | + if (!$update_cnt++) { | |
401 | + print "UPDATING\n" if $verbosity; | |
213d4328 WD |
402 | + } |
403 | + } | |
213d4328 | 404 | + |
d1a75c9f WD |
405 | + if (!open(IN, $fn)) { |
406 | + print STDERR "Unable to read $fn: $!\n"; | |
407 | + if (defined $ref) { | |
213d4328 | 408 | + delete $cache{$fn}; |
d1a75c9f | 409 | + $f_cnt--; |
213d4328 | 410 | + } |
d1a75c9f WD |
411 | + next; |
412 | + } | |
213d4328 | 413 | + |
d1a75c9f WD |
414 | + my($sum4, $sum5); |
415 | + while (1) { | |
416 | + while (sysread(IN, $_, 64*1024)) { | |
417 | + $md4->add($_); | |
418 | + $md5->add($_); | |
213d4328 | 419 | + } |
d1a75c9f WD |
420 | + $sum4 = $md4->hexdigest; |
421 | + $sum5 = $md5->hexdigest; | |
422 | + print " $sum4 $sum5" if $verbosity > 2; | |
423 | + print " $fn" if $verbosity > 1; | |
424 | + my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1]; | |
425 | + last if $size == $size2 && $mtime == $mtime2 | |
426 | + && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2)); | |
427 | + $size = $size2; | |
428 | + $mtime = $mtime2; | |
429 | + $ctime = $ctime2; | |
430 | + $inode = $inode2; | |
431 | + sysseek(IN, 0, 0); | |
432 | + print " REREADING\n" if $verbosity > 1; | |
433 | + } | |
213d4328 | 434 | + |
d1a75c9f WD |
435 | + close IN; |
436 | + | |
437 | + if ($check_opt) { | |
438 | + my $dif; | |
439 | + if (!defined $ref) { | |
440 | + $dif = 'MISSING'; | |
441 | + } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) { | |
442 | + $dif = 'FAILED'; | |
443 | + } else { | |
444 | + print " OK\n" if $verbosity > 1; | |
445 | + next; | |
446 | + } | |
447 | + if ($verbosity < 2) { | |
448 | + print $verbosity ? ' ' : "$reldir/"; | |
449 | + print $fn; | |
213d4328 | 450 | + } |
d1a75c9f WD |
451 | + print " $dif\n"; |
452 | + $exit_code = 1; | |
453 | + } else { | |
454 | + print "\n" if $verbosity > 1; | |
455 | + $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ]; | |
456 | + } | |
457 | + } | |
458 | + | |
459 | + closedir DP; | |
213d4328 | 460 | + |
d1a75c9f WD |
461 | + unshift(@dirs, sort @subdirs) if $recurse_opt; |
462 | + | |
463 | + if ($check_opt) { | |
464 | + ; | |
465 | + } elsif ($d_cnt == 0) { | |
466 | + if ($f_cnt) { | |
467 | + print "(removed $SUMS_FILE) " if $verbosity; | |
468 | + unlink($SUMS_FILE); | |
213d4328 | 469 | + } |
d1a75c9f WD |
470 | + print "empty\n" if $verbosity; |
471 | + } elsif ($update_cnt || $d_cnt != $f_cnt) { | |
472 | + print "UPDATING\n" if $verbosity && !$update_cnt; | |
473 | + open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n"; | |
213d4328 | 474 | + |
213d4328 WD |
475 | + foreach my $fn (sort keys %cache) { |
476 | + my $ref = $cache{$fn}; | |
d1a75c9f WD |
477 | + my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref; |
478 | + next unless $found; | |
213d4328 WD |
479 | + printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn; |
480 | + } | |
d1a75c9f | 481 | + close FP; |
213d4328 WD |
482 | + } else { |
483 | + print "ok\n" if $verbosity; | |
484 | + } | |
213d4328 WD |
485 | +} |
486 | + | |
d1a75c9f WD |
487 | +exit $exit_code; |
488 | + | |
213d4328 WD |
489 | +sub usage |
490 | +{ | |
491 | + die <<EOT; | |
492 | +Usage: rsyncsums [OPTIONS] [DIRS] | |
493 | + | |
494 | +Options: | |
495 | + -r, --recurse Update $SUMS_FILE files in subdirectories too. | |
496 | + -s, --simple-cmp Ignore ctime and inode values when comparing identicality. | |
d1a75c9f | 497 | + -c, --check Check if the checksums are right (doesn't update). |
213d4328 WD |
498 | + -v, --verbose Mention what we're doing. Repeat for more info. |
499 | + -h, --help Display this help message. | |
500 | +EOT | |
501 | +} |