Commit | Line | Data |
---|---|---|
213d4328 WD |
1 | Optimize the ability of a mirror to send checksums. |
2 | ||
3 | This adds a sender optimization feature that allows a cache of checksums | |
4 | to be used when the client specifies the --checksum option. The checksum | |
5 | files (.rsyncsums) must be created by some other process (see the perl | |
6 | script support/rsyncsums for one way). | |
7 | ||
8 | This feature should be used by mirrors that contain files that are created | |
9 | once and then never changed. There is a minimal amount of sanity-check | |
10 | information in the .rsyncsums file (size and mtime) so that the sum files can | |
11 | be shared with your mirror network. | |
12 | ||
13 | To use this patch, run these commands for a successful build: | |
14 | ||
cc3e685d | 15 | patch -p1 <patches/checksum-reading.diff |
213d4328 WD |
16 | ./configure (optional if already run) |
17 | make | |
18 | ||
cc3e685d WD |
19 | diff --git a/flist.c b/flist.c |
20 | --- a/flist.c | |
21 | +++ b/flist.c | |
f2863bc0 WD |
22 | @@ -121,6 +121,7 @@ static char tmp_sum[MAX_DIGEST_LEN]; |
23 | static char empty_sum[MAX_DIGEST_LEN]; | |
213d4328 WD |
24 | static int flist_count_offset; /* for --delete --progress */ |
25 | static int dir_count = 0; | |
213d4328 WD |
26 | +static struct file_list *checksum_flist = NULL; |
27 | ||
f2863bc0 | 28 | static void flist_sort_and_clean(struct file_list *flist, int strip_root); |
213d4328 | 29 | static void output_flist(struct file_list *flist); |
f2863bc0 | 30 | @@ -313,6 +314,186 @@ static void flist_done_allocating(struct file_list *flist) |
213d4328 WD |
31 | flist->pool_boundary = ptr; |
32 | } | |
33 | ||
34 | +/* The len count is the length of the basename + 1 for the null. */ | |
35 | +static int add_checksum(const char *dirname, const char *basename, int len, | |
36 | + OFF_T file_length, time_t mtime, const char *sum) | |
37 | +{ | |
38 | + struct file_struct *file; | |
39 | + int alloc_len, extra_len; | |
40 | + char *bp; | |
41 | + | |
42 | + if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0) | |
43 | + return 0; | |
44 | + if (file_length == 0) | |
45 | + return 0; | |
46 | + | |
47 | + extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT) | |
48 | + * EXTRA_LEN; | |
49 | +#if EXTRA_ROUNDING > 0 | |
50 | + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN)) | |
51 | + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN; | |
52 | +#endif | |
53 | + alloc_len = FILE_STRUCT_LEN + extra_len + len; | |
54 | + bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum"); | |
55 | + | |
56 | + memset(bp, 0, extra_len + FILE_STRUCT_LEN); | |
57 | + bp += extra_len; | |
58 | + file = (struct file_struct *)bp; | |
59 | + bp += FILE_STRUCT_LEN; | |
60 | + | |
61 | + memcpy(bp, basename, len); | |
62 | + | |
63 | + file->mode = S_IFREG; | |
64 | + file->modtime = mtime; | |
65 | + file->len32 = (uint32)file_length; | |
66 | + if (file_length > 0xFFFFFFFFu) { | |
67 | + file->flags |= FLAG_LENGTH64; | |
68 | + OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32); | |
69 | + } | |
70 | + file->dirname = dirname; | |
71 | + bp = F_SUM(file); | |
72 | + memcpy(bp, sum, checksum_len); | |
73 | + | |
74 | + flist_expand(checksum_flist, 1); | |
75 | + checksum_flist->files[checksum_flist->used++] = file; | |
76 | + | |
77 | + checksum_flist->sorted = checksum_flist->files; | |
78 | + | |
79 | + return 1; | |
80 | +} | |
81 | + | |
82 | +/* The dirname value must remain unchanged during the lifespan of the | |
83 | + * created checksum_flist object because we use it directly. */ | |
84 | +static void read_checksums(const char *dirname) | |
85 | +{ | |
86 | + char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN]; | |
87 | + OFF_T file_length; | |
88 | + time_t mtime; | |
89 | + int len, dlen, i; | |
90 | + char *cp; | |
91 | + FILE *fp; | |
92 | + | |
93 | + if (checksum_flist) { | |
94 | + /* Reset the pool memory and empty the file-list array. */ | |
95 | + pool_free_old(checksum_flist->file_pool, | |
96 | + pool_boundary(checksum_flist->file_pool, 0)); | |
97 | + checksum_flist->used = 0; | |
98 | + } else | |
99 | + checksum_flist = flist_new(FLIST_TEMP, "read_checksums"); | |
100 | + | |
101 | + checksum_flist->low = 0; | |
102 | + checksum_flist->high = -1; | |
103 | + | |
104 | + if (!dirname) | |
105 | + return; | |
106 | + | |
107 | + dlen = strlcpy(fbuf, dirname, sizeof fbuf); | |
108 | + if (dlen >= (int)sizeof fbuf) | |
109 | + return; | |
110 | + if (dlen) | |
111 | + fbuf[dlen++] = '/'; | |
112 | + else | |
113 | + dirname = NULL; | |
114 | + strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen); | |
115 | + if (!(fp = fopen(fbuf, "r"))) | |
116 | + return; | |
117 | + | |
118 | + while (fgets(line, sizeof line, fp)) { | |
119 | + cp = line; | |
120 | + if (protocol_version >= 30) { | |
121 | + char *alt_sum = cp; | |
122 | + if (*cp == '=') | |
123 | + while (*++cp == '=') {} | |
124 | + else | |
125 | + while (isXDigit(cp)) cp++; | |
126 | + if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ') | |
127 | + break; | |
128 | + while (*++cp == ' ') {} | |
129 | + } | |
130 | + | |
131 | + if (*cp == '=') { | |
132 | + continue; | |
133 | + } else { | |
134 | + for (i = 0; i < checksum_len*2; i++, cp++) { | |
135 | + int x; | |
136 | + if (isXDigit(cp)) { | |
137 | + if (isDigit(cp)) | |
138 | + x = *cp - '0'; | |
139 | + else | |
140 | + x = (*cp & 0xF) + 9; | |
141 | + } else { | |
142 | + cp = ""; | |
143 | + break; | |
144 | + } | |
145 | + if (i & 1) | |
146 | + sum[i/2] |= x; | |
147 | + else | |
148 | + sum[i/2] = x << 4; | |
149 | + } | |
150 | + } | |
151 | + if (*cp != ' ') | |
152 | + break; | |
153 | + while (*++cp == ' ') {} | |
154 | + | |
155 | + if (protocol_version < 30) { | |
156 | + char *alt_sum = cp; | |
157 | + if (*cp == '=') | |
158 | + while (*++cp == '=') {} | |
159 | + else | |
160 | + while (isXDigit(cp)) cp++; | |
161 | + if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ') | |
162 | + break; | |
163 | + while (*++cp == ' ') {} | |
164 | + } | |
165 | + | |
166 | + file_length = 0; | |
167 | + while (isDigit(cp)) | |
168 | + file_length = file_length * 10 + *cp++ - '0'; | |
169 | + if (*cp != ' ') | |
170 | + break; | |
171 | + while (*++cp == ' ') {} | |
172 | + | |
173 | + mtime = 0; | |
174 | + while (isDigit(cp)) | |
175 | + mtime = mtime * 10 + *cp++ - '0'; | |
176 | + if (*cp != ' ') | |
177 | + break; | |
178 | + while (*++cp == ' ') {} | |
179 | + | |
180 | + /* Ignore ctime. */ | |
181 | + while (isDigit(cp)) | |
182 | + cp++; | |
183 | + if (*cp != ' ') | |
184 | + break; | |
185 | + while (*++cp == ' ') {} | |
186 | + | |
187 | + /* Ignore inode. */ | |
188 | + while (isDigit(cp)) | |
189 | + cp++; | |
190 | + if (*cp != ' ') | |
191 | + break; | |
192 | + while (*++cp == ' ') {} | |
193 | + | |
194 | + len = strlen(cp); | |
195 | + while (len && (cp[len-1] == '\n' || cp[len-1] == '\r')) | |
196 | + len--; | |
197 | + if (!len) | |
198 | + break; | |
199 | + cp[len++] = '\0'; /* len now counts the null */ | |
200 | + if (strchr(cp, '/')) | |
201 | + break; | |
202 | + if (len > MAXPATHLEN) | |
203 | + continue; | |
204 | + | |
205 | + strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen); | |
206 | + | |
207 | + add_checksum(dirname, cp, len, file_length, mtime, sum); | |
208 | + } | |
209 | + fclose(fp); | |
210 | + | |
211 | + clean_flist(checksum_flist, 0); | |
212 | +} | |
213 | + | |
214 | int push_pathname(const char *dir, int len) | |
215 | { | |
216 | if (dir == pathname) | |
f2863bc0 | 217 | @@ -1003,7 +1184,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, |
213d4328 WD |
218 | STRUCT_STAT *stp, int flags, int filter_level) |
219 | { | |
220 | static char *lastdir; | |
221 | - static int lastdir_len = -1; | |
222 | + static int lastdir_len = -2; | |
223 | struct file_struct *file; | |
224 | char thisname[MAXPATHLEN]; | |
225 | char linkname[MAXPATHLEN]; | |
a5e6228a | 226 | @@ -1136,9 +1317,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, |
213d4328 WD |
227 | memcpy(lastdir, thisname, len); |
228 | lastdir[len] = '\0'; | |
229 | lastdir_len = len; | |
230 | + if (always_checksum && am_sender && flist) | |
231 | + read_checksums(lastdir); | |
232 | } | |
233 | - } else | |
234 | + } else { | |
235 | basename = thisname; | |
236 | + if (always_checksum && am_sender && flist && lastdir_len == -2) { | |
237 | + lastdir_len = -1; | |
238 | + read_checksums(""); | |
239 | + } | |
240 | + } | |
241 | basename_len = strlen(basename) + 1; /* count the '\0' */ | |
242 | ||
243 | #ifdef SUPPORT_LINKS | |
a5e6228a | 244 | @@ -1214,11 +1402,21 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, |
213d4328 WD |
245 | } |
246 | #endif | |
247 | ||
248 | - if (always_checksum && am_sender && S_ISREG(st.st_mode)) | |
249 | - file_checksum(thisname, tmp_sum, st.st_size); | |
250 | - | |
251 | F_PATHNAME(file) = pathname; | |
252 | ||
253 | + if (always_checksum && am_sender && S_ISREG(st.st_mode)) { | |
254 | + int j; | |
255 | + if (flist && (j = flist_find(checksum_flist, file)) >= 0) { | |
256 | + struct file_struct *fp = checksum_flist->sorted[j]; | |
257 | + if (F_LENGTH(fp) == st.st_size | |
258 | + && fp->modtime == st.st_mtime) | |
259 | + memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN); | |
260 | + else | |
261 | + file_checksum(thisname, tmp_sum, st.st_size); | |
262 | + } else | |
263 | + file_checksum(thisname, tmp_sum, st.st_size); | |
264 | + } | |
265 | + | |
266 | /* This code is only used by the receiver when it is building | |
267 | * a list of files for a delete pass. */ | |
268 | if (keep_dirlinks && linkname_len && flist) { | |
a5e6228a | 269 | @@ -2074,7 +2272,11 @@ struct file_list *send_file_list(int f, int argc, char *argv[]) |
6e9495c7 WD |
270 | * file-list to check if this is a 1-file xfer. */ |
271 | send_extra_file_list(f, 1); | |
272 | } | |
273 | - } | |
274 | + } else | |
275 | + flist_eof = 1; | |
276 | + | |
277 | + if (checksum_updating && always_checksum && flist_eof) | |
278 | + read_checksums(NULL); | |
279 | ||
280 | return flist; | |
281 | } | |
cc3e685d WD |
282 | diff --git a/ifuncs.h b/ifuncs.h |
283 | --- a/ifuncs.h | |
284 | +++ b/ifuncs.h | |
213d4328 WD |
285 | @@ -64,6 +64,12 @@ isDigit(const char *ptr) |
286 | } | |
287 | ||
288 | static inline int | |
289 | +isXDigit(const char *ptr) | |
290 | +{ | |
291 | + return isxdigit(*(unsigned char *)ptr); | |
292 | +} | |
293 | + | |
294 | +static inline int | |
295 | isPrint(const char *ptr) | |
296 | { | |
297 | return isprint(*(unsigned char *)ptr); | |
cc3e685d WD |
298 | diff --git a/support/rsyncsums b/support/rsyncsums |
299 | new file mode 100644 | |
300 | --- /dev/null | |
301 | +++ b/support/rsyncsums | |
d1a75c9f | 302 | @@ -0,0 +1,203 @@ |
213d4328 WD |
303 | +#!/usr/bin/perl -w |
304 | +use strict; | |
305 | + | |
306 | +use Getopt::Long; | |
307 | +use Cwd qw(abs_path cwd); | |
308 | +use Digest::MD4; | |
309 | +use Digest::MD5; | |
310 | + | |
311 | +our $SUMS_FILE = '.rsyncsums'; | |
312 | + | |
313 | +&Getopt::Long::Configure('bundling'); | |
314 | +&usage if !&GetOptions( | |
213d4328 | 315 | + 'recurse|r' => \( my $recurse_opt ), |
d1a75c9f WD |
316 | + 'simple-cmp|s' => \( my $ignore_ctime_and_inode ), |
317 | + 'check|c' => \( my $check_opt ), | |
213d4328 WD |
318 | + 'verbose|v+' => \( my $verbosity = 0 ), |
319 | + 'help|h' => \( my $help_opt ), | |
320 | +); | |
321 | +&usage if $help_opt; | |
322 | + | |
323 | +my $start_dir = cwd(); | |
324 | + | |
325 | +my @dirs = @ARGV; | |
326 | +@dirs = '.' unless @dirs; | |
327 | +foreach (@dirs) { | |
328 | + $_ = abs_path($_); | |
329 | +} | |
330 | + | |
331 | +$| = 1; | |
332 | + | |
d1a75c9f WD |
333 | +my $exit_code = 0; |
334 | + | |
213d4328 WD |
335 | +my $md4 = Digest::MD4->new; |
336 | +my $md5 = Digest::MD5->new; | |
337 | + | |
338 | +while (@dirs) { | |
339 | + my $dir = shift @dirs; | |
340 | + | |
341 | + if (!chdir($dir)) { | |
342 | + warn "Unable to chdir to $dir: $!\n"; | |
343 | + next; | |
344 | + } | |
345 | + if (!opendir(DP, '.')) { | |
346 | + warn "Unable to opendir $dir: $!\n"; | |
347 | + next; | |
348 | + } | |
349 | + | |
d1a75c9f WD |
350 | + my $reldir = $dir; |
351 | + $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo; | |
213d4328 | 352 | + if ($verbosity) { |
213d4328 | 353 | + print "$reldir ... "; |
d1a75c9f | 354 | + print "\n" if $check_opt; |
213d4328 WD |
355 | + } |
356 | + | |
213d4328 | 357 | + my %cache; |
d1a75c9f WD |
358 | + my $f_cnt = 0; |
359 | + if (open(FP, '<', $SUMS_FILE)) { | |
360 | + while (<FP>) { | |
361 | + chomp; | |
362 | + my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7); | |
363 | + $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ]; | |
364 | + $f_cnt++; | |
365 | + } | |
366 | + close FP; | |
367 | + } | |
368 | + | |
213d4328 | 369 | + my @subdirs; |
d1a75c9f WD |
370 | + my $d_cnt = 0; |
371 | + my $update_cnt = 0; | |
213d4328 WD |
372 | + while (defined(my $fn = readdir(DP))) { |
373 | + next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn; | |
374 | + if (-d _) { | |
375 | + push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/; | |
376 | + next; | |
377 | + } | |
378 | + next unless -f _; | |
379 | + | |
380 | + my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1]; | |
d1a75c9f WD |
381 | + my $ref = $cache{$fn}; |
382 | + if ($size == 0) { | |
383 | + if (defined $ref) { | |
384 | + delete $cache{$fn}; | |
385 | + $f_cnt--; | |
386 | + if (!$check_opt && !$update_cnt++) { | |
387 | + print "UPDATING\n" if $verbosity; | |
388 | + } | |
389 | + } | |
390 | + next; | |
213d4328 | 391 | + } |
d1a75c9f | 392 | + $d_cnt++; |
213d4328 | 393 | + |
d1a75c9f | 394 | + if (!$check_opt) { |
213d4328 | 395 | + if (defined $ref) { |
d1a75c9f WD |
396 | + $$ref[0] = 1; |
397 | + if ($$ref[3] == $size | |
398 | + && $$ref[4] == $mtime | |
399 | + && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode)) | |
400 | + && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) { | |
401 | + next; | |
213d4328 | 402 | + } |
d1a75c9f WD |
403 | + } |
404 | + if (!$update_cnt++) { | |
405 | + print "UPDATING\n" if $verbosity; | |
213d4328 WD |
406 | + } |
407 | + } | |
213d4328 | 408 | + |
d1a75c9f WD |
409 | + if (!open(IN, $fn)) { |
410 | + print STDERR "Unable to read $fn: $!\n"; | |
411 | + if (defined $ref) { | |
213d4328 | 412 | + delete $cache{$fn}; |
d1a75c9f | 413 | + $f_cnt--; |
213d4328 | 414 | + } |
d1a75c9f WD |
415 | + next; |
416 | + } | |
213d4328 | 417 | + |
d1a75c9f WD |
418 | + my($sum4, $sum5); |
419 | + while (1) { | |
420 | + while (sysread(IN, $_, 64*1024)) { | |
421 | + $md4->add($_); | |
422 | + $md5->add($_); | |
213d4328 | 423 | + } |
d1a75c9f WD |
424 | + $sum4 = $md4->hexdigest; |
425 | + $sum5 = $md5->hexdigest; | |
426 | + print " $sum4 $sum5" if $verbosity > 2; | |
427 | + print " $fn" if $verbosity > 1; | |
428 | + my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1]; | |
429 | + last if $size == $size2 && $mtime == $mtime2 | |
430 | + && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2)); | |
431 | + $size = $size2; | |
432 | + $mtime = $mtime2; | |
433 | + $ctime = $ctime2; | |
434 | + $inode = $inode2; | |
435 | + sysseek(IN, 0, 0); | |
436 | + print " REREADING\n" if $verbosity > 1; | |
437 | + } | |
213d4328 | 438 | + |
d1a75c9f WD |
439 | + close IN; |
440 | + | |
441 | + if ($check_opt) { | |
442 | + my $dif; | |
443 | + if (!defined $ref) { | |
444 | + $dif = 'MISSING'; | |
445 | + } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) { | |
446 | + $dif = 'FAILED'; | |
447 | + } else { | |
448 | + print " OK\n" if $verbosity > 1; | |
449 | + next; | |
450 | + } | |
451 | + if ($verbosity < 2) { | |
452 | + print $verbosity ? ' ' : "$reldir/"; | |
453 | + print $fn; | |
213d4328 | 454 | + } |
d1a75c9f WD |
455 | + print " $dif\n"; |
456 | + $exit_code = 1; | |
457 | + } else { | |
458 | + print "\n" if $verbosity > 1; | |
459 | + $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ]; | |
460 | + } | |
461 | + } | |
462 | + | |
463 | + closedir DP; | |
213d4328 | 464 | + |
d1a75c9f WD |
465 | + unshift(@dirs, sort @subdirs) if $recurse_opt; |
466 | + | |
467 | + if ($check_opt) { | |
468 | + ; | |
469 | + } elsif ($d_cnt == 0) { | |
470 | + if ($f_cnt) { | |
471 | + print "(removed $SUMS_FILE) " if $verbosity; | |
472 | + unlink($SUMS_FILE); | |
213d4328 | 473 | + } |
d1a75c9f WD |
474 | + print "empty\n" if $verbosity; |
475 | + } elsif ($update_cnt || $d_cnt != $f_cnt) { | |
476 | + print "UPDATING\n" if $verbosity && !$update_cnt; | |
477 | + open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n"; | |
213d4328 | 478 | + |
213d4328 WD |
479 | + foreach my $fn (sort keys %cache) { |
480 | + my $ref = $cache{$fn}; | |
d1a75c9f WD |
481 | + my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref; |
482 | + next unless $found; | |
213d4328 WD |
483 | + printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn; |
484 | + } | |
d1a75c9f | 485 | + close FP; |
213d4328 WD |
486 | + } else { |
487 | + print "ok\n" if $verbosity; | |
488 | + } | |
213d4328 WD |
489 | +} |
490 | + | |
d1a75c9f WD |
491 | +exit $exit_code; |
492 | + | |
213d4328 WD |
493 | +sub usage |
494 | +{ | |
495 | + die <<EOT; | |
496 | +Usage: rsyncsums [OPTIONS] [DIRS] | |
497 | + | |
498 | +Options: | |
499 | + -r, --recurse Update $SUMS_FILE files in subdirectories too. | |
500 | + -s, --simple-cmp Ignore ctime and inode values when comparing identicality. | |
d1a75c9f | 501 | + -c, --check Check if the checksums are right (doesn't update). |
213d4328 WD |
502 | + -v, --verbose Mention what we're doing. Repeat for more info. |
503 | + -h, --help Display this help message. | |
504 | +EOT | |
505 | +} |