Commit | Line | Data |
---|---|---|
213d4328 WD |
1 | Optimize the ability of a mirror to send checksums. |
2 | ||
3 | This adds a sender optimization feature that allows a cache of checksums | |
4 | to be used when the client specifies the --checksum option. The checksum | |
5 | files (.rsyncsums) must be created by some other process (see the Perl | |
6 | script support/rsyncsums for one way). | |
7 | ||
8 | This feature is intended for mirrors whose files are created once and then | |
9 | not changed. rsync validates a cached sum using only minimal sanity-check | |
10 | information from the .rsyncsums file (size and mtime) so that the sum files | |
11 | can be shared with your mirror network. | |
12 | ||
13 | To use this patch, run these commands for a successful build: | |
14 | ||
cc3e685d | 15 | patch -p1 <patches/checksum-reading.diff |
213d4328 WD |
16 | ./configure (optional if already run) |
17 | make | |
18 | ||
cc3e685d WD |
19 | diff --git a/flist.c b/flist.c |
20 | --- a/flist.c | |
21 | +++ b/flist.c | |
f2863bc0 WD |
22 | @@ -121,6 +121,7 @@ static char tmp_sum[MAX_DIGEST_LEN]; |
23 | static char empty_sum[MAX_DIGEST_LEN]; | |
213d4328 WD |
24 | static int flist_count_offset; /* for --delete --progress */ |
25 | static int dir_count = 0; | |
213d4328 WD |
26 | +static struct file_list *checksum_flist = NULL; |
27 | ||
f2863bc0 | 28 | static void flist_sort_and_clean(struct file_list *flist, int strip_root); |
213d4328 | 29 | static void output_flist(struct file_list *flist); |
f2863bc0 | 30 | @@ -313,6 +314,186 @@ static void flist_done_allocating(struct file_list *flist) |
213d4328 WD |
31 | flist->pool_boundary = ptr; |
32 | } | |
33 | ||
34 | +/* The len count is the length of the basename + 1 for the null. */ | |
35 | +static int add_checksum(const char *dirname, const char *basename, int len, | |
36 | + OFF_T file_length, time_t mtime, const char *sum) | |
37 | +{ | |
38 | + struct file_struct *file; | |
39 | + int alloc_len, extra_len; | |
40 | + char *bp; | |
41 | + | |
42 | + if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0) | |
43 | + return 0; | |
44 | + if (file_length == 0) | |
45 | + return 0; | |
46 | + | |
47 | + extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT) | |
48 | + * EXTRA_LEN; | |
49 | +#if EXTRA_ROUNDING > 0 | |
50 | + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN)) | |
51 | + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN; | |
52 | +#endif | |
53 | + alloc_len = FILE_STRUCT_LEN + extra_len + len; | |
54 | + bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum"); | |
55 | + | |
56 | + memset(bp, 0, extra_len + FILE_STRUCT_LEN); | |
57 | + bp += extra_len; | |
58 | + file = (struct file_struct *)bp; | |
59 | + bp += FILE_STRUCT_LEN; | |
60 | + | |
61 | + memcpy(bp, basename, len); | |
62 | + | |
63 | + file->mode = S_IFREG; | |
64 | + file->modtime = mtime; | |
65 | + file->len32 = (uint32)file_length; | |
66 | + if (file_length > 0xFFFFFFFFu) { | |
67 | + file->flags |= FLAG_LENGTH64; | |
68 | + OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32); | |
69 | + } | |
70 | + file->dirname = dirname; | |
71 | + bp = F_SUM(file); | |
72 | + memcpy(bp, sum, checksum_len); | |
73 | + | |
74 | + flist_expand(checksum_flist, 1); | |
75 | + checksum_flist->files[checksum_flist->used++] = file; | |
76 | + | |
77 | + checksum_flist->sorted = checksum_flist->files; | |
78 | + | |
79 | + return 1; | |
80 | +} | |
81 | + | |
82 | +/* The dirname value must remain unchanged during the lifespan of the | |
83 | + * created checksum_flist object because we use it directly. */ | |
84 | +static void read_checksums(const char *dirname) | |
85 | +{ | |
86 | + char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN]; | |
87 | + OFF_T file_length; | |
88 | + time_t mtime; | |
89 | + int len, dlen, i; | |
90 | + char *cp; | |
91 | + FILE *fp; | |
92 | + | |
93 | + if (checksum_flist) { | |
94 | + /* Reset the pool memory and empty the file-list array. */ | |
95 | + pool_free_old(checksum_flist->file_pool, | |
96 | + pool_boundary(checksum_flist->file_pool, 0)); | |
97 | + checksum_flist->used = 0; | |
98 | + } else | |
99 | + checksum_flist = flist_new(FLIST_TEMP, "read_checksums"); | |
100 | + | |
101 | + checksum_flist->low = 0; | |
102 | + checksum_flist->high = -1; | |
103 | + | |
104 | + if (!dirname) | |
105 | + return; | |
106 | + | |
107 | + dlen = strlcpy(fbuf, dirname, sizeof fbuf); | |
108 | + if (dlen >= (int)sizeof fbuf) | |
109 | + return; | |
110 | + if (dlen) | |
111 | + fbuf[dlen++] = '/'; | |
112 | + else | |
113 | + dirname = NULL; | |
114 | + strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen); | |
115 | + if (!(fp = fopen(fbuf, "r"))) | |
116 | + return; | |
117 | + | |
118 | + while (fgets(line, sizeof line, fp)) { | |
119 | + cp = line; | |
120 | + if (protocol_version >= 30) { | |
121 | + char *alt_sum = cp; | |
122 | + if (*cp == '=') | |
123 | + while (*++cp == '=') {} | |
124 | + else | |
125 | + while (isXDigit(cp)) cp++; | |
126 | + if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ') | |
127 | + break; | |
128 | + while (*++cp == ' ') {} | |
129 | + } | |
130 | + | |
131 | + if (*cp == '=') { | |
132 | + continue; | |
133 | + } else { | |
134 | + for (i = 0; i < checksum_len*2; i++, cp++) { | |
135 | + int x; | |
136 | + if (isXDigit(cp)) { | |
137 | + if (isDigit(cp)) | |
138 | + x = *cp - '0'; | |
139 | + else | |
140 | + x = (*cp & 0xF) + 9; | |
141 | + } else { | |
142 | + cp = ""; | |
143 | + break; | |
144 | + } | |
145 | + if (i & 1) | |
146 | + sum[i/2] |= x; | |
147 | + else | |
148 | + sum[i/2] = x << 4; | |
149 | + } | |
150 | + } | |
151 | + if (*cp != ' ') | |
152 | + break; | |
153 | + while (*++cp == ' ') {} | |
154 | + | |
155 | + if (protocol_version < 30) { | |
156 | + char *alt_sum = cp; | |
157 | + if (*cp == '=') | |
158 | + while (*++cp == '=') {} | |
159 | + else | |
160 | + while (isXDigit(cp)) cp++; | |
161 | + if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ') | |
162 | + break; | |
163 | + while (*++cp == ' ') {} | |
164 | + } | |
165 | + | |
166 | + file_length = 0; | |
167 | + while (isDigit(cp)) | |
168 | + file_length = file_length * 10 + *cp++ - '0'; | |
169 | + if (*cp != ' ') | |
170 | + break; | |
171 | + while (*++cp == ' ') {} | |
172 | + | |
173 | + mtime = 0; | |
174 | + while (isDigit(cp)) | |
175 | + mtime = mtime * 10 + *cp++ - '0'; | |
176 | + if (*cp != ' ') | |
177 | + break; | |
178 | + while (*++cp == ' ') {} | |
179 | + | |
180 | + /* Ignore ctime. */ | |
181 | + while (isDigit(cp)) | |
182 | + cp++; | |
183 | + if (*cp != ' ') | |
184 | + break; | |
185 | + while (*++cp == ' ') {} | |
186 | + | |
187 | + /* Ignore inode. */ | |
188 | + while (isDigit(cp)) | |
189 | + cp++; | |
190 | + if (*cp != ' ') | |
191 | + break; | |
192 | + while (*++cp == ' ') {} | |
193 | + | |
194 | + len = strlen(cp); | |
195 | + while (len && (cp[len-1] == '\n' || cp[len-1] == '\r')) | |
196 | + len--; | |
197 | + if (!len) | |
198 | + break; | |
199 | + cp[len++] = '\0'; /* len now counts the null */ | |
200 | + if (strchr(cp, '/')) | |
201 | + break; | |
202 | + if (len > MAXPATHLEN) | |
203 | + continue; | |
204 | + | |
205 | + strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen); | |
206 | + | |
207 | + add_checksum(dirname, cp, len, file_length, mtime, sum); | |
208 | + } | |
209 | + fclose(fp); | |
210 | + | |
85096e5e | 211 | + flist_sort_and_clean(checksum_flist, 0); |
213d4328 WD |
212 | +} |
213 | + | |
214 | int push_pathname(const char *dir, int len) | |
215 | { | |
216 | if (dir == pathname) | |
f2863bc0 | 217 | @@ -1003,7 +1184,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, |
213d4328 WD |
218 | STRUCT_STAT *stp, int flags, int filter_level) |
219 | { | |
220 | static char *lastdir; | |
221 | - static int lastdir_len = -1; | |
222 | + static int lastdir_len = -2; | |
223 | struct file_struct *file; | |
224 | char thisname[MAXPATHLEN]; | |
225 | char linkname[MAXPATHLEN]; | |
a5e6228a | 226 | @@ -1136,9 +1317,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, |
213d4328 WD |
227 | memcpy(lastdir, thisname, len); |
228 | lastdir[len] = '\0'; | |
229 | lastdir_len = len; | |
230 | + if (always_checksum && am_sender && flist) | |
231 | + read_checksums(lastdir); | |
232 | } | |
233 | - } else | |
234 | + } else { | |
235 | basename = thisname; | |
236 | + if (always_checksum && am_sender && flist && lastdir_len == -2) { | |
237 | + lastdir_len = -1; | |
238 | + read_checksums(""); | |
239 | + } | |
240 | + } | |
241 | basename_len = strlen(basename) + 1; /* count the '\0' */ | |
242 | ||
243 | #ifdef SUPPORT_LINKS | |
a5e6228a | 244 | @@ -1214,11 +1402,21 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, |
213d4328 WD |
245 | } |
246 | #endif | |
247 | ||
248 | - if (always_checksum && am_sender && S_ISREG(st.st_mode)) | |
249 | - file_checksum(thisname, tmp_sum, st.st_size); | |
250 | - | |
251 | F_PATHNAME(file) = pathname; | |
252 | ||
253 | + if (always_checksum && am_sender && S_ISREG(st.st_mode)) { | |
254 | + int j; | |
255 | + if (flist && (j = flist_find(checksum_flist, file)) >= 0) { | |
256 | + struct file_struct *fp = checksum_flist->sorted[j]; | |
257 | + if (F_LENGTH(fp) == st.st_size | |
258 | + && fp->modtime == st.st_mtime) | |
259 | + memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN); | |
260 | + else | |
261 | + file_checksum(thisname, tmp_sum, st.st_size); | |
262 | + } else | |
263 | + file_checksum(thisname, tmp_sum, st.st_size); | |
264 | + } | |
265 | + | |
266 | /* This code is only used by the receiver when it is building | |
267 | * a list of files for a delete pass. */ | |
268 | if (keep_dirlinks && linkname_len && flist) { | |
85096e5e | 269 | @@ -2074,7 +2272,8 @@ struct file_list *send_file_list(int f, int argc, char *argv[]) |
6e9495c7 WD |
270 | * file-list to check if this is a 1-file xfer. */ |
271 | send_extra_file_list(f, 1); | |
272 | } | |
273 | - } | |
274 | + } else | |
275 | + flist_eof = 1; | |
6e9495c7 WD |
276 | |
277 | return flist; | |
278 | } | |
cc3e685d WD |
279 | diff --git a/ifuncs.h b/ifuncs.h |
280 | --- a/ifuncs.h | |
281 | +++ b/ifuncs.h | |
213d4328 WD |
282 | @@ -64,6 +64,12 @@ isDigit(const char *ptr) |
283 | } | |
284 | ||
285 | static inline int | |
286 | +isXDigit(const char *ptr) | |
287 | +{ | |
288 | + return isxdigit(*(unsigned char *)ptr); | |
289 | +} | |
290 | + | |
291 | +static inline int | |
292 | isPrint(const char *ptr) | |
293 | { | |
294 | return isprint(*(unsigned char *)ptr); | |
cc3e685d WD |
295 | diff --git a/support/rsyncsums b/support/rsyncsums |
296 | new file mode 100644 | |
297 | --- /dev/null | |
298 | +++ b/support/rsyncsums | |
d1a75c9f | 299 | @@ -0,0 +1,203 @@ |
213d4328 WD |
300 | +#!/usr/bin/perl -w |
301 | +use strict; | |
302 | + | |
303 | +use Getopt::Long; | |
304 | +use Cwd qw(abs_path cwd); | |
305 | +use Digest::MD4; | |
306 | +use Digest::MD5; | |
307 | + | |
308 | +our $SUMS_FILE = '.rsyncsums'; | |
309 | + | |
310 | +&Getopt::Long::Configure('bundling'); | |
311 | +&usage if !&GetOptions( | |
213d4328 | 312 | + 'recurse|r' => \( my $recurse_opt ), |
d1a75c9f WD |
313 | + 'simple-cmp|s' => \( my $ignore_ctime_and_inode ), |
314 | + 'check|c' => \( my $check_opt ), | |
213d4328 WD |
315 | + 'verbose|v+' => \( my $verbosity = 0 ), |
316 | + 'help|h' => \( my $help_opt ), | |
317 | +); | |
318 | +&usage if $help_opt; | |
319 | + | |
320 | +my $start_dir = cwd(); | |
321 | + | |
322 | +my @dirs = @ARGV; | |
323 | +@dirs = '.' unless @dirs; | |
324 | +foreach (@dirs) { | |
325 | + $_ = abs_path($_); | |
326 | +} | |
327 | + | |
328 | +$| = 1; | |
329 | + | |
d1a75c9f WD |
330 | +my $exit_code = 0; |
331 | + | |
213d4328 WD |
332 | +my $md4 = Digest::MD4->new; |
333 | +my $md5 = Digest::MD5->new; | |
334 | + | |
335 | +while (@dirs) { | |
336 | + my $dir = shift @dirs; | |
337 | + | |
338 | + if (!chdir($dir)) { | |
339 | + warn "Unable to chdir to $dir: $!\n"; | |
340 | + next; | |
341 | + } | |
342 | + if (!opendir(DP, '.')) { | |
343 | + warn "Unable to opendir $dir: $!\n"; | |
344 | + next; | |
345 | + } | |
346 | + | |
d1a75c9f WD |
347 | + my $reldir = $dir; |
348 | + $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo; | |
213d4328 | 349 | + if ($verbosity) { |
213d4328 | 350 | + print "$reldir ... "; |
d1a75c9f | 351 | + print "\n" if $check_opt; |
213d4328 WD |
352 | + } |
353 | + | |
213d4328 | 354 | + my %cache; |
d1a75c9f WD |
355 | + my $f_cnt = 0; |
356 | + if (open(FP, '<', $SUMS_FILE)) { | |
357 | + while (<FP>) { | |
358 | + chomp; | |
359 | + my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7); | |
360 | + $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ]; | |
361 | + $f_cnt++; | |
362 | + } | |
363 | + close FP; | |
364 | + } | |
365 | + | |
213d4328 | 366 | + my @subdirs; |
d1a75c9f WD |
367 | + my $d_cnt = 0; |
368 | + my $update_cnt = 0; | |
213d4328 WD |
369 | + while (defined(my $fn = readdir(DP))) { |
370 | + next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn; | |
371 | + if (-d _) { | |
372 | + push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/; | |
373 | + next; | |
374 | + } | |
375 | + next unless -f _; | |
376 | + | |
377 | + my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1]; | |
d1a75c9f WD |
378 | + my $ref = $cache{$fn}; |
379 | + if ($size == 0) { | |
380 | + if (defined $ref) { | |
381 | + delete $cache{$fn}; | |
382 | + $f_cnt--; | |
383 | + if (!$check_opt && !$update_cnt++) { | |
384 | + print "UPDATING\n" if $verbosity; | |
385 | + } | |
386 | + } | |
387 | + next; | |
213d4328 | 388 | + } |
d1a75c9f | 389 | + $d_cnt++; |
213d4328 | 390 | + |
d1a75c9f | 391 | + if (!$check_opt) { |
213d4328 | 392 | + if (defined $ref) { |
d1a75c9f WD |
393 | + $$ref[0] = 1; |
394 | + if ($$ref[3] == $size | |
395 | + && $$ref[4] == $mtime | |
396 | + && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode)) | |
397 | + && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) { | |
398 | + next; | |
213d4328 | 399 | + } |
d1a75c9f WD |
400 | + } |
401 | + if (!$update_cnt++) { | |
402 | + print "UPDATING\n" if $verbosity; | |
213d4328 WD |
403 | + } |
404 | + } | |
213d4328 | 405 | + |
d1a75c9f WD |
406 | + if (!open(IN, $fn)) { |
407 | + print STDERR "Unable to read $fn: $!\n"; | |
408 | + if (defined $ref) { | |
213d4328 | 409 | + delete $cache{$fn}; |
d1a75c9f | 410 | + $f_cnt--; |
213d4328 | 411 | + } |
d1a75c9f WD |
412 | + next; |
413 | + } | |
213d4328 | 414 | + |
d1a75c9f WD |
415 | + my($sum4, $sum5); |
416 | + while (1) { | |
417 | + while (sysread(IN, $_, 64*1024)) { | |
418 | + $md4->add($_); | |
419 | + $md5->add($_); | |
213d4328 | 420 | + } |
d1a75c9f WD |
421 | + $sum4 = $md4->hexdigest; |
422 | + $sum5 = $md5->hexdigest; | |
423 | + print " $sum4 $sum5" if $verbosity > 2; | |
424 | + print " $fn" if $verbosity > 1; | |
425 | + my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1]; | |
426 | + last if $size == $size2 && $mtime == $mtime2 | |
427 | + && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2)); | |
428 | + $size = $size2; | |
429 | + $mtime = $mtime2; | |
430 | + $ctime = $ctime2; | |
431 | + $inode = $inode2; | |
432 | + sysseek(IN, 0, 0); | |
433 | + print " REREADING\n" if $verbosity > 1; | |
434 | + } | |
213d4328 | 435 | + |
d1a75c9f WD |
436 | + close IN; |
437 | + | |
438 | + if ($check_opt) { | |
439 | + my $dif; | |
440 | + if (!defined $ref) { | |
441 | + $dif = 'MISSING'; | |
442 | + } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) { | |
443 | + $dif = 'FAILED'; | |
444 | + } else { | |
445 | + print " OK\n" if $verbosity > 1; | |
446 | + next; | |
447 | + } | |
448 | + if ($verbosity < 2) { | |
449 | + print $verbosity ? ' ' : "$reldir/"; | |
450 | + print $fn; | |
213d4328 | 451 | + } |
d1a75c9f WD |
452 | + print " $dif\n"; |
453 | + $exit_code = 1; | |
454 | + } else { | |
455 | + print "\n" if $verbosity > 1; | |
456 | + $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ]; | |
457 | + } | |
458 | + } | |
459 | + | |
460 | + closedir DP; | |
213d4328 | 461 | + |
d1a75c9f WD |
462 | + unshift(@dirs, sort @subdirs) if $recurse_opt; |
463 | + | |
464 | + if ($check_opt) { | |
465 | + ; | |
466 | + } elsif ($d_cnt == 0) { | |
467 | + if ($f_cnt) { | |
468 | + print "(removed $SUMS_FILE) " if $verbosity; | |
469 | + unlink($SUMS_FILE); | |
213d4328 | 470 | + } |
d1a75c9f WD |
471 | + print "empty\n" if $verbosity; |
472 | + } elsif ($update_cnt || $d_cnt != $f_cnt) { | |
473 | + print "UPDATING\n" if $verbosity && !$update_cnt; | |
474 | + open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n"; | |
213d4328 | 475 | + |
213d4328 WD |
476 | + foreach my $fn (sort keys %cache) { |
477 | + my $ref = $cache{$fn}; | |
d1a75c9f WD |
478 | + my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref; |
479 | + next unless $found; | |
213d4328 WD |
480 | + printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn; |
481 | + } | |
d1a75c9f | 482 | + close FP; |
213d4328 WD |
483 | + } else { |
484 | + print "ok\n" if $verbosity; | |
485 | + } | |
213d4328 WD |
486 | +} |
487 | + | |
d1a75c9f WD |
488 | +exit $exit_code; |
489 | + | |
213d4328 WD |
490 | +sub usage |
491 | +{ | |
492 | + die <<EOT; | |
493 | +Usage: rsyncsums [OPTIONS] [DIRS] | |
494 | + | |
495 | +Options: | |
496 | + -r, --recurse Update $SUMS_FILE files in subdirectories too. | |
497 | + -s, --simple-cmp Ignore ctime and inode values when comparing identicality. | |
d1a75c9f | 498 | + -c, --check Check if the checksums are right (doesn't update). |
213d4328 WD |
499 | + -v, --verbose Mention what we're doing. Repeat for more info. |
500 | + -h, --help Display this help message. | |
501 | +EOT | |
502 | +} |