Broke up the checksum-updating patch into a non-updating version
[rsync/rsync-patches.git] / checksum4mirrors.diff
CommitLineData
213d4328
WD
1Optimize the ability of a mirror to send checksums.
2
3This adds a sender optimization feature that allows a cache of checksums
4to be used when the client specifies the --checksum option. The checksum
5files (.rsyncsums) must be created by some other process (see the Perl
6script support/rsyncsums, which this patch adds, for one way).
7
8This feature should be used by mirrors that contain files that get created and
9not changed. There is a minimal amount of sanity-check information in the
10.rsyncsums file (size and mtime) so that the sum files can be shared with your
11mirror network.
12
13To use this patch, run these commands for a successful build:
14
15 patch -p1 <patches/checksum4mirrors.diff
16 ./configure (optional if already run)
17 make
18
19--- old/flist.c
20+++ new/flist.c
21@@ -117,6 +117,7 @@ static char empty_sum[MAX_DIGEST_LEN];
22 static int flist_count_offset; /* for --delete --progress */
23 static int dir_count = 0;
24 static int high_hlink_ndx;
25+static struct file_list *checksum_flist = NULL;
26
27 static void clean_flist(struct file_list *flist, int strip_root);
28 static void output_flist(struct file_list *flist);
29@@ -304,6 +305,186 @@ static void flist_done_allocating(struct
30 flist->pool_boundary = ptr;
31 }
32
33+/* The len count is the length of the basename + 1 for the null. */
34+static int add_checksum(const char *dirname, const char *basename, int len,
35+ OFF_T file_length, time_t mtime, const char *sum)
36+{
37+ struct file_struct *file;
38+ int alloc_len, extra_len;
39+ char *bp;
40+
41+ if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0)
42+ return 0;
43+ if (file_length == 0)
44+ return 0;
45+
46+ extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT)
47+ * EXTRA_LEN;
48+#if EXTRA_ROUNDING > 0
49+ if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
50+ extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
51+#endif
52+ alloc_len = FILE_STRUCT_LEN + extra_len + len;
53+ bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum");
54+
55+ memset(bp, 0, extra_len + FILE_STRUCT_LEN);
56+ bp += extra_len;
57+ file = (struct file_struct *)bp;
58+ bp += FILE_STRUCT_LEN;
59+
60+ memcpy(bp, basename, len);
61+
62+ file->mode = S_IFREG;
63+ file->modtime = mtime;
64+ file->len32 = (uint32)file_length;
65+ if (file_length > 0xFFFFFFFFu) {
66+ file->flags |= FLAG_LENGTH64;
67+ OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
68+ }
69+ file->dirname = dirname;
70+ bp = F_SUM(file);
71+ memcpy(bp, sum, checksum_len);
72+
73+ flist_expand(checksum_flist, 1);
74+ checksum_flist->files[checksum_flist->used++] = file;
75+
76+ checksum_flist->sorted = checksum_flist->files;
77+
78+ return 1;
79+}
80+
81+/* The dirname value must remain unchanged during the lifespan of the
82+ * created checksum_flist object because we use it directly. */
83+static void read_checksums(const char *dirname)
84+{
85+ char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
86+ OFF_T file_length;
87+ time_t mtime;
88+ int len, dlen, i;
89+ char *cp;
90+ FILE *fp;
91+
92+ if (checksum_flist) {
93+ /* Reset the pool memory and empty the file-list array. */
94+ pool_free_old(checksum_flist->file_pool,
95+ pool_boundary(checksum_flist->file_pool, 0));
96+ checksum_flist->used = 0;
97+ } else
98+ checksum_flist = flist_new(FLIST_TEMP, "read_checksums");
99+
100+ checksum_flist->low = 0;
101+ checksum_flist->high = -1;
102+
103+ if (!dirname)
104+ return;
105+
106+ dlen = strlcpy(fbuf, dirname, sizeof fbuf);
107+ if (dlen >= (int)sizeof fbuf)
108+ return;
109+ if (dlen)
110+ fbuf[dlen++] = '/';
111+ else
112+ dirname = NULL;
113+ strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen);
114+ if (!(fp = fopen(fbuf, "r")))
115+ return;
116+
117+ while (fgets(line, sizeof line, fp)) {
118+ cp = line;
119+ if (protocol_version >= 30) {
120+ char *alt_sum = cp;
121+ if (*cp == '=')
122+ while (*++cp == '=') {}
123+ else
124+ while (isXDigit(cp)) cp++;
125+ if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
126+ break;
127+ while (*++cp == ' ') {}
128+ }
129+
130+ if (*cp == '=') {
131+ continue;
132+ } else {
133+ for (i = 0; i < checksum_len*2; i++, cp++) {
134+ int x;
135+ if (isXDigit(cp)) {
136+ if (isDigit(cp))
137+ x = *cp - '0';
138+ else
139+ x = (*cp & 0xF) + 9;
140+ } else {
141+ cp = "";
142+ break;
143+ }
144+ if (i & 1)
145+ sum[i/2] |= x;
146+ else
147+ sum[i/2] = x << 4;
148+ }
149+ }
150+ if (*cp != ' ')
151+ break;
152+ while (*++cp == ' ') {}
153+
154+ if (protocol_version < 30) {
155+ char *alt_sum = cp;
156+ if (*cp == '=')
157+ while (*++cp == '=') {}
158+ else
159+ while (isXDigit(cp)) cp++;
160+ if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
161+ break;
162+ while (*++cp == ' ') {}
163+ }
164+
165+ file_length = 0;
166+ while (isDigit(cp))
167+ file_length = file_length * 10 + *cp++ - '0';
168+ if (*cp != ' ')
169+ break;
170+ while (*++cp == ' ') {}
171+
172+ mtime = 0;
173+ while (isDigit(cp))
174+ mtime = mtime * 10 + *cp++ - '0';
175+ if (*cp != ' ')
176+ break;
177+ while (*++cp == ' ') {}
178+
179+ /* Ignore ctime. */
180+ while (isDigit(cp))
181+ cp++;
182+ if (*cp != ' ')
183+ break;
184+ while (*++cp == ' ') {}
185+
186+ /* Ignore inode. */
187+ while (isDigit(cp))
188+ cp++;
189+ if (*cp != ' ')
190+ break;
191+ while (*++cp == ' ') {}
192+
193+ len = strlen(cp);
194+ while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
195+ len--;
196+ if (!len)
197+ break;
198+ cp[len++] = '\0'; /* len now counts the null */
199+ if (strchr(cp, '/'))
200+ break;
201+ if (len > MAXPATHLEN)
202+ continue;
203+
204+ strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
205+
206+ add_checksum(dirname, cp, len, file_length, mtime, sum);
207+ }
208+ fclose(fp);
209+
210+ clean_flist(checksum_flist, 0);
211+}
212+
213 int push_pathname(const char *dir, int len)
214 {
215 if (dir == pathname)
216@@ -989,7 +1170,7 @@ struct file_struct *make_file(const char
217 STRUCT_STAT *stp, int flags, int filter_level)
218 {
219 static char *lastdir;
220- static int lastdir_len = -1;
221+ static int lastdir_len = -2;
222 struct file_struct *file;
223 char thisname[MAXPATHLEN];
224 char linkname[MAXPATHLEN];
225@@ -1119,9 +1300,16 @@ struct file_struct *make_file(const char
226 memcpy(lastdir, thisname, len);
227 lastdir[len] = '\0';
228 lastdir_len = len;
229+ if (always_checksum && am_sender && flist)
230+ read_checksums(lastdir);
231 }
232- } else
233+ } else {
234 basename = thisname;
235+ if (always_checksum && am_sender && flist && lastdir_len == -2) {
236+ lastdir_len = -1;
237+ read_checksums("");
238+ }
239+ }
240 basename_len = strlen(basename) + 1; /* count the '\0' */
241
242 #ifdef SUPPORT_LINKS
243@@ -1197,11 +1385,21 @@ struct file_struct *make_file(const char
244 }
245 #endif
246
247- if (always_checksum && am_sender && S_ISREG(st.st_mode))
248- file_checksum(thisname, tmp_sum, st.st_size);
249-
250 F_PATHNAME(file) = pathname;
251
252+ if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
253+ int j;
254+ if (flist && (j = flist_find(checksum_flist, file)) >= 0) {
255+ struct file_struct *fp = checksum_flist->sorted[j];
256+ if (F_LENGTH(fp) == st.st_size
257+ && fp->modtime == st.st_mtime)
258+ memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN);
259+ else
260+ file_checksum(thisname, tmp_sum, st.st_size);
261+ } else
262+ file_checksum(thisname, tmp_sum, st.st_size);
263+ }
264+
265 /* This code is only used by the receiver when it is building
266 * a list of files for a delete pass. */
267 if (keep_dirlinks && linkname_len && flist) {
268--- old/ifuncs.h
269+++ new/ifuncs.h
270@@ -64,6 +64,12 @@ isDigit(const char *ptr)
271 }
272
273 static inline int
274+isXDigit(const char *ptr)
275+{
276+ return isxdigit(*(unsigned char *)ptr);
277+}
278+
279+static inline int
280 isPrint(const char *ptr)
281 {
282 return isprint(*(unsigned char *)ptr);
283--- old/patches/checksum-updating.diff
284+++ new/patches/checksum-updating.diff
285@@ -476,15 +476,6 @@ To use this patch, run these commands fo
286
287 return flist;
288 }
289-@@ -2320,7 +2673,7 @@ void flist_free(struct file_list *flist)
290-
291- if (!flist->prev || !flist_cnt)
292- pool_destroy(flist->file_pool);
293-- else
294-+ else if (flist->pool_boundary)
295- pool_free_old(flist->file_pool, flist->pool_boundary);
296-
297- if (flist->sorted && flist->sorted != flist->files)
298 --- old/ifuncs.h
299 +++ new/ifuncs.h
300 @@ -64,6 +64,12 @@ isDigit(const char *ptr)
301--- old/support/rsyncsums
302+++ new/support/rsyncsums
303@@ -0,0 +1,183 @@
304+#!/usr/bin/perl -w
305+use strict;
306+
307+use Getopt::Long;
308+use Cwd qw(abs_path cwd);
309+use Digest::MD4;
310+use Digest::MD5;
311+
312+our $SUMS_FILE = '.rsyncsums';
313+
314+&Getopt::Long::Configure('bundling');
315+&usage if !&GetOptions(
316+ 'simple-cmp|s' => \( my $ignore_ctime_and_inode ),
317+ 'recurse|r' => \( my $recurse_opt ),
318+ 'verbose|v+' => \( my $verbosity = 0 ),
319+ 'help|h' => \( my $help_opt ),
320+);
321+&usage if $help_opt;
322+
323+my $start_dir = cwd();
324+
325+my @dirs = @ARGV;
326+@dirs = '.' unless @dirs;
327+foreach (@dirs) {
328+ $_ = abs_path($_);
329+}
330+
331+$| = 1;
332+
333+my $md4 = Digest::MD4->new;
334+my $md5 = Digest::MD5->new;
335+
336+while (@dirs) {
337+ my $dir = shift @dirs;
338+
339+ if (!chdir($dir)) {
340+ warn "Unable to chdir to $dir: $!\n";
341+ next;
342+ }
343+ if (!opendir(DP, '.')) {
344+ warn "Unable to opendir $dir: $!\n";
345+ next;
346+ }
347+
348+ if ($verbosity) {
349+ my $reldir = $dir;
350+ $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
351+ print "$reldir ... ";
352+ }
353+
354+ my $sums_file_exists = -e $SUMS_FILE;
355+ my %cache;
356+ my @subdirs;
357+ my $cnt = 0;
358+ while (defined(my $fn = readdir(DP))) {
359+ next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
360+ if (-d _) {
361+ push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
362+ next;
363+ }
364+ next unless -f _;
365+
366+ my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
367+ next if $size == 0;
368+
369+ $cache{$fn} = [ $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
370+ $cnt++;
371+ }
372+
373+ closedir DP;
374+
375+ unshift(@dirs, sort @subdirs) if $recurse_opt;
376+
377+ if (!$cnt) {
378+ if ($sums_file_exists) {
379+ print "(removed $SUMS_FILE) " if $verbosity;
380+ unlink($SUMS_FILE);
381+ }
382+ print "empty\n" if $verbosity;
383+ next;
384+ }
385+
386+ if (open(FP, '+<', $SUMS_FILE)) {
387+ while (<FP>) {
388+ chomp;
389+ my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
390+ my $ref = $cache{$fn};
391+ if (defined $ref) {
392+ if ($ignore_ctime_and_inode) {
393+ $ctime = $$ref[2];
394+ $inode = $$ref[3];
395+ }
396+ if ($$ref[0] == $size
397+ && $$ref[1] == $mtime
398+ && $$ref[2] == $ctime
399+ && $$ref[3] == $inode
400+ && $sum4 !~ /=/ && $sum5 !~ /=/) {
401+ $$ref[4] = $sum4;
402+ $$ref[5] = $sum5;
403+ $cnt--;
404+ } else {
405+ $$ref[4] = $$ref[5] = undef;
406+ }
407+ } else {
408+ $cnt = -1; # Force rewrite due to removed line.
409+ }
410+ }
411+ } else {
412+ open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
413+ $cnt = -1;
414+ }
415+
416+ if ($cnt) {
417+ print "UPDATING\n" if $verbosity;
418+ while (my($fn, $ref) = each %cache) {
419+ next if defined $$ref[3] && defined $$ref[4];
420+ if (!open(IN, $fn)) {
421+ print STDERR "Unable to read $fn: $!\n";
422+ delete $cache{$fn};
423+ next;
424+ }
425+
426+ my($size,$mtime,$ctime,$inode) = (stat(IN))[7,9,10,1];
427+ if ($size == 0) {
428+ close IN;
429+ next;
430+ }
431+
432+ my($sum4, $sum5);
433+ while (1) {
434+ while (sysread(IN, $_, 64*1024)) {
435+ $md4->add($_);
436+ $md5->add($_);
437+ }
438+ $sum4 = $md4->hexdigest;
439+ $sum5 = $md5->hexdigest;
440+ print " $sum4 $sum5" if $verbosity > 2;
441+ print " $fn\n" if $verbosity > 1;
442+ my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
443+ if ($ignore_ctime_and_inode) {
444+ $ctime = $ctime2;
445+ $inode = $inode2;
446+ }
447+ last if $size == $size2 && $mtime == $mtime2
448+ && $ctime == $ctime2 && $inode == $inode2;
449+ $size = $size2;
450+ $mtime = $mtime2;
451+ $ctime = $ctime2;
452+ $inode = $inode2;
453+ sysseek(IN, 0, 0);
454+ }
455+
456+ close IN;
457+
458+ $cache{$fn} = [ $size, $mtime, $ctime, $inode, $sum4, $sum5 ];
459+ }
460+
461+ seek(FP, 0, 0);
462+ foreach my $fn (sort keys %cache) {
463+ my $ref = $cache{$fn};
464+ my($size, $mtime, $ctime, $inode, $sum4, $sum5) = @$ref;
465+ printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
466+ }
467+ truncate(FP, tell(FP));
468+ } else {
469+ print "ok\n" if $verbosity;
470+ }
471+
472+ close FP;
473+}
474+
475+sub usage
476+{
477+ die <<EOT;
478+Usage: rsyncsums [OPTIONS] [DIRS]
479+
480+Options:
481+ -r, --recurse Update $SUMS_FILE files in subdirectories too.
482+ -s, --simple-cmp Ignore ctime and inode values when comparing identicality.
483+ -v, --verbose Mention what we're doing. Repeat for more info.
484+ -h, --help Display this help message.
485+EOT
486+}