A little more cleanup.
[rsync/rsync-patches.git] / checksum4mirrors.diff
CommitLineData
213d4328
WD
1Optimize the ability of a mirror to send checksums.
2
3This adds a sender optimization feature that allows a cache of checksums
4to be used when the client specifies the --checksum option. The checksum
5files (.rsyncsums) must be created by some other process (see the Perl
6script support/rsyncsums for one way).
7
8This feature is meant for mirrors whose files are created once and then
9never changed. There is a minimal amount of sanity-check information in the
10.rsyncsums file (size and mtime) so that the sum files can be shared with your
11mirror network.
12
13To use this patch, run these commands for a successful build:
14
15 patch -p1 <patches/checksum4mirrors.diff
16 ./configure (optional if already run)
17 make
18
19--- old/flist.c
20+++ new/flist.c
21@@ -117,6 +117,7 @@ static char empty_sum[MAX_DIGEST_LEN];
22 static int flist_count_offset; /* for --delete --progress */
23 static int dir_count = 0;
24 static int high_hlink_ndx;
25+static struct file_list *checksum_flist = NULL;
26
27 static void clean_flist(struct file_list *flist, int strip_root);
28 static void output_flist(struct file_list *flist);
29@@ -304,6 +305,186 @@ static void flist_done_allocating(struct
30 flist->pool_boundary = ptr;
31 }
32
33+/* The len count is the length of the basename + 1 for the null. */
34+static int add_checksum(const char *dirname, const char *basename, int len,
35+ OFF_T file_length, time_t mtime, const char *sum)
36+{
37+ struct file_struct *file;
38+ int alloc_len, extra_len;
39+ char *bp;
40+
41+ if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0)
42+ return 0;
43+ if (file_length == 0)
44+ return 0;
45+
46+ extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT)
47+ * EXTRA_LEN;
48+#if EXTRA_ROUNDING > 0
49+ if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
50+ extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
51+#endif
52+ alloc_len = FILE_STRUCT_LEN + extra_len + len;
53+ bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum");
54+
55+ memset(bp, 0, extra_len + FILE_STRUCT_LEN);
56+ bp += extra_len;
57+ file = (struct file_struct *)bp;
58+ bp += FILE_STRUCT_LEN;
59+
60+ memcpy(bp, basename, len);
61+
62+ file->mode = S_IFREG;
63+ file->modtime = mtime;
64+ file->len32 = (uint32)file_length;
65+ if (file_length > 0xFFFFFFFFu) {
66+ file->flags |= FLAG_LENGTH64;
67+ OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
68+ }
69+ file->dirname = dirname;
70+ bp = F_SUM(file);
71+ memcpy(bp, sum, checksum_len);
72+
73+ flist_expand(checksum_flist, 1);
74+ checksum_flist->files[checksum_flist->used++] = file;
75+
76+ checksum_flist->sorted = checksum_flist->files;
77+
78+ return 1;
79+}
80+
81+/* The dirname value must remain unchanged during the lifespan of the
82+ * created checksum_flist object because we use it directly. */
83+static void read_checksums(const char *dirname)
84+{
85+ char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
86+ OFF_T file_length;
87+ time_t mtime;
88+ int len, dlen, i;
89+ char *cp;
90+ FILE *fp;
91+
92+ if (checksum_flist) {
93+ /* Reset the pool memory and empty the file-list array. */
94+ pool_free_old(checksum_flist->file_pool,
95+ pool_boundary(checksum_flist->file_pool, 0));
96+ checksum_flist->used = 0;
97+ } else
98+ checksum_flist = flist_new(FLIST_TEMP, "read_checksums");
99+
100+ checksum_flist->low = 0;
101+ checksum_flist->high = -1;
102+
103+ if (!dirname)
104+ return;
105+
106+ dlen = strlcpy(fbuf, dirname, sizeof fbuf);
107+ if (dlen >= (int)sizeof fbuf)
108+ return;
109+ if (dlen)
110+ fbuf[dlen++] = '/';
111+ else
112+ dirname = NULL;
113+ strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen);
114+ if (!(fp = fopen(fbuf, "r")))
115+ return;
116+
117+ while (fgets(line, sizeof line, fp)) {
118+ cp = line;
119+ if (protocol_version >= 30) {
120+ char *alt_sum = cp;
121+ if (*cp == '=')
122+ while (*++cp == '=') {}
123+ else
124+ while (isXDigit(cp)) cp++;
125+ if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
126+ break;
127+ while (*++cp == ' ') {}
128+ }
129+
130+ if (*cp == '=') {
131+ continue;
132+ } else {
133+ for (i = 0; i < checksum_len*2; i++, cp++) {
134+ int x;
135+ if (isXDigit(cp)) {
136+ if (isDigit(cp))
137+ x = *cp - '0';
138+ else
139+ x = (*cp & 0xF) + 9;
140+ } else {
141+ cp = "";
142+ break;
143+ }
144+ if (i & 1)
145+ sum[i/2] |= x;
146+ else
147+ sum[i/2] = x << 4;
148+ }
149+ }
150+ if (*cp != ' ')
151+ break;
152+ while (*++cp == ' ') {}
153+
154+ if (protocol_version < 30) {
155+ char *alt_sum = cp;
156+ if (*cp == '=')
157+ while (*++cp == '=') {}
158+ else
159+ while (isXDigit(cp)) cp++;
160+ if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
161+ break;
162+ while (*++cp == ' ') {}
163+ }
164+
165+ file_length = 0;
166+ while (isDigit(cp))
167+ file_length = file_length * 10 + *cp++ - '0';
168+ if (*cp != ' ')
169+ break;
170+ while (*++cp == ' ') {}
171+
172+ mtime = 0;
173+ while (isDigit(cp))
174+ mtime = mtime * 10 + *cp++ - '0';
175+ if (*cp != ' ')
176+ break;
177+ while (*++cp == ' ') {}
178+
179+ /* Ignore ctime. */
180+ while (isDigit(cp))
181+ cp++;
182+ if (*cp != ' ')
183+ break;
184+ while (*++cp == ' ') {}
185+
186+ /* Ignore inode. */
187+ while (isDigit(cp))
188+ cp++;
189+ if (*cp != ' ')
190+ break;
191+ while (*++cp == ' ') {}
192+
193+ len = strlen(cp);
194+ while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
195+ len--;
196+ if (!len)
197+ break;
198+ cp[len++] = '\0'; /* len now counts the null */
199+ if (strchr(cp, '/'))
200+ break;
201+ if (len > MAXPATHLEN)
202+ continue;
203+
204+ strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
205+
206+ add_checksum(dirname, cp, len, file_length, mtime, sum);
207+ }
208+ fclose(fp);
209+
210+ clean_flist(checksum_flist, 0);
211+}
212+
213 int push_pathname(const char *dir, int len)
214 {
215 if (dir == pathname)
216@@ -989,7 +1170,7 @@ struct file_struct *make_file(const char
217 STRUCT_STAT *stp, int flags, int filter_level)
218 {
219 static char *lastdir;
220- static int lastdir_len = -1;
221+ static int lastdir_len = -2;
222 struct file_struct *file;
223 char thisname[MAXPATHLEN];
224 char linkname[MAXPATHLEN];
225@@ -1119,9 +1300,16 @@ struct file_struct *make_file(const char
226 memcpy(lastdir, thisname, len);
227 lastdir[len] = '\0';
228 lastdir_len = len;
229+ if (always_checksum && am_sender && flist)
230+ read_checksums(lastdir);
231 }
232- } else
233+ } else {
234 basename = thisname;
235+ if (always_checksum && am_sender && flist && lastdir_len == -2) {
236+ lastdir_len = -1;
237+ read_checksums("");
238+ }
239+ }
240 basename_len = strlen(basename) + 1; /* count the '\0' */
241
242 #ifdef SUPPORT_LINKS
243@@ -1197,11 +1385,21 @@ struct file_struct *make_file(const char
244 }
245 #endif
246
247- if (always_checksum && am_sender && S_ISREG(st.st_mode))
248- file_checksum(thisname, tmp_sum, st.st_size);
249-
250 F_PATHNAME(file) = pathname;
251
252+ if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
253+ int j;
254+ if (flist && (j = flist_find(checksum_flist, file)) >= 0) {
255+ struct file_struct *fp = checksum_flist->sorted[j];
256+ if (F_LENGTH(fp) == st.st_size
257+ && fp->modtime == st.st_mtime)
258+ memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN);
259+ else
260+ file_checksum(thisname, tmp_sum, st.st_size);
261+ } else
262+ file_checksum(thisname, tmp_sum, st.st_size);
263+ }
264+
265 /* This code is only used by the receiver when it is building
266 * a list of files for a delete pass. */
267 if (keep_dirlinks && linkname_len && flist) {
6e9495c7
WD
268@@ -2051,7 +2249,11 @@ struct file_list *send_file_list(int f,
269 * file-list to check if this is a 1-file xfer. */
270 send_extra_file_list(f, 1);
271 }
272- }
273+ } else
274+ flist_eof = 1;
275+
276+ if (checksum_updating && always_checksum && flist_eof)
277+ read_checksums(NULL);
278
279 return flist;
280 }
213d4328
WD
281--- old/ifuncs.h
282+++ new/ifuncs.h
283@@ -64,6 +64,12 @@ isDigit(const char *ptr)
284 }
285
286 static inline int
287+isXDigit(const char *ptr)
288+{
289+ return isxdigit(*(unsigned char *)ptr);
290+}
291+
292+static inline int
293 isPrint(const char *ptr)
294 {
295 return isprint(*(unsigned char *)ptr);
213d4328
WD
296--- old/support/rsyncsums
297+++ new/support/rsyncsums
298@@ -0,0 +1,183 @@
299+#!/usr/bin/perl -w
300+use strict;
301+
302+use Getopt::Long;
303+use Cwd qw(abs_path cwd);
304+use Digest::MD4;
305+use Digest::MD5;
306+
307+our $SUMS_FILE = '.rsyncsums';
308+
309+&Getopt::Long::Configure('bundling');
310+&usage if !&GetOptions(
311+ 'simple-cmp|s' => \( my $ignore_ctime_and_inode ),
312+ 'recurse|r' => \( my $recurse_opt ),
313+ 'verbose|v+' => \( my $verbosity = 0 ),
314+ 'help|h' => \( my $help_opt ),
315+);
316+&usage if $help_opt;
317+
318+my $start_dir = cwd();
319+
320+my @dirs = @ARGV;
321+@dirs = '.' unless @dirs;
322+foreach (@dirs) {
323+ $_ = abs_path($_);
324+}
325+
326+$| = 1;
327+
328+my $md4 = Digest::MD4->new;
329+my $md5 = Digest::MD5->new;
330+
331+while (@dirs) {
332+ my $dir = shift @dirs;
333+
334+ if (!chdir($dir)) {
335+ warn "Unable to chdir to $dir: $!\n";
336+ next;
337+ }
338+ if (!opendir(DP, '.')) {
339+ warn "Unable to opendir $dir: $!\n";
340+ next;
341+ }
342+
343+ if ($verbosity) {
344+ my $reldir = $dir;
345+ $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
346+ print "$reldir ... ";
347+ }
348+
349+ my $sums_file_exists = -e $SUMS_FILE;
350+ my %cache;
351+ my @subdirs;
352+ my $cnt = 0;
353+ while (defined(my $fn = readdir(DP))) {
354+ next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
355+ if (-d _) {
356+ push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
357+ next;
358+ }
359+ next unless -f _;
360+
361+ my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
362+ next if $size == 0;
363+
364+ $cache{$fn} = [ $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
365+ $cnt++;
366+ }
367+
368+ closedir DP;
369+
370+ unshift(@dirs, sort @subdirs) if $recurse_opt;
371+
372+ if (!$cnt) {
373+ if ($sums_file_exists) {
374+ print "(removed $SUMS_FILE) " if $verbosity;
375+ unlink($SUMS_FILE);
376+ }
377+ print "empty\n" if $verbosity;
378+ next;
379+ }
380+
381+ if (open(FP, '+<', $SUMS_FILE)) {
382+ while (<FP>) {
383+ chomp;
384+ my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
385+ my $ref = $cache{$fn};
386+ if (defined $ref) {
387+ if ($ignore_ctime_and_inode) {
388+ $ctime = $$ref[2];
389+ $inode = $$ref[3];
390+ }
391+ if ($$ref[0] == $size
392+ && $$ref[1] == $mtime
393+ && $$ref[2] == $ctime
394+ && $$ref[3] == $inode
395+ && $sum4 !~ /=/ && $sum5 !~ /=/) {
396+ $$ref[4] = $sum4;
397+ $$ref[5] = $sum5;
398+ $cnt--;
399+ } else {
400+ $$ref[4] = $$ref[5] = undef;
401+ }
402+ } else {
403+ $cnt = -1; # Force rewrite due to removed line.
404+ }
405+ }
406+ } else {
407+ open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
408+ $cnt = -1;
409+ }
410+
411+ if ($cnt) {
412+ print "UPDATING\n" if $verbosity;
413+ while (my($fn, $ref) = each %cache) {
414+ next if defined $$ref[3] && defined $$ref[4];
415+ if (!open(IN, $fn)) {
416+ print STDERR "Unable to read $fn: $!\n";
417+ delete $cache{$fn};
418+ next;
419+ }
420+
421+ my($size,$mtime,$ctime,$inode) = (stat(IN))[7,9,10,1];
422+ if ($size == 0) {
423+ close IN;
424+ next;
425+ }
426+
427+ my($sum4, $sum5);
428+ while (1) {
429+ while (sysread(IN, $_, 64*1024)) {
430+ $md4->add($_);
431+ $md5->add($_);
432+ }
433+ $sum4 = $md4->hexdigest;
434+ $sum5 = $md5->hexdigest;
435+ print " $sum4 $sum5" if $verbosity > 2;
436+ print " $fn\n" if $verbosity > 1;
437+ my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
438+ if ($ignore_ctime_and_inode) {
439+ $ctime = $ctime2;
440+ $inode = $inode2;
441+ }
442+ last if $size == $size2 && $mtime == $mtime2
443+ && $ctime == $ctime2 && $inode == $inode2;
444+ $size = $size2;
445+ $mtime = $mtime2;
446+ $ctime = $ctime2;
447+ $inode = $inode2;
448+ sysseek(IN, 0, 0);
449+ }
450+
451+ close IN;
452+
453+ $cache{$fn} = [ $size, $mtime, $ctime, $inode, $sum4, $sum5 ];
454+ }
455+
456+ seek(FP, 0, 0);
457+ foreach my $fn (sort keys %cache) {
458+ my $ref = $cache{$fn};
459+ my($size, $mtime, $ctime, $inode, $sum4, $sum5) = @$ref;
460+ printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
461+ }
462+ truncate(FP, tell(FP));
463+ } else {
464+ print "ok\n" if $verbosity;
465+ }
466+
467+ close FP;
468+}
469+
470+sub usage
471+{
472+ die <<EOT;
473+Usage: rsyncsums [OPTIONS] [DIRS]
474+
475+Options:
476+ -r, --recurse Update $SUMS_FILE files in subdirectories too.
477+ -s, --simple-cmp Ignore ctime and inode values when comparing identicality.
478+ -v, --verbose Mention what we're doing. Repeat for more info.
479+ -h, --help Display this help message.
480+EOT
481+}