Updated patches to work with the current trunk.
[rsync/rsync-patches.git] / checksum-reading.diff
... / ...
CommitLineData
1Optimize the --checksum option using externally created .rsyncsums files.
2
3This adds a new option, --sumfiles=MODE, that allows you to use a cache of
4checksums when performing a --checksum transfer. These checksum files
5(.rsyncsums) must be created by some other process -- see the perl script,
6rsyncsums, in the support dir for one way.
7
8This option can be particularly helpful to a public mirror that wants to
9pre-compute their .rsyncsums files, set the "checksum files = strict" option
10in their daemon config file, and thus make it quite efficient for a client
11rsync to make use of the --checksum option on their server.
12
13To use this patch, run these commands for a successful build:
14
15 patch -p1 <patches/checksum-reading.diff
16 ./configure (optional if already run)
17 make
18
19based-on: 24079e988fc31af4eba56cd2701fdc5a4154980d
20diff --git a/checksum.c b/checksum.c
21--- a/checksum.c
22+++ b/checksum.c
23@@ -98,7 +98,7 @@ void get_checksum2(char *buf, int32 len, char *sum)
24 }
25 }
26
27-void file_checksum(char *fname, char *sum, OFF_T size)
28+void file_checksum(const char *fname, OFF_T size, char *sum)
29 {
30 struct map_struct *buf;
31 OFF_T i, len = size;
32diff --git a/clientserver.c b/clientserver.c
33--- a/clientserver.c
34+++ b/clientserver.c
35@@ -42,6 +42,8 @@ extern int numeric_ids;
36 extern int filesfrom_fd;
37 extern int remote_protocol;
38 extern int protocol_version;
39+extern int always_checksum;
40+extern int checksum_files;
41 extern int io_timeout;
42 extern int no_detach;
43 extern int write_batch;
44@@ -874,6 +876,9 @@ static int rsync_module(int f_in, int f_out, int i, const char *addr, const char
45 } else if (am_root < 0) /* Treat --fake-super from client as --super. */
46 am_root = 2;
47
48+ checksum_files = always_checksum ? lp_checksum_files(i)
49+ : CSF_IGNORE_FILES;
50+
51 if (filesfrom_fd == 0)
52 filesfrom_fd = f_in;
53
54diff --git a/flist.c b/flist.c
55--- a/flist.c
56+++ b/flist.c
57@@ -22,6 +22,7 @@
58
59 #include "rsync.h"
60 #include "ifuncs.h"
61+#include "itypes.h"
62 #include "rounding.h"
63 #include "inums.h"
64 #include "io.h"
65@@ -33,6 +34,7 @@ extern int am_sender;
66 extern int am_generator;
67 extern int inc_recurse;
68 extern int always_checksum;
69+extern int basis_dir_cnt;
70 extern int module_id;
71 extern int ignore_errors;
72 extern int numeric_ids;
73@@ -60,6 +62,7 @@ extern int file_extra_cnt;
74 extern int ignore_perishable;
75 extern int non_perishable_cnt;
76 extern int prune_empty_dirs;
77+extern int checksum_files;
78 extern int copy_links;
79 extern int copy_unsafe_links;
80 extern int protocol_version;
81@@ -70,6 +73,7 @@ extern int sender_symlink_iconv;
82 extern int output_needs_newline;
83 extern int sender_keeps_checksum;
84 extern int unsort_ndx;
85+extern char *basis_dir[];
86 extern struct stats stats;
87 extern char *filesfrom_host;
88 extern char *usermap, *groupmap;
89@@ -94,6 +98,12 @@ extern iconv_t ic_send, ic_recv;
90 #endif
91 #endif
92
93+#define RSYNCSUMS_FILE ".rsyncsums"
94+#define RSYNCSUMS_LEN (sizeof RSYNCSUMS_FILE-1)
95+
96+#define CLEAN_STRIP_ROOT (1<<0)
97+#define CLEAN_KEEP_LAST (1<<1)
98+
99 #define PTR_SIZE (sizeof (struct file_struct *))
100
101 int io_error;
102@@ -135,7 +145,11 @@ static char tmp_sum[MAX_DIGEST_LEN];
103 static char empty_sum[MAX_DIGEST_LEN];
104 static int flist_count_offset; /* for --delete --progress */
105
106-static void flist_sort_and_clean(struct file_list *flist, int strip_root);
107+static struct csum_cache {
108+ struct file_list *flist;
109+} *csum_cache = NULL;
110+
111+static void flist_sort_and_clean(struct file_list *flist, int flags);
112 static void output_flist(struct file_list *flist);
113
114 void init_flist(void)
115@@ -350,6 +364,238 @@ static void flist_done_allocating(struct file_list *flist)
116 flist->pool_boundary = ptr;
117 }
118
119+void reset_checksum_cache()
120+{
121+ int slot, slots = am_sender ? 1 : basis_dir_cnt + 1;
122+
123+ if (!csum_cache) {
124+ csum_cache = new_array0(struct csum_cache, slots);
125+ if (!csum_cache)
126+ out_of_memory("reset_checksum_cache");
127+ }
128+
129+ for (slot = 0; slot < slots; slot++) {
130+ struct file_list *flist = csum_cache[slot].flist;
131+
132+ if (flist) {
133+ /* Reset the pool memory and empty the file-list array. */
134+ pool_free_old(flist->file_pool,
135+ pool_boundary(flist->file_pool, 0));
136+ flist->used = 0;
137+ } else
138+ flist = csum_cache[slot].flist = flist_new(FLIST_TEMP, "reset_checksum_cache");
139+
140+ flist->low = 0;
141+ flist->high = -1;
142+ flist->next = NULL;
143+ }
144+}
145+
146+/* The basename_len count is the length of the basename + 1 for the '\0'. */
147+static int add_checksum(struct file_list *flist, const char *dirname,
148+ const char *basename, int basename_len, OFF_T file_length,
149+ time_t mtime, uint32 ctime, uint32 inode,
150+ const char *sum)
151+{
152+ struct file_struct *file;
153+ int alloc_len, extra_len;
154+ char *bp;
155+
156+ if (basename_len == RSYNCSUMS_LEN+1 && *basename == '.'
157+ && strcmp(basename, RSYNCSUMS_FILE) == 0)
158+ return 0;
159+
160+ /* "2" is for a 32-bit ctime num and a 32-bit inode num. */
161+ extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT + 2)
162+ * EXTRA_LEN;
163+#if EXTRA_ROUNDING > 0
164+ if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
165+ extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
166+#endif
167+ alloc_len = FILE_STRUCT_LEN + extra_len + basename_len;
168+ bp = pool_alloc(flist->file_pool, alloc_len, "add_checksum");
169+
170+ memset(bp, 0, extra_len + FILE_STRUCT_LEN);
171+ bp += extra_len;
172+ file = (struct file_struct *)bp;
173+ bp += FILE_STRUCT_LEN;
174+
175+ memcpy(bp, basename, basename_len);
176+
177+ file->mode = S_IFREG;
178+ file->modtime = mtime;
179+ file->len32 = (uint32)file_length;
180+ if (file_length > 0xFFFFFFFFu) {
181+ file->flags |= FLAG_LENGTH64;
182+ OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
183+ }
184+ file->dirname = dirname;
185+ F_CTIME(file) = ctime;
186+ F_INODE(file) = inode;
187+ bp = F_SUM(file);
188+ memcpy(bp, sum, checksum_len);
189+
190+ flist_expand(flist, 1);
191+ flist->files[flist->used++] = file;
192+
193+ flist->sorted = flist->files;
194+
195+ return 1;
196+}
197+
198+/* The "dirname" arg's data must remain unchanged during the lifespan of
199+ * the created csum_cache[].flist object because we use it directly. */
200+static void read_checksums(int slot, struct file_list *flist, const char *dirname)
201+{
202+ char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
203+ FILE *fp;
204+ char *cp;
205+ int len, i;
206+ time_t mtime;
207+ OFF_T file_length;
208+ uint32 ctime, inode;
209+ int dlen = dirname ? strlcpy(fbuf, dirname, sizeof fbuf) : 0;
210+
211+ if (dlen >= (int)(sizeof fbuf - 1 - RSYNCSUMS_LEN))
212+ return;
213+ if (dlen)
214+ fbuf[dlen++] = '/';
215+ else
216+ dirname = NULL;
217+ strlcpy(fbuf+dlen, RSYNCSUMS_FILE, sizeof fbuf - dlen);
218+ if (slot) {
219+ pathjoin(line, sizeof line, basis_dir[slot-1], fbuf);
220+ cp = line;
221+ } else
222+ cp = fbuf;
223+ if (!(fp = fopen(cp, "r")))
224+ return;
225+
226+ while (fgets(line, sizeof line, fp)) {
227+ cp = line;
228+ if (protocol_version >= 30) {
229+ char *alt_sum = cp;
230+ if (*cp == '=')
231+ while (*++cp == '=') {}
232+ else
233+ while (isXDigit(cp)) cp++;
234+ if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
235+ break;
236+ while (*++cp == ' ') {}
237+ }
238+
239+ if (*cp == '=') {
240+ continue;
241+ } else {
242+ for (i = 0; i < checksum_len*2; i++, cp++) {
243+ int x;
244+ if (isXDigit(cp)) {
245+ if (isDigit(cp))
246+ x = *cp - '0';
247+ else
248+ x = (*cp & 0xF) + 9;
249+ } else {
250+ cp = "";
251+ break;
252+ }
253+ if (i & 1)
254+ sum[i/2] |= x;
255+ else
256+ sum[i/2] = x << 4;
257+ }
258+ }
259+ if (*cp != ' ')
260+ break;
261+ while (*++cp == ' ') {}
262+
263+ if (protocol_version < 30) {
264+ char *alt_sum = cp;
265+ if (*cp == '=')
266+ while (*++cp == '=') {}
267+ else
268+ while (isXDigit(cp)) cp++;
269+ if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
270+ break;
271+ while (*++cp == ' ') {}
272+ }
273+
274+ file_length = 0;
275+ while (isDigit(cp))
276+ file_length = file_length * 10 + *cp++ - '0';
277+ if (*cp != ' ')
278+ break;
279+ while (*++cp == ' ') {}
280+
281+ mtime = 0;
282+ while (isDigit(cp))
283+ mtime = mtime * 10 + *cp++ - '0';
284+ if (*cp != ' ')
285+ break;
286+ while (*++cp == ' ') {}
287+
288+ ctime = 0;
289+ while (isDigit(cp))
290+ ctime = ctime * 10 + *cp++ - '0';
291+ if (*cp != ' ')
292+ break;
293+ while (*++cp == ' ') {}
294+
295+ inode = 0;
296+ while (isDigit(cp))
297+ inode = inode * 10 + *cp++ - '0';
298+ if (*cp != ' ')
299+ break;
300+ while (*++cp == ' ') {}
301+
302+ len = strlen(cp);
303+ while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
304+ len--;
305+ if (!len)
306+ break;
307+ cp[len++] = '\0'; /* len now counts the null */
308+ if (strchr(cp, '/'))
309+ break;
310+ if (len > MAXPATHLEN)
311+ continue;
312+
313+ strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
314+
315+ add_checksum(flist, dirname, cp, len, file_length,
316+ mtime, ctime, inode,
317+ sum);
318+ }
319+ fclose(fp);
320+
321+ flist_sort_and_clean(flist, CLEAN_KEEP_LAST);
322+}
323+
324+void get_cached_checksum(int slot, const char *fname, struct file_struct *file,
325+ STRUCT_STAT *stp, char *sum_buf)
326+{
327+ struct file_list *flist = csum_cache[slot].flist;
328+ int j;
329+
330+ if (!flist->next) {
331+ flist->next = cur_flist; /* next points from checksum flist to file flist */
332+ read_checksums(slot, flist, file->dirname);
333+ }
334+
335+ if ((j = flist_find(flist, file)) >= 0) {
336+ struct file_struct *fp = flist->sorted[j];
337+
338+ if (F_LENGTH(fp) == stp->st_size
339+ && fp->modtime == stp->st_mtime
340+ && (checksum_files & CSF_LAX
341+ || (F_CTIME(fp) == (uint32)stp->st_ctime
342+ && F_INODE(fp) == (uint32)stp->st_ino))) {
343+ memcpy(sum_buf, F_SUM(fp), MAX_DIGEST_LEN);
344+ return;
345+ }
346+ }
347+
348+ file_checksum(fname, stp->st_size, sum_buf);
349+}
350+
351 /* Call this with EITHER (1) "file, NULL, 0" to chdir() to the file's
352 * F_PATHNAME(), or (2) "NULL, dir, dirlen" to chdir() to the supplied dir,
353 * with dir == NULL taken to be the starting directory, and dirlen < 0
354@@ -1141,7 +1387,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
355 STRUCT_STAT *stp, int flags, int filter_level)
356 {
357 static char *lastdir;
358- static int lastdir_len = -1;
359+ static int lastdir_len = -2;
360 struct file_struct *file;
361 char thisname[MAXPATHLEN];
362 char linkname[MAXPATHLEN];
363@@ -1287,9 +1533,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
364 memcpy(lastdir, thisname, len);
365 lastdir[len] = '\0';
366 lastdir_len = len;
367+ if (checksum_files && am_sender && flist)
368+ reset_checksum_cache();
369 }
370- } else
371+ } else {
372 basename = thisname;
373+ if (checksum_files && am_sender && flist && lastdir_len == -2) {
374+ lastdir_len = -1;
375+ reset_checksum_cache();
376+ }
377+ }
378 basename_len = strlen(basename) + 1; /* count the '\0' */
379
380 #ifdef SUPPORT_LINKS
381@@ -1307,11 +1560,8 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
382 extra_len += EXTRA_LEN;
383 #endif
384
385- if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
386- file_checksum(thisname, tmp_sum, st.st_size);
387- if (sender_keeps_checksum)
388- extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
389- }
390+ if (sender_keeps_checksum && S_ISREG(st.st_mode))
391+ extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
392
393 #if EXTRA_ROUNDING > 0
394 if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
395@@ -1394,8 +1644,14 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
396 return NULL;
397 }
398
399- if (sender_keeps_checksum && S_ISREG(st.st_mode))
400- memcpy(F_SUM(file), tmp_sum, checksum_len);
401+ if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
402+ if (flist && checksum_files)
403+ get_cached_checksum(0, thisname, file, &st, tmp_sum);
404+ else
405+ file_checksum(thisname, st.st_size, tmp_sum);
406+ if (sender_keeps_checksum)
407+ memcpy(F_SUM(file), tmp_sum, checksum_len);
408+ }
409
410 if (unsort_ndx)
411 F_NDX(file) = stats.num_dirs;
412@@ -2527,7 +2783,7 @@ struct file_list *recv_file_list(int f)
413 rprintf(FINFO, "[%s] flist_eof=1\n", who_am_i());
414 }
415
416- flist_sort_and_clean(flist, relative_paths);
417+ flist_sort_and_clean(flist, relative_paths ? CLEAN_STRIP_ROOT : 0);
418
419 if (protocol_version < 30) {
420 /* Recv the io_error flag */
421@@ -2750,7 +3006,7 @@ void flist_free(struct file_list *flist)
422
423 /* This routine ensures we don't have any duplicate names in our file list.
424 * duplicate names can cause corruption because of the pipelining. */
425-static void flist_sort_and_clean(struct file_list *flist, int strip_root)
426+static void flist_sort_and_clean(struct file_list *flist, int flags)
427 {
428 char fbuf[MAXPATHLEN];
429 int i, prev_i;
430@@ -2801,7 +3057,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
431 /* If one is a dir and the other is not, we want to
432 * keep the dir because it might have contents in the
433 * list. Otherwise keep the first one. */
434- if (S_ISDIR(file->mode)) {
435+ if (S_ISDIR(file->mode) || flags & CLEAN_KEEP_LAST) {
436 struct file_struct *fp = flist->sorted[j];
437 if (!S_ISDIR(fp->mode))
438 keep = i, drop = j;
439@@ -2817,8 +3073,8 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
440 } else
441 keep = j, drop = i;
442
443- if (!am_sender) {
444- if (DEBUG_GTE(DUP, 1)) {
445+ if (!am_sender || flags & CLEAN_KEEP_LAST) {
446+ if (DEBUG_GTE(DUP, 1) && !(flags & CLEAN_KEEP_LAST)) {
447 rprintf(FINFO,
448 "removing duplicate name %s from file list (%d)\n",
449 f_name(file, fbuf), drop + flist->ndx_start);
450@@ -2840,7 +3096,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
451 }
452 flist->high = prev_i;
453
454- if (strip_root) {
455+ if (flags & CLEAN_STRIP_ROOT) {
456 /* We need to strip off the leading slashes for relative
457 * paths, but this must be done _after_ the sorting phase. */
458 for (i = flist->low; i <= flist->high; i++) {
459diff --git a/generator.c b/generator.c
460--- a/generator.c
461+++ b/generator.c
462@@ -53,6 +53,7 @@ extern int delete_after;
463 extern int missing_args;
464 extern int msgdone_cnt;
465 extern int ignore_errors;
466+extern int checksum_files;
467 extern int remove_source_files;
468 extern int delay_updates;
469 extern int update_only;
470@@ -522,7 +523,7 @@ void itemize(const char *fnamecmp, struct file_struct *file, int ndx, int statre
471
472
473 /* Perform our quick-check heuristic for determining if a file is unchanged. */
474-int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
475+int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st, int slot)
476 {
477 if (st->st_size != F_LENGTH(file))
478 return 0;
479@@ -531,7 +532,10 @@ int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
480 of the file time to determine whether to sync */
481 if (always_checksum > 0 && S_ISREG(st->st_mode)) {
482 char sum[MAX_DIGEST_LEN];
483- file_checksum(fn, sum, st->st_size);
484+ if (checksum_files && slot >= 0)
485+ get_cached_checksum(slot, fn, file, st, sum);
486+ else
487+ file_checksum(fn, st->st_size, sum);
488 return memcmp(sum, F_SUM(file), checksum_len) == 0;
489 }
490
491@@ -795,7 +799,7 @@ static int try_dests_reg(struct file_struct *file, char *fname, int ndx,
492 match_level = 1;
493 /* FALL THROUGH */
494 case 1:
495- if (!unchanged_file(cmpbuf, file, &sxp->st))
496+ if (!unchanged_file(cmpbuf, file, &sxp->st, j+1))
497 continue;
498 best_match = j;
499 match_level = 2;
500@@ -1081,7 +1085,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
501 * --ignore-non-existing, daemon exclude, or mkdir failure. */
502 static struct file_struct *skip_dir = NULL;
503 static struct file_list *fuzzy_dirlist = NULL;
504- static int need_fuzzy_dirlist = 0;
505+ static int need_new_dirscan = 0;
506 struct file_struct *fuzzy_file = NULL;
507 int fd = -1, f_copy = -1;
508 stat_x sx, real_sx;
509@@ -1165,8 +1169,8 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
510 flist_free(fuzzy_dirlist);
511 fuzzy_dirlist = NULL;
512 }
513- if (fuzzy_basis)
514- need_fuzzy_dirlist = 1;
515+ if (fuzzy_basis || checksum_files)
516+ need_new_dirscan = 1;
517 #ifdef SUPPORT_ACLS
518 if (!preserve_perms)
519 dflt_perms = default_perms_for_dir(dn);
520@@ -1174,10 +1178,15 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
521 }
522 parent_dirname = dn;
523
524- if (need_fuzzy_dirlist && S_ISREG(file->mode)) {
525- strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
526- fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, 1);
527- need_fuzzy_dirlist = 0;
528+ if (need_new_dirscan && S_ISREG(file->mode)) {
529+ if (fuzzy_basis) {
530+ strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
531+ fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, 1);
532+ }
533+ if (checksum_files) {
534+ reset_checksum_cache();
535+ }
536+ need_new_dirscan = 0;
537 }
538
539 statret = link_stat(fname, &sx.st, keep_dirlinks && is_dir);
540@@ -1600,7 +1609,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
541 ;
542 else if (fnamecmp_type == FNAMECMP_FUZZY)
543 ;
544- else if (unchanged_file(fnamecmp, file, &sx.st)) {
545+ else if (unchanged_file(fnamecmp, file, &sx.st, fnamecmp_type == FNAMECMP_FNAME ? 0 : -1)) {
546 if (partialptr) {
547 do_unlink(partialptr);
548 handle_partial_dir(partialptr, PDIR_DELETE);
549diff --git a/hlink.c b/hlink.c
550--- a/hlink.c
551+++ b/hlink.c
552@@ -409,7 +409,7 @@ int hard_link_check(struct file_struct *file, int ndx, char *fname,
553 }
554 break;
555 }
556- if (!unchanged_file(cmpbuf, file, &alt_sx.st))
557+ if (!unchanged_file(cmpbuf, file, &alt_sx.st, j+1))
558 continue;
559 statret = 1;
560 if (unchanged_attrs(cmpbuf, file, &alt_sx))
561diff --git a/itypes.h b/itypes.h
562--- a/itypes.h
563+++ b/itypes.h
564@@ -23,6 +23,12 @@ isDigit(const char *ptr)
565 }
566
567 static inline int
568+isXDigit(const char *ptr)
569+{
570+ return isxdigit(*(unsigned char *)ptr);
571+}
572+
573+static inline int
574 isPrint(const char *ptr)
575 {
576 return isprint(*(unsigned char *)ptr);
577diff --git a/loadparm.c b/loadparm.c
578--- a/loadparm.c
579+++ b/loadparm.c
580@@ -133,6 +133,7 @@ typedef struct {
581 /* NOTE: update this macro if the last char* variable changes! */
582 #define LOCAL_STRING_COUNT() (offsetof(local_vars, uid) / sizeof (char*) + 1)
583
584+ int checksum_files;
585 int max_connections;
586 int max_verbosity;
587 int syslog_facility;
588@@ -205,6 +206,7 @@ static const all_vars Defaults = {
589 /* temp_dir; */ NULL,
590 /* uid; */ NULL,
591
592+ /* checksum_files; */ CSF_IGNORE_FILES,
593 /* max_connections; */ 0,
594 /* max_verbosity; */ 1,
595 /* syslog_facility; */ LOG_DAEMON,
596@@ -306,6 +308,13 @@ static struct enum_list enum_facilities[] = {
597 { -1, NULL }
598 };
599
600+static struct enum_list enum_csum_modes[] = {
601+ { CSF_IGNORE_FILES, "none" },
602+ { CSF_LAX_MODE, "lax" },
603+ { CSF_STRICT_MODE, "strict" },
604+ { -1, NULL }
605+};
606+
607 static struct parm_struct parm_table[] =
608 {
609 {"address", P_STRING, P_GLOBAL,&Vars.g.bind_address, NULL,0},
610@@ -316,6 +325,7 @@ static struct parm_struct parm_table[] =
611
612 {"auth users", P_STRING, P_LOCAL, &Vars.l.auth_users, NULL,0},
613 {"charset", P_STRING, P_LOCAL, &Vars.l.charset, NULL,0},
614+ {"checksum files", P_ENUM, P_LOCAL, &Vars.l.checksum_files, enum_csum_modes,0},
615 {"comment", P_STRING, P_LOCAL, &Vars.l.comment, NULL,0},
616 {"dont compress", P_STRING, P_LOCAL, &Vars.l.dont_compress, NULL,0},
617 {"exclude from", P_STRING, P_LOCAL, &Vars.l.exclude_from, NULL,0},
618@@ -470,6 +480,7 @@ FN_LOCAL_STRING(lp_secrets_file, secrets_file)
619 FN_LOCAL_STRING(lp_temp_dir, temp_dir)
620 FN_LOCAL_STRING(lp_uid, uid)
621
622+FN_LOCAL_INTEGER(lp_checksum_files, checksum_files)
623 FN_LOCAL_INTEGER(lp_max_connections, max_connections)
624 FN_LOCAL_INTEGER(lp_max_verbosity, max_verbosity)
625 FN_LOCAL_INTEGER(lp_syslog_facility, syslog_facility)
626diff --git a/options.c b/options.c
627--- a/options.c
628+++ b/options.c
629@@ -112,6 +112,7 @@ size_t bwlimit_writemax = 0;
630 int ignore_existing = 0;
631 int ignore_non_existing = 0;
632 int need_messages_from_generator = 0;
633+int checksum_files = CSF_IGNORE_FILES;
634 int max_delete = INT_MIN;
635 OFF_T max_size = 0;
636 OFF_T min_size = 0;
637@@ -662,6 +663,7 @@ void usage(enum logcode F)
638 rprintf(F," -q, --quiet suppress non-error messages\n");
639 rprintf(F," --no-motd suppress daemon-mode MOTD (see manpage caveat)\n");
640 rprintf(F," -c, --checksum skip based on checksum, not mod-time & size\n");
641+ rprintf(F," --sumfiles=MODE use .rsyncsums to speedup --checksum mode\n");
642 rprintf(F," -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)\n");
643 rprintf(F," --no-OPTION turn off an implied OPTION (e.g. --no-D)\n");
644 rprintf(F," -r, --recursive recurse into directories\n");
645@@ -798,7 +800,7 @@ enum {OPT_VERSION = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
646 OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST, OPT_HELP,
647 OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
648 OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
649- OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG,
650+ OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG, OPT_SUMFILES,
651 OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT,
652 OPT_SERVER, OPT_REFUSED_BASE = 9000};
653
654@@ -934,6 +936,7 @@ static struct poptOption long_options[] = {
655 {"checksum", 'c', POPT_ARG_VAL, &always_checksum, 1, 0, 0 },
656 {"no-checksum", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
657 {"no-c", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
658+ {"sumfiles", 0, POPT_ARG_STRING, 0, OPT_SUMFILES, 0, 0 },
659 {"block-size", 'B', POPT_ARG_LONG, &block_size, 0, 0, 0 },
660 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
661 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
662@@ -1651,6 +1654,23 @@ int parse_arguments(int *argc_p, const char ***argv_p)
663 }
664 break;
665
666+ case OPT_SUMFILES:
667+ arg = poptGetOptArg(pc);
668+ checksum_files = 0;
669+ if (strcmp(arg, "lax") == 0)
670+ checksum_files |= CSF_LAX_MODE;
671+ else if (strcmp(arg, "strict") == 0)
672+ checksum_files |= CSF_STRICT_MODE;
673+ else if (strcmp(arg, "none") == 0)
674+ checksum_files = CSF_IGNORE_FILES;
675+ else {
676+ snprintf(err_buf, sizeof err_buf,
677+ "Invalid argument passed to --sumfiles (%s)\n",
678+ arg);
679+ return 0;
680+ }
681+ break;
682+
683 case OPT_INFO:
684 arg = poptGetOptArg(pc);
685 parse_output_words(info_words, info_levels, arg, USER_PRIORITY);
686@@ -1851,6 +1871,9 @@ int parse_arguments(int *argc_p, const char ***argv_p)
687 }
688 #endif
689
690+ if (!always_checksum)
691+ checksum_files = CSF_IGNORE_FILES;
692+
693 if (write_batch && read_batch) {
694 snprintf(err_buf, sizeof err_buf,
695 "--write-batch and --read-batch can not be used together\n");
696diff --git a/rsync.h b/rsync.h
697--- a/rsync.h
698+++ b/rsync.h
699@@ -727,6 +727,10 @@ extern int xattrs_ndx;
700 #define F_SUM(f) ((char*)OPT_EXTRA(f, START_BUMP(f) + HLINK_BUMP(f) \
701 + SUM_EXTRA_CNT - 1))
702
703+/* These are only valid on an entry read from a checksum file. */
704+#define F_CTIME(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT)->unum
705+#define F_INODE(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT + 1)->unum
706+
707 /* Some utility defines: */
708 #define F_IS_ACTIVE(f) (f)->basename[0]
709 #define F_IS_HLINKED(f) ((f)->flags & FLAG_HLINKED)
710@@ -923,6 +927,13 @@ typedef struct {
711 char fname[1]; /* has variable size */
712 } relnamecache;
713
714+#define CSF_ENABLE (1<<1)
715+#define CSF_LAX (1<<2)
716+
717+#define CSF_IGNORE_FILES 0
718+#define CSF_LAX_MODE (CSF_ENABLE|CSF_LAX)
719+#define CSF_STRICT_MODE (CSF_ENABLE)
720+
721 #include "byteorder.h"
722 #include "lib/mdigest.h"
723 #include "lib/wildmatch.h"
724diff --git a/rsync.yo b/rsync.yo
725--- a/rsync.yo
726+++ b/rsync.yo
727@@ -323,6 +323,7 @@ to the detailed description below for a complete description. verb(
728 -q, --quiet suppress non-error messages
729 --no-motd suppress daemon-mode MOTD (see caveat)
730 -c, --checksum skip based on checksum, not mod-time & size
731+ --sumfiles=MODE use .rsyncsums to speedup --checksum mode
732 -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)
733 --no-OPTION turn off an implied OPTION (e.g. --no-D)
734 -r, --recursive recurse into directories
735@@ -568,9 +569,9 @@ uses a "quick check" that (by default) checks if each file's size and time
736 of last modification match between the sender and receiver. This option
737 changes this to compare a 128-bit checksum for each file that has a
738 matching size. Generating the checksums means that both sides will expend
739-a lot of disk I/O reading all the data in the files in the transfer (and
740-this is prior to any reading that will be done to transfer changed files),
741-so this can slow things down significantly.
742+a lot of disk I/O reading the data in all the files in the transfer, so
743+this can slow things down significantly (and this is prior to any reading
744+that will be done to transfer the files that have changed).
745
746 The sending side generates its checksums while it is doing the file-system
747 scan that builds the list of the available files. The receiver generates
748@@ -578,6 +579,8 @@ its checksums when it is scanning for changed files, and will checksum any
749 file that has the same size as the corresponding sender's file: files with
750 either a changed size or a changed checksum are selected for transfer.
751
752+See also the bf(--sumfiles) option for a way to use cached checksum data.
753+
754 Note that rsync always verifies that each em(transferred) file was
755 correctly reconstructed on the receiving side by checking a whole-file
756 checksum that is generated as the file is transferred, but that
757@@ -587,6 +590,36 @@ option's before-the-transfer "Does this file need to be updated?" check.
758 For protocol 30 and beyond (first supported in 3.0.0), the checksum used is
759 MD5. For older protocols, the checksum used is MD4.
760
761+dit(bf(--sumfiles=MODE)) This option tells rsync to make use of any cached
762+checksum information it finds in per-directory .rsyncsums files when the
763+current transfer is using the bf(--checksum) option. If the checksum data
764+is up-to-date, it is used instead of recomputing it, saving both disk I/O
765+and CPU time. If the checksum data is missing or outdated, the checksum is
766+computed just as it would be if bf(--sumfiles) was not specified.
767+
768+The MODE value is either "lax", for relaxed checking (which compares size
769+and mtime), "strict" (which also compares ctime and inode), or "none" to
770+ignore any .rsyncsums files ("none" is the default). Rsync does not create
771+or update these files, but there is a perl script in the support directory
772+named "rsyncsums" that can be used for that.
773+
774+This option has no effect unless bf(--checksum, -c) was also specified. It
775+also only affects the current side of the transfer, so if you want the
776+remote side to parse its own .rsyncsums files, specify the option via the
777+bf(--rsync-path) option (e.g. --rsync-path="rsync --sumfiles=lax").
778+
779+To avoid transferring the system's checksum files, you can use an exclude
780+(e.g. bf(--exclude=.rsyncsums)). To make this easier to type, you can use
781+a popt alias. For instance, adding the following line in your ~/.popt file
782+defines a bf(--cc) option that enables lax checksum files and excludes the
783+checksum files:
784+
785+verb( rsync alias --cc -c --sumfiles=lax --exclude=.rsyncsums)
786+
787+An rsync daemon does not allow the client to control this setting, so see
788+the "checksum files" daemon parameter for information on how to make a
789+daemon use cached checksum data.
790+
791 dit(bf(-a, --archive)) This is equivalent to bf(-rlptgoD). It is a quick
792 way of saying you want recursion and want to preserve almost
793 everything (with -H being a notable omission).
794diff --git a/rsyncd.conf.yo b/rsyncd.conf.yo
795--- a/rsyncd.conf.yo
796+++ b/rsyncd.conf.yo
797@@ -312,6 +312,17 @@ locking on this file to ensure that the max connections limit is not
798 exceeded for the modules sharing the lock file.
799 The default is tt(/var/run/rsyncd.lock).
800
801+dit(bf(checksum files)) This parameter tells rsync to make use of any cached
802+checksum information it finds in per-directory .rsyncsums files when the
803+current transfer is using the bf(--checksum) option. The value can be set
804+to either "lax", "strict", or "none" -- see the client's bf(--sumfiles)
805+option for what these choices do.
806+
807+Note also that the client's command-line option, bf(--sumfiles), has no
808+effect on a daemon. A daemon will only access checksum files if this
809+config option tells it to. See also the bf(exclude) directive for a way
810+to hide the .rsyncsums files from the user.
811+
812 dit(bf(read only)) This parameter determines whether clients
813 will be able to upload files or not. If "read only" is true then any
814 attempted uploads will fail. If "read only" is false then uploads will
815diff --git a/support/rsyncsums b/support/rsyncsums
816new file mode 100755
817--- /dev/null
818+++ b/support/rsyncsums
819@@ -0,0 +1,201 @@
820+#!/usr/bin/perl -w
821+use strict;
822+
823+use Getopt::Long;
824+use Cwd qw(abs_path cwd);
825+use Digest::MD4;
826+use Digest::MD5;
827+
828+our $SUMS_FILE = '.rsyncsums';
829+
830+&Getopt::Long::Configure('bundling');
831+&usage if !&GetOptions(
832+ 'recurse|r' => \( my $recurse_opt ),
833+ 'mode|m=s' => \( my $cmp_mode = 'strict' ),
834+ 'check|c' => \( my $check_opt ),
835+ 'verbose|v+' => \( my $verbosity = 0 ),
836+ 'help|h' => \( my $help_opt ),
837+);
838+&usage if $help_opt || $cmp_mode !~ /^(lax|strict)$/;
839+
840+my $ignore_ctime_and_inode = $cmp_mode eq 'lax' ? 1 : 0; # lax: compare size+mtime only; strict: also compare ctime+inode
841+
842+my $start_dir = cwd();
843+
844+my @dirs = @ARGV;
845+@dirs = '.' unless @dirs;
846+foreach (@dirs) {
847+ $_ = abs_path($_);
848+}
849+
850+$| = 1;
851+
852+my $exit_code = 0;
853+
854+my $md4 = Digest::MD4->new;
855+my $md5 = Digest::MD5->new;
856+
857+while (@dirs) {
858+ my $dir = shift @dirs;
859+
860+ if (!chdir($dir)) {
861+ warn "Unable to chdir to $dir: $!\n";
862+ next;
863+ }
864+ if (!opendir(DP, '.')) {
865+ warn "Unable to opendir $dir: $!\n";
866+ next;
867+ }
868+
869+ my $reldir = $dir;
870+ $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
871+ if ($verbosity) {
872+ print "$reldir ... ";
873+ print "\n" if $check_opt;
874+ }
875+
876+ my %cache;
877+ my $f_cnt = 0;
878+ if (open(FP, '<', $SUMS_FILE)) {
879+ while (<FP>) {
880+ chomp;
881+ my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
882+ $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
883+ $f_cnt++;
884+ }
885+ close FP;
886+ }
887+
888+ my @subdirs;
889+ my $d_cnt = 0;
890+ my $update_cnt = 0;
891+ while (defined(my $fn = readdir(DP))) {
892+ next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
893+ if (-d _) {
894+ push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
895+ next;
896+ }
897+ next unless -f _;
898+
899+ my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
900+ $ctime &= 0xFFFFFFFF;
901+ $inode &= 0xFFFFFFFF;
902+ my $ref = $cache{$fn};
903+ $d_cnt++;
904+
905+ if (!$check_opt) {
906+ if (defined $ref) {
907+ $$ref[0] = 1;
908+ if ($$ref[3] == $size
909+ && $$ref[4] == $mtime
910+ && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode))
911+ && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) {
912+ next;
913+ }
914+ }
915+ if (!$update_cnt++) {
916+ print "UPDATING\n" if $verbosity;
917+ }
918+ }
919+
920+ if (!open(IN, $fn)) {
921+ print STDERR "Unable to read $fn: $!\n";
922+ if (defined $ref) {
923+ delete $cache{$fn};
924+ $f_cnt--;
925+ }
926+ next;
927+ }
928+
929+ my($sum4, $sum5);
930+ while (1) {
931+ while (sysread(IN, $_, 64*1024)) {
932+ $md4->add($_);
933+ $md5->add($_);
934+ }
935+ $sum4 = $md4->hexdigest;
936+ $sum5 = $md5->hexdigest;
937+ print " $sum4 $sum5" if $verbosity > 2;
938+ print " $fn" if $verbosity > 1;
939+ my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
940+ $ctime2 &= 0xFFFFFFFF;
941+ $inode2 &= 0xFFFFFFFF;
942+ last if $size == $size2 && $mtime == $mtime2
943+ && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2));
944+ $size = $size2;
945+ $mtime = $mtime2;
946+ $ctime = $ctime2;
947+ $inode = $inode2;
948+ sysseek(IN, 0, 0);
949+ print " REREADING\n" if $verbosity > 1;
950+ }
951+
952+ close IN;
953+
954+ if ($check_opt) {
955+ my $dif;
956+ if (!defined $ref) {
957+ $dif = 'MISSING';
958+ } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) {
959+ $dif = 'FAILED';
960+ } else {
961+ print " OK\n" if $verbosity > 1;
962+ next;
963+ }
964+ if ($verbosity < 2) {
965+ print $verbosity ? ' ' : "$reldir/";
966+ print $fn;
967+ }
968+ print " $dif\n";
969+ $exit_code = 1;
970+ } else {
971+ print "\n" if $verbosity > 1;
972+ $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime, $inode ];
973+ }
974+ }
975+
976+ closedir DP;
977+
978+ unshift(@dirs, sort @subdirs) if $recurse_opt;
979+
980+ if ($check_opt) {
981+ ;
982+ } elsif ($d_cnt == 0) {
983+ if ($f_cnt) {
984+ print "(removed $SUMS_FILE) " if $verbosity;
985+ unlink($SUMS_FILE);
986+ }
987+ print "empty\n" if $verbosity;
988+ } elsif ($update_cnt || $d_cnt != $f_cnt) {
989+ print "UPDATING\n" if $verbosity && !$update_cnt;
990+ open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
991+
992+ foreach my $fn (sort keys %cache) {
993+ my $ref = $cache{$fn};
994+ my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref;
995+ next unless $found;
996+ printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
997+ }
998+ close FP;
999+ } else {
1000+ print "ok\n" if $verbosity;
1001+ }
1002+}
1003+
1004+exit $exit_code;
1005+
1006+sub usage
1007+{
1008+ die <<EOT;
1009+Usage: rsyncsums [OPTIONS] [DIRS]
1010+
1011+Options:
1012+ -r, --recurse Update $SUMS_FILE files in subdirectories too.
1013+ -m, --mode=MODE Compare entries in either "lax" or "strict" mode. Using
1014+ "lax" compares size and mtime, while "strict" additionally
1015+ compares ctime and inode. Default: strict.
1016+ -c, --check Check if the checksums are right (doesn't update).
1017+ -v, --verbose Mention what we're doing. Repeat for more info.
1018+ -h, --help Display this help message.
1019+EOT
1020+}