The patches for 3.0.0.
[rsync/rsync-patches.git] / checksum-reading.diff
CommitLineData
f9df736a 1Optimize the --checksum option using externally created .rsyncsums files.
213d4328 2
f9df736a
WD
3This adds a new option, --sumfiles=MODE, that allows you to use a cache of
4checksums when performing a --checksum transfer. These checksum files
5(.rsyncsums) must be created by some other process -- see the perl script,
6rsyncsums, in the support dir for one way.
213d4328 7
f9df736a
WD
8This option can be particularly helpful to a public mirror that wants to
9pre-compute their .rsyncsums files, set the "checksum files = strict" option
10in their daemon config file, and thus make it quite efficient for a client
11rsync to make use of the --checksum option on their server.
213d4328
WD
12
13To use this patch, run these commands for a successful build:
14
cc3e685d 15 patch -p1 <patches/checksum-reading.diff
213d4328
WD
16 ./configure (optional if already run)
17 make
18
f9df736a
WD
19diff --git a/checksum.c b/checksum.c
20--- a/checksum.c
21+++ b/checksum.c
22@@ -100,7 +100,7 @@ void get_checksum2(char *buf, int32 len, char *sum)
23 }
24 }
25
26-void file_checksum(char *fname, char *sum, OFF_T size)
27+void file_checksum(const char *fname, OFF_T size, char *sum)
28 {
29 struct map_struct *buf;
30 OFF_T i, len = size;
31diff --git a/clientserver.c b/clientserver.c
32--- a/clientserver.c
33+++ b/clientserver.c
34@@ -39,6 +39,8 @@ extern int numeric_ids;
35 extern int filesfrom_fd;
36 extern int remote_protocol;
37 extern int protocol_version;
38+extern int always_checksum;
39+extern int checksum_files;
40 extern int io_timeout;
41 extern int no_detach;
42 extern int write_batch;
43@@ -750,6 +752,9 @@ static int rsync_module(int f_in, int f_out, int i, char *addr, char *host)
44 else if (am_root < 0) /* Treat --fake-super from client as --super. */
45 am_root = 2;
46
47+ checksum_files = always_checksum ? lp_checksum_files(i)
48+ : CSF_IGNORE_FILES;
49+
50 if (filesfrom_fd == 0)
51 filesfrom_fd = f_in;
52
cc3e685d
WD
53diff --git a/flist.c b/flist.c
54--- a/flist.c
55+++ b/flist.c
f9df736a
WD
56@@ -34,6 +34,7 @@ extern int am_generator;
57 extern int inc_recurse;
58 extern int do_progress;
59 extern int always_checksum;
60+extern int basis_dir_cnt;
61 extern int module_id;
62 extern int ignore_errors;
63 extern int numeric_ids;
64@@ -61,6 +62,7 @@ extern int file_extra_cnt;
65 extern int ignore_perishable;
66 extern int non_perishable_cnt;
67 extern int prune_empty_dirs;
68+extern int checksum_files;
69 extern int copy_links;
70 extern int copy_unsafe_links;
71 extern int protocol_version;
72@@ -68,6 +70,7 @@ extern int sanitize_paths;
73 extern int munge_symlinks;
74 extern int need_unsorted_flist;
75 extern int unsort_ndx;
76+extern char *basis_dir[];
77 extern struct stats stats;
78 extern char *filesfrom_host;
79
80@@ -83,6 +86,11 @@ extern int filesfrom_convert;
81 extern iconv_t ic_send, ic_recv;
82 #endif
83
84+#define RSYNCSUMS_FILE ".rsyncsums"
85+
86+#define CLEAN_STRIP_ROOT (1<<0)
87+#define CLEAN_KEEP_LAST (1<<1)
88+
89 #define PTR_SIZE (sizeof (struct file_struct *))
90
91 int io_error;
92@@ -124,7 +132,11 @@ static char empty_sum[MAX_DIGEST_LEN];
213d4328
WD
93 static int flist_count_offset; /* for --delete --progress */
94 static int dir_count = 0;
213d4328 95
f9df736a
WD
96-static void flist_sort_and_clean(struct file_list *flist, int strip_root);
97+static struct csum_cache {
98+ struct file_list *flist;
99+} *csum_cache = NULL;
100+
101+static void flist_sort_and_clean(struct file_list *flist, int flags);
213d4328 102 static void output_flist(struct file_list *flist);
f9df736a
WD
103
104 void init_flist(void)
105@@ -315,6 +327,238 @@ static void flist_done_allocating(struct file_list *flist)
213d4328
WD
106 flist->pool_boundary = ptr;
107 }
108
f9df736a
WD
109+void reset_checksum_cache()
110+{
111+ int slot, slots = am_sender ? 1 : basis_dir_cnt + 1;
112+
113+ if (!csum_cache) {
114+ csum_cache = new_array0(struct csum_cache, slots);
115+ if (!csum_cache)
116+ out_of_memory("reset_checksum_cache");
117+ }
118+
119+ for (slot = 0; slot < slots; slot++) {
120+ struct file_list *flist = csum_cache[slot].flist;
121+
122+ if (flist) {
123+ /* Reset the pool memory and empty the file-list array. */
124+ pool_free_old(flist->file_pool,
125+ pool_boundary(flist->file_pool, 0));
126+ flist->used = 0;
127+ } else
128+ flist = csum_cache[slot].flist = flist_new(FLIST_TEMP, "reset_checksum_cache");
129+
130+ flist->low = 0;
131+ flist->high = -1;
132+ flist->next = NULL;
133+ }
134+}
135+
136+
137+/* The basename_len count is the length of the basename + 1 for the null. */
138+static int add_checksum(struct file_list *flist, const char *dirname,
139+ const char *basename, int basename_len, OFF_T file_length,
140+ time_t mtime, uint32 ctime, uint32 inode,
141+ const char *sum)
213d4328
WD
142+{
143+ struct file_struct *file;
144+ int alloc_len, extra_len;
145+ char *bp;
146+
f9df736a 147+ if (basename_len == 10+1 && *basename == '.' && strcmp(basename, RSYNCSUMS_FILE) == 0)
213d4328
WD
148+ return 0;
149+
f9df736a
WD
150+ /* "2" is for a 32-bit ctime num and a 32-bit inode num. */
151+ extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT + 2)
213d4328
WD
152+ * EXTRA_LEN;
153+#if EXTRA_ROUNDING > 0
154+ if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
155+ extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
156+#endif
f9df736a
WD
157+ alloc_len = FILE_STRUCT_LEN + extra_len + basename_len;
158+ bp = pool_alloc(flist->file_pool, alloc_len, "add_checksum");
213d4328
WD
159+
160+ memset(bp, 0, extra_len + FILE_STRUCT_LEN);
161+ bp += extra_len;
162+ file = (struct file_struct *)bp;
163+ bp += FILE_STRUCT_LEN;
164+
f9df736a 165+ memcpy(bp, basename, basename_len);
213d4328
WD
166+
167+ file->mode = S_IFREG;
168+ file->modtime = mtime;
169+ file->len32 = (uint32)file_length;
170+ if (file_length > 0xFFFFFFFFu) {
171+ file->flags |= FLAG_LENGTH64;
172+ OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
173+ }
174+ file->dirname = dirname;
f9df736a
WD
175+ F_CTIME(file) = ctime;
176+ F_INODE(file) = inode;
213d4328
WD
177+ bp = F_SUM(file);
178+ memcpy(bp, sum, checksum_len);
179+
f9df736a
WD
180+ flist_expand(flist, 1);
181+ flist->files[flist->used++] = file;
213d4328 182+
f9df736a 183+ flist->sorted = flist->files;
213d4328
WD
184+
185+ return 1;
186+}
187+
f9df736a
WD
188+/* The "dirname" arg's data must remain unchanged during the lifespan of
189+ * the created csum_cache[].flist object because we use it directly. */
190+static void read_checksums(int slot, struct file_list *flist, const char *dirname)
213d4328
WD
191+{
192+ char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
213d4328 193+ FILE *fp;
f9df736a
WD
194+ char *cp;
195+ int len, i;
196+ time_t mtime;
197+ OFF_T file_length;
198+ uint32 ctime, inode;
199+ int dlen = dirname ? strlcpy(fbuf, dirname, sizeof fbuf) : 0;
213d4328 200+
f9df736a 201+ if (dlen >= (int)(sizeof fbuf - sizeof RSYNCSUMS_FILE))
213d4328
WD
202+ return;
203+ if (dlen)
204+ fbuf[dlen++] = '/';
205+ else
206+ dirname = NULL;
f9df736a
WD
207+ strlcpy(fbuf+dlen, RSYNCSUMS_FILE, sizeof fbuf - dlen);
208+ if (slot) {
209+ pathjoin(line, sizeof line, basis_dir[slot-1], fbuf);
210+ cp = line;
211+ } else
212+ cp = fbuf;
213+ if (!(fp = fopen(cp, "r")))
213d4328
WD
214+ return;
215+
216+ while (fgets(line, sizeof line, fp)) {
217+ cp = line;
218+ if (protocol_version >= 30) {
219+ char *alt_sum = cp;
220+ if (*cp == '=')
221+ while (*++cp == '=') {}
222+ else
223+ while (isXDigit(cp)) cp++;
224+ if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
225+ break;
226+ while (*++cp == ' ') {}
227+ }
228+
229+ if (*cp == '=') {
230+ continue;
231+ } else {
232+ for (i = 0; i < checksum_len*2; i++, cp++) {
233+ int x;
234+ if (isXDigit(cp)) {
235+ if (isDigit(cp))
236+ x = *cp - '0';
237+ else
238+ x = (*cp & 0xF) + 9;
239+ } else {
240+ cp = "";
241+ break;
242+ }
243+ if (i & 1)
244+ sum[i/2] |= x;
245+ else
246+ sum[i/2] = x << 4;
247+ }
248+ }
249+ if (*cp != ' ')
250+ break;
251+ while (*++cp == ' ') {}
252+
253+ if (protocol_version < 30) {
254+ char *alt_sum = cp;
255+ if (*cp == '=')
256+ while (*++cp == '=') {}
257+ else
258+ while (isXDigit(cp)) cp++;
259+ if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
260+ break;
261+ while (*++cp == ' ') {}
262+ }
263+
264+ file_length = 0;
265+ while (isDigit(cp))
266+ file_length = file_length * 10 + *cp++ - '0';
267+ if (*cp != ' ')
268+ break;
269+ while (*++cp == ' ') {}
270+
271+ mtime = 0;
272+ while (isDigit(cp))
273+ mtime = mtime * 10 + *cp++ - '0';
274+ if (*cp != ' ')
275+ break;
276+ while (*++cp == ' ') {}
277+
f9df736a 278+ ctime = 0;
213d4328 279+ while (isDigit(cp))
f9df736a 280+ ctime = ctime * 10 + *cp++ - '0';
213d4328
WD
281+ if (*cp != ' ')
282+ break;
283+ while (*++cp == ' ') {}
284+
f9df736a 285+ inode = 0;
213d4328 286+ while (isDigit(cp))
f9df736a 287+ inode = inode * 10 + *cp++ - '0';
213d4328
WD
288+ if (*cp != ' ')
289+ break;
290+ while (*++cp == ' ') {}
291+
292+ len = strlen(cp);
293+ while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
294+ len--;
295+ if (!len)
296+ break;
297+ cp[len++] = '\0'; /* len now counts the null */
298+ if (strchr(cp, '/'))
299+ break;
300+ if (len > MAXPATHLEN)
301+ continue;
302+
303+ strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
304+
f9df736a
WD
305+ add_checksum(flist, dirname, cp, len, file_length,
306+ mtime, ctime, inode,
307+ sum);
213d4328
WD
308+ }
309+ fclose(fp);
310+
f9df736a
WD
311+ flist_sort_and_clean(flist, CLEAN_KEEP_LAST);
312+}
313+
314+void get_cached_checksum(int slot, const char *fname, struct file_struct *file,
315+ STRUCT_STAT *stp, char *sum_buf)
316+{
317+ struct file_list *flist = csum_cache[slot].flist;
318+ int j;
319+
320+ if (!flist->next) {
321+ flist->next = cur_flist; /* next points from checksum flist to file flist */
322+ read_checksums(slot, flist, file->dirname);
323+ }
324+
325+ if ((j = flist_find(flist, file)) >= 0) {
326+ struct file_struct *fp = flist->sorted[j];
327+
328+ if (F_LENGTH(fp) == stp->st_size
329+ && fp->modtime == stp->st_mtime
330+ && (checksum_files & CSF_LAX
331+ || (F_CTIME(fp) == (uint32)stp->st_ctime
332+ && F_INODE(fp) == (uint32)stp->st_ino))) {
333+ memcpy(sum_buf, F_SUM(fp), MAX_DIGEST_LEN);
334+ return;
335+ }
336+ }
337+
338+ file_checksum(fname, stp->st_size, sum_buf);
213d4328
WD
339+}
340+
341 int push_pathname(const char *dir, int len)
342 {
343 if (dir == pathname)
f9df736a 344@@ -1005,7 +1249,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
213d4328
WD
345 STRUCT_STAT *stp, int flags, int filter_level)
346 {
347 static char *lastdir;
348- static int lastdir_len = -1;
349+ static int lastdir_len = -2;
350 struct file_struct *file;
351 char thisname[MAXPATHLEN];
352 char linkname[MAXPATHLEN];
f9df736a 353@@ -1138,9 +1382,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
213d4328
WD
354 memcpy(lastdir, thisname, len);
355 lastdir[len] = '\0';
356 lastdir_len = len;
f9df736a
WD
357+ if (checksum_files && am_sender && flist)
358+ reset_checksum_cache();
213d4328
WD
359 }
360- } else
361+ } else {
362 basename = thisname;
f9df736a 363+ if (checksum_files && am_sender && flist && lastdir_len == -2) {
213d4328 364+ lastdir_len = -1;
f9df736a 365+ reset_checksum_cache();
213d4328
WD
366+ }
367+ }
368 basename_len = strlen(basename) + 1; /* count the '\0' */
369
370 #ifdef SUPPORT_LINKS
f9df736a 371@@ -1216,11 +1467,15 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
213d4328
WD
372 }
373 #endif
374
375- if (always_checksum && am_sender && S_ISREG(st.st_mode))
376- file_checksum(thisname, tmp_sum, st.st_size);
377-
378 F_PATHNAME(file) = pathname;
379
380+ if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
f9df736a
WD
381+ if (flist && checksum_files)
382+ get_cached_checksum(0, thisname, file, &st, tmp_sum);
383+ else
384+ file_checksum(thisname, st.st_size, tmp_sum);
213d4328
WD
385+ }
386+
387 /* This code is only used by the receiver when it is building
388 * a list of files for a delete pass. */
389 if (keep_dirlinks && linkname_len && flist) {
f9df736a 390@@ -2076,7 +2331,8 @@ struct file_list *send_file_list(int f, int argc, char *argv[])
6e9495c7
WD
391 * file-list to check if this is a 1-file xfer. */
392 send_extra_file_list(f, 1);
393 }
394- }
395+ } else
396+ flist_eof = 1;
6e9495c7
WD
397
398 return flist;
399 }
f9df736a
WD
400@@ -2178,7 +2434,7 @@ struct file_list *recv_file_list(int f)
401 else if (f >= 0)
402 recv_id_list(f, flist);
403
404- flist_sort_and_clean(flist, relative_paths);
405+ flist_sort_and_clean(flist, relative_paths ? CLEAN_STRIP_ROOT : 0);
406
407 if (protocol_version < 30) {
408 /* Recv the io_error flag */
409@@ -2376,7 +2632,7 @@ void flist_free(struct file_list *flist)
410
411 /* This routine ensures we don't have any duplicate names in our file list.
412 * duplicate names can cause corruption because of the pipelining. */
413-static void flist_sort_and_clean(struct file_list *flist, int strip_root)
414+static void flist_sort_and_clean(struct file_list *flist, int flags)
415 {
416 char fbuf[MAXPATHLEN];
417 int i, prev_i;
418@@ -2427,7 +2683,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
419 /* If one is a dir and the other is not, we want to
420 * keep the dir because it might have contents in the
421 * list. Otherwise keep the first one. */
422- if (S_ISDIR(file->mode)) {
423+ if (S_ISDIR(file->mode) || flags & CLEAN_KEEP_LAST) {
424 struct file_struct *fp = flist->sorted[j];
425 if (!S_ISDIR(fp->mode))
426 keep = i, drop = j;
427@@ -2443,8 +2699,8 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
428 } else
429 keep = j, drop = i;
430
431- if (!am_sender) {
432- if (verbose > 1) {
433+ if (!am_sender || flags & CLEAN_KEEP_LAST) {
434+ if (verbose > 1 && !(flags & CLEAN_KEEP_LAST)) {
435 rprintf(FINFO,
436 "removing duplicate name %s from file list (%d)\n",
437 f_name(file, fbuf), drop + flist->ndx_start);
438@@ -2466,7 +2722,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
439 }
440 flist->high = prev_i;
441
442- if (strip_root) {
443+ if (flags & CLEAN_STRIP_ROOT) {
444 /* We need to strip off the leading slashes for relative
445 * paths, but this must be done _after_ the sorting phase. */
446 for (i = flist->low; i <= flist->high; i++) {
447diff --git a/generator.c b/generator.c
448--- a/generator.c
449+++ b/generator.c
450@@ -53,6 +53,7 @@ extern int delete_during;
451 extern int delete_after;
452 extern int msgdone_cnt;
453 extern int ignore_errors;
454+extern int checksum_files;
455 extern int remove_source_files;
456 extern int delay_updates;
457 extern int update_only;
458@@ -694,7 +695,7 @@ void itemize(const char *fnamecmp, struct file_struct *file, int ndx, int statre
459
460
461 /* Perform our quick-check heuristic for determining if a file is unchanged. */
462-int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
463+int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st, int slot)
464 {
465 if (st->st_size != F_LENGTH(file))
466 return 0;
467@@ -703,7 +704,10 @@ int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
468 of the file time to determine whether to sync */
469 if (always_checksum > 0 && S_ISREG(st->st_mode)) {
470 char sum[MAX_DIGEST_LEN];
471- file_checksum(fn, sum, st->st_size);
472+ if (checksum_files && slot >= 0)
473+ get_cached_checksum(slot, fn, file, st, sum);
474+ else
475+ file_checksum(fn, st->st_size, sum);
476 return memcmp(sum, F_SUM(file), checksum_len) == 0;
477 }
478
479@@ -957,7 +961,7 @@ static int try_dests_reg(struct file_struct *file, char *fname, int ndx,
480 match_level = 1;
481 /* FALL THROUGH */
482 case 1:
483- if (!unchanged_file(cmpbuf, file, &sxp->st))
484+ if (!unchanged_file(cmpbuf, file, &sxp->st, j+1))
485 continue;
486 best_match = j;
487 match_level = 2;
488@@ -1219,7 +1223,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
489 static const char *parent_dirname = "";
490 static struct file_struct *missing_dir = NULL, *excluded_dir = NULL;
491 static struct file_list *fuzzy_dirlist = NULL;
492- static int need_fuzzy_dirlist = 0;
493+ static int need_new_dirscan = 0;
494 struct file_struct *fuzzy_file = NULL;
495 int fd = -1, f_copy = -1;
496 stat_x sx, real_sx;
497@@ -1309,8 +1313,8 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
498 flist_free(fuzzy_dirlist);
499 fuzzy_dirlist = NULL;
500 }
501- if (fuzzy_basis)
502- need_fuzzy_dirlist = 1;
503+ if (fuzzy_basis || checksum_files)
504+ need_new_dirscan = 1;
505 #ifdef SUPPORT_ACLS
506 if (!preserve_perms)
507 dflt_perms = default_perms_for_dir(dn);
508@@ -1318,10 +1322,15 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
509 }
510 parent_dirname = dn;
511
512- if (need_fuzzy_dirlist && S_ISREG(file->mode)) {
513- strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
514- fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, 1);
515- need_fuzzy_dirlist = 0;
516+ if (need_new_dirscan && S_ISREG(file->mode)) {
517+ if (fuzzy_basis) {
518+ strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
519+ fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, 1);
520+ }
521+ if (checksum_files) {
522+ reset_checksum_cache();
523+ }
524+ need_new_dirscan = 0;
525 }
526
527 statret = link_stat(fname, &sx.st, keep_dirlinks && is_dir);
528@@ -1742,7 +1751,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
529 ;
530 else if (fnamecmp_type == FNAMECMP_FUZZY)
531 ;
532- else if (unchanged_file(fnamecmp, file, &sx.st)) {
533+ else if (unchanged_file(fnamecmp, file, &sx.st, fnamecmp_type == FNAMECMP_FNAME ? 0 : -1)) {
534 if (partialptr) {
535 do_unlink(partialptr);
536 handle_partial_dir(partialptr, PDIR_DELETE);
537diff --git a/hlink.c b/hlink.c
538--- a/hlink.c
539+++ b/hlink.c
540@@ -351,7 +351,7 @@ int hard_link_check(struct file_struct *file, int ndx, const char *fname,
541 }
542 break;
543 }
544- if (!unchanged_file(cmpbuf, file, &alt_sx.st))
545+ if (!unchanged_file(cmpbuf, file, &alt_sx.st, j+1))
546 continue;
547 statret = 1;
548 if (unchanged_attrs(cmpbuf, file, &alt_sx))
cc3e685d
WD
549diff --git a/ifuncs.h b/ifuncs.h
550--- a/ifuncs.h
551+++ b/ifuncs.h
213d4328
WD
552@@ -64,6 +64,12 @@ isDigit(const char *ptr)
553 }
554
555 static inline int
556+isXDigit(const char *ptr)
557+{
558+ return isxdigit(*(unsigned char *)ptr);
559+}
560+
561+static inline int
562 isPrint(const char *ptr)
563 {
564 return isprint(*(unsigned char *)ptr);
f9df736a
WD
565diff --git a/loadparm.c b/loadparm.c
566--- a/loadparm.c
567+++ b/loadparm.c
568@@ -149,6 +149,7 @@ typedef struct
569 char *temp_dir;
570 char *uid;
571
572+ int checksum_files;
573 int max_connections;
574 int max_verbosity;
575 int syslog_facility;
576@@ -200,6 +201,7 @@ static service sDefault =
577 /* temp_dir; */ NULL,
578 /* uid; */ NOBODY_USER,
579
580+ /* checksum_files; */ CSF_IGNORE_FILES,
581 /* max_connections; */ 0,
582 /* max_verbosity; */ 1,
583 /* syslog_facility; */ LOG_DAEMON,
584@@ -294,6 +296,12 @@ static struct enum_list enum_facilities[] = {
585 #endif
586 { -1, NULL }};
587
588+static struct enum_list enum_csum_modes[] = {
589+ { CSF_IGNORE_FILES, "none" },
590+ { CSF_LAX_MODE, "lax" },
591+ { CSF_STRICT_MODE, "strict" },
592+ { -1, NULL }
593+};
594
595 /* note that we do not initialise the defaults union - it is not allowed in ANSI C */
596 static struct parm_struct parm_table[] =
597@@ -306,6 +314,7 @@ static struct parm_struct parm_table[] =
598
599 {"auth users", P_STRING, P_LOCAL, &sDefault.auth_users, NULL,0},
600 {"charset", P_STRING, P_LOCAL, &sDefault.charset, NULL,0},
601+ {"checksum files", P_ENUM, P_LOCAL, &sDefault.checksum_files, enum_csum_modes,0},
602 {"comment", P_STRING, P_LOCAL, &sDefault.comment, NULL,0},
603 {"dont compress", P_STRING, P_LOCAL, &sDefault.dont_compress, NULL,0},
604 {"exclude from", P_STRING, P_LOCAL, &sDefault.exclude_from, NULL,0},
605@@ -423,6 +432,7 @@ FN_LOCAL_STRING(lp_secrets_file, secrets_file)
606 FN_LOCAL_STRING(lp_temp_dir, temp_dir)
607 FN_LOCAL_STRING(lp_uid, uid)
608
609+FN_LOCAL_INTEGER(lp_checksum_files, checksum_files)
610 FN_LOCAL_INTEGER(lp_max_connections, max_connections)
611 FN_LOCAL_INTEGER(lp_max_verbosity, max_verbosity)
612 FN_LOCAL_INTEGER(lp_syslog_facility, syslog_facility)
613diff --git a/options.c b/options.c
614--- a/options.c
615+++ b/options.c
616@@ -112,6 +112,7 @@ size_t bwlimit_writemax = 0;
617 int ignore_existing = 0;
618 int ignore_non_existing = 0;
619 int need_messages_from_generator = 0;
620+int checksum_files = CSF_IGNORE_FILES;
621 int max_delete = INT_MIN;
622 OFF_T max_size = 0;
623 OFF_T min_size = 0;
624@@ -316,6 +317,7 @@ void usage(enum logcode F)
625 rprintf(F," -q, --quiet suppress non-error messages\n");
626 rprintf(F," --no-motd suppress daemon-mode MOTD (see manpage caveat)\n");
627 rprintf(F," -c, --checksum skip based on checksum, not mod-time & size\n");
628+ rprintf(F," --sumfiles=MODE use .rsyncsums to speedup --checksum mode\n");
629 rprintf(F," -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)\n");
630 rprintf(F," --no-OPTION turn off an implied OPTION (e.g. --no-D)\n");
631 rprintf(F," -r, --recursive recurse into directories\n");
632@@ -445,7 +447,7 @@ enum {OPT_VERSION = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
633 OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST, OPT_HELP,
634 OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
635 OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
636- OPT_NO_D, OPT_APPEND, OPT_NO_ICONV,
637+ OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_SUMFILES,
638 OPT_SERVER, OPT_REFUSED_BASE = 9000};
639
640 static struct poptOption long_options[] = {
641@@ -566,6 +568,7 @@ static struct poptOption long_options[] = {
642 {"checksum", 'c', POPT_ARG_VAL, &always_checksum, 1, 0, 0 },
643 {"no-checksum", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
644 {"no-c", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
645+ {"sumfiles", 0, POPT_ARG_STRING, 0, OPT_SUMFILES, 0, 0 },
646 {"block-size", 'B', POPT_ARG_LONG, &block_size, 0, 0, 0 },
647 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
648 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
649@@ -1212,6 +1215,23 @@ int parse_arguments(int *argc_p, const char ***argv_p, int frommain)
650 }
651 break;
652
653+ case OPT_SUMFILES:
654+ arg = poptGetOptArg(pc);
655+ checksum_files = 0;
656+ if (strcmp(arg, "lax") == 0)
657+ checksum_files |= CSF_LAX_MODE;
658+ else if (strcmp(arg, "strict") == 0)
659+ checksum_files |= CSF_STRICT_MODE;
660+ else if (strcmp(arg, "none") == 0)
661+ checksum_files = CSF_IGNORE_FILES;
662+ else {
663+ snprintf(err_buf, sizeof err_buf,
664+ "Invalid argument passed to --sumfiles (%s)\n",
665+ arg);
666+ return 0;
667+ }
668+ break;
669+
670 case OPT_HELP:
671 usage(FINFO);
672 exit_cleanup(0);
673@@ -1311,6 +1331,9 @@ int parse_arguments(int *argc_p, const char ***argv_p, int frommain)
674 }
675 #endif
676
677+ if (checksum_files && !always_checksum)
678+ checksum_files = CSF_IGNORE_FILES;
679+
680 if (write_batch && read_batch) {
681 snprintf(err_buf, sizeof err_buf,
682 "--write-batch and --read-batch can not be used together\n");
683diff --git a/rsync.h b/rsync.h
684--- a/rsync.h
685+++ b/rsync.h
686@@ -680,6 +680,10 @@ extern int xattrs_ndx;
687 #define F_SUM(f) ((char*)OPT_EXTRA(f, LEN64_BUMP(f) + HLINK_BUMP(f) \
688 + SUM_EXTRA_CNT - 1))
689
690+/* These are only valid on an entry read from a checksum file. */
691+#define F_CTIME(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT)->unum
692+#define F_INODE(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT + 1)->unum
693+
694 /* Some utility defines: */
695 #define F_IS_ACTIVE(f) (f)->basename[0]
696 #define F_IS_HLINKED(f) ((f)->flags & FLAG_HLINKED)
697@@ -858,6 +862,13 @@ typedef struct {
698 char fname[1]; /* has variable size */
699 } relnamecache;
700
701+#define CSF_ENABLE (1<<1)
702+#define CSF_LAX (1<<2)
703+
704+#define CSF_IGNORE_FILES 0
705+#define CSF_LAX_MODE (CSF_ENABLE|CSF_LAX)
706+#define CSF_STRICT_MODE (CSF_ENABLE)
707+
708 #include "byteorder.h"
709 #include "lib/mdigest.h"
710 #include "lib/wildmatch.h"
711diff --git a/rsync.yo b/rsync.yo
712--- a/rsync.yo
713+++ b/rsync.yo
714@@ -317,6 +317,7 @@ to the detailed description below for a complete description. verb(
715 -q, --quiet suppress non-error messages
716 --no-motd suppress daemon-mode MOTD (see caveat)
717 -c, --checksum skip based on checksum, not mod-time & size
718+ --sumfiles=MODE use .rsyncsums to speedup --checksum mode
719 -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)
720 --no-OPTION turn off an implied OPTION (e.g. --no-D)
721 -r, --recursive recurse into directories
722@@ -516,9 +517,9 @@ uses a "quick check" that (by default) checks if each file's size and time
723 of last modification match between the sender and receiver. This option
724 changes this to compare a 128-bit MD4 checksum for each file that has a
725 matching size. Generating the checksums means that both sides will expend
726-a lot of disk I/O reading all the data in the files in the transfer (and
727-this is prior to any reading that will be done to transfer changed files),
728-so this can slow things down significantly.
729+a lot of disk I/O reading the data in all the files in the transfer, so
730+this can slow things down significantly (and this is prior to any reading
731+that will be done to transfer the files that have changed).
732
733 The sending side generates its checksums while it is doing the file-system
734 scan that builds the list of the available files. The receiver generates
735@@ -526,12 +527,44 @@ its checksums when it is scanning for changed files, and will checksum any
736 file that has the same size as the corresponding sender's file: files with
737 either a changed size or a changed checksum are selected for transfer.
738
739+See also the bf(--sumfiles) option for a way to use cached checksum data.
740+
741 Note that rsync always verifies that each em(transferred) file was
742 correctly reconstructed on the receiving side by checking a whole-file
743 checksum that is generated as the file is transferred, but that
744 automatic after-the-transfer verification has nothing to do with this
745 option's before-the-transfer "Does this file need to be updated?" check.
746
747+dit(bf(--sumfiles=MODE)) This option tells rsync to make use of any cached
748+checksum information it finds in per-directory .rsyncsums files when the
749+current transfer is using the bf(--checksum) option. If the checksum data
750+is up-to-date, it is used instead of recomputing it, saving both disk I/O
751+and CPU time. If the checksum data is missing or outdated, the checksum is
752+computed just as it would be if bf(--sumfiles) was not specified.
753+
754+The MODE value is either "lax", for relaxed checking (which compares size
755+and mtime), "strict" (which also compares ctime and inode), or "none" to
756+ignore any .rsyncsums files ("none" is the default). Rsync does not create
757+or update these files, but there is a perl script in the support directory
758+named "rsyncsums" that can be used for that.
759+
760+This option has no effect unless bf(--checksum, -c) was also specified. It
761+also only affects the current side of the transfer, so if you want the
762+remote side to parse its own .rsyncsums files, specify the option via the
763+bf(--rsync-path) option (e.g. --rsync-path="rsync --sumfiles=lax").
764+
765+To avoid transferring the system's checksum files, you can use an exclude
766+(e.g. bf(--exclude=.rsyncsums)). To make this easier to type, you can use
767+a popt alias. For instance, adding the following line in your ~/.popt file
768+defines a bf(--cc) option that enables lax checksum files and excludes the
769+checksum files:
770+
771+verb( rsync alias --cc -c --sumfiles=lax --exclude=.rsyncsums)
772+
773+An rsync daemon does not allow the client to control this setting, so see
774+the "checksum files" daemon parameter for information on how to make a
775+daemon use cached checksum data.
776+
777 dit(bf(-a, --archive)) This is equivalent to bf(-rlptgoD). It is a quick
778 way of saying you want recursion and want to preserve almost
779 everything (with -H being a notable omission).
780diff --git a/rsyncd.conf.yo b/rsyncd.conf.yo
781--- a/rsyncd.conf.yo
782+++ b/rsyncd.conf.yo
783@@ -281,6 +281,17 @@ locking on this file to ensure that the max connections limit is not
784 exceeded for the modules sharing the lock file.
785 The default is tt(/var/run/rsyncd.lock).
786
787+dit(bf(checksum files)) This option tells rsync to make use of any cached
788+checksum information it finds in per-directory .rsyncsums files when the
789+current transfer is using the bf(--checksum) option. The value can be set
790+to either "lax", "strict", or "none" -- see the client's bf(--sumfiles)
791+option for what these choices do.
792+
793+Note also that the client's command-line option, bf(--sumfiles), has no
794+effect on a daemon. A daemon will only access checksum files if this
795+config option tells it to. See also the bf(exclude) directive for a way
796+to hide the .rsyncsums files from the user.
797+
798 dit(bf(read only)) The "read only" option determines whether clients
799 will be able to upload files or not. If "read only" is true then any
800 attempted uploads will fail. If "read only" is false then uploads will
cc3e685d 801diff --git a/support/rsyncsums b/support/rsyncsums
f9df736a 802new file mode 100755
cc3e685d
WD
803--- /dev/null
804+++ b/support/rsyncsums
f9df736a 805@@ -0,0 +1,201 @@
213d4328
WD
806+#!/usr/bin/perl -w
807+use strict;
808+
809+use Getopt::Long;
810+use Cwd qw(abs_path cwd);
811+use Digest::MD4;
812+use Digest::MD5;
813+
814+our $SUMS_FILE = '.rsyncsums';
815+
816+&Getopt::Long::Configure('bundling');
817+&usage if !&GetOptions(
213d4328 818+ 'recurse|r' => \( my $recurse_opt ),
f9df736a 819+ 'mode|m=s' => \( my $cmp_mode = 'strict' ),
d1a75c9f 820+ 'check|c' => \( my $check_opt ),
213d4328
WD
821+ 'verbose|v+' => \( my $verbosity = 0 ),
822+ 'help|h' => \( my $help_opt ),
823+);
f9df736a
WD
824+&usage if $help_opt || $cmp_mode !~ /^(lax|strict)$/;
825+
826+my $ignore_ctime_and_inode = $cmp_mode eq 'lax' ? 1 : 0; # lax compares only size+mtime; strict also checks ctime+inode
213d4328
WD
827+
828+my $start_dir = cwd();
829+
830+my @dirs = @ARGV;
831+@dirs = '.' unless @dirs;
832+foreach (@dirs) {
833+ $_ = abs_path($_);
834+}
835+
836+$| = 1;
837+
d1a75c9f
WD
838+my $exit_code = 0;
839+
213d4328
WD
840+my $md4 = Digest::MD4->new;
841+my $md5 = Digest::MD5->new;
842+
843+while (@dirs) {
844+ my $dir = shift @dirs;
845+
846+ if (!chdir($dir)) {
847+ warn "Unable to chdir to $dir: $!\n";
848+ next;
849+ }
850+ if (!opendir(DP, '.')) {
851+ warn "Unable to opendir $dir: $!\n";
852+ next;
853+ }
854+
d1a75c9f
WD
855+ my $reldir = $dir;
856+ $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo;
213d4328 857+ if ($verbosity) {
213d4328 858+ print "$reldir ... ";
d1a75c9f 859+ print "\n" if $check_opt;
213d4328
WD
860+ }
861+
213d4328 862+ my %cache;
d1a75c9f
WD
863+ my $f_cnt = 0;
864+ if (open(FP, '<', $SUMS_FILE)) {
865+ while (<FP>) {
866+ chomp;
867+ my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
868+ $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
869+ $f_cnt++;
870+ }
871+ close FP;
872+ }
873+
213d4328 874+ my @subdirs;
d1a75c9f
WD
875+ my $d_cnt = 0;
876+ my $update_cnt = 0;
213d4328
WD
877+ while (defined(my $fn = readdir(DP))) {
878+ next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
879+ if (-d _) {
880+ push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
881+ next;
882+ }
883+ next unless -f _;
884+
885+ my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
f9df736a
WD
886+ $ctime &= 0xFFFFFFFF;
887+ $inode &= 0xFFFFFFFF;
d1a75c9f 888+ my $ref = $cache{$fn};
d1a75c9f 889+ $d_cnt++;
213d4328 890+
d1a75c9f 891+ if (!$check_opt) {
213d4328 892+ if (defined $ref) {
d1a75c9f
WD
893+ $$ref[0] = 1;
894+ if ($$ref[3] == $size
895+ && $$ref[4] == $mtime
896+ && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode))
897+ && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) {
898+ next;
213d4328 899+ }
d1a75c9f
WD
900+ }
901+ if (!$update_cnt++) {
902+ print "UPDATING\n" if $verbosity;
213d4328
WD
903+ }
904+ }
213d4328 905+
d1a75c9f
WD
906+ if (!open(IN, $fn)) {
907+ print STDERR "Unable to read $fn: $!\n";
908+ if (defined $ref) {
213d4328 909+ delete $cache{$fn};
d1a75c9f 910+ $f_cnt--;
213d4328 911+ }
d1a75c9f
WD
912+ next;
913+ }
213d4328 914+
d1a75c9f
WD
915+ my($sum4, $sum5);
916+ while (1) {
917+ while (sysread(IN, $_, 64*1024)) {
918+ $md4->add($_);
919+ $md5->add($_);
213d4328 920+ }
d1a75c9f
WD
921+ $sum4 = $md4->hexdigest;
922+ $sum5 = $md5->hexdigest;
923+ print " $sum4 $sum5" if $verbosity > 2;
924+ print " $fn" if $verbosity > 1;
925+ my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
f9df736a
WD
926+ $ctime2 &= 0xFFFFFFFF;
927+ $inode2 &= 0xFFFFFFFF;
d1a75c9f
WD
928+ last if $size == $size2 && $mtime == $mtime2
929+ && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2));
930+ $size = $size2;
931+ $mtime = $mtime2;
932+ $ctime = $ctime2;
933+ $inode = $inode2;
934+ sysseek(IN, 0, 0);
935+ print " REREADING\n" if $verbosity > 1;
936+ }
213d4328 937+
d1a75c9f
WD
938+ close IN;
939+
940+ if ($check_opt) {
941+ my $dif;
942+ if (!defined $ref) {
943+ $dif = 'MISSING';
944+ } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) {
945+ $dif = 'FAILED';
946+ } else {
947+ print " OK\n" if $verbosity > 1;
948+ next;
949+ }
950+ if ($verbosity < 2) {
951+ print $verbosity ? ' ' : "$reldir/";
952+ print $fn;
213d4328 953+ }
d1a75c9f
WD
954+ print " $dif\n";
955+ $exit_code = 1;
956+ } else {
957+ print "\n" if $verbosity > 1;
f9df736a 958+ $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime, $inode ];
d1a75c9f
WD
959+ }
960+ }
961+
962+ closedir DP;
213d4328 963+
d1a75c9f
WD
964+ unshift(@dirs, sort @subdirs) if $recurse_opt;
965+
966+ if ($check_opt) {
967+ ;
968+ } elsif ($d_cnt == 0) {
969+ if ($f_cnt) {
970+ print "(removed $SUMS_FILE) " if $verbosity;
971+ unlink($SUMS_FILE);
213d4328 972+ }
d1a75c9f
WD
973+ print "empty\n" if $verbosity;
974+ } elsif ($update_cnt || $d_cnt != $f_cnt) {
975+ print "UPDATING\n" if $verbosity && !$update_cnt;
976+ open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
213d4328 977+
213d4328
WD
978+ foreach my $fn (sort keys %cache) {
979+ my $ref = $cache{$fn};
d1a75c9f
WD
980+ my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref;
981+ next unless $found;
213d4328
WD
982+ printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
983+ }
d1a75c9f 984+ close FP;
213d4328
WD
985+ } else {
986+ print "ok\n" if $verbosity;
987+ }
213d4328
WD
988+}
989+
d1a75c9f
WD
990+exit $exit_code;
991+
213d4328
WD
992+sub usage
993+{
994+ die <<EOT;
995+Usage: rsyncsums [OPTIONS] [DIRS]
996+
997+Options:
998+ -r, --recurse Update $SUMS_FILE files in subdirectories too.
f9df736a
WD
999+ -m, --mode=MODE Compare entries in either "lax" or "strict" mode. Using
1000+ "lax" compares size and mtime, while "strict" additionally
1001+ compares ctime and inode. Default: strict.
d1a75c9f 1002+ -c, --check Check if the checksums are right (doesn't update).
213d4328
WD
1003+ -v, --verbose Mention what we're doing. Repeat for more info.
1004+ -h, --help Display this help message.
1005+EOT
1006+}