Use "use warnings" rather than -w on the #! line.
[rsync/rsync-patches.git] / checksum-reading.diff
CommitLineData
f9df736a 1Optimize the --checksum option using externally created .rsyncsums files.
213d4328 2
f9df736a
WD
3This adds a new option, --sumfiles=MODE, that allows you to use a cache of
4checksums when performing a --checksum transfer. These checksum files
5(.rsyncsums) must be created by some other process -- see the perl script,
6rsyncsums, in the support dir for one way.
213d4328 7
f9df736a
WD
8This option can be particularly helpful to a public mirror that wants to
9pre-compute their .rsyncsums files, set the "checksum files = strict" option
10in their daemon config file, and thus make it quite efficient for a client
11rsync to make use of the --checksum option on their server.
213d4328
WD
12
13To use this patch, run these commands for a successful build:
14
cc3e685d 15 patch -p1 <patches/checksum-reading.diff
213d4328
WD
16 ./configure (optional if already run)
17 make
18
f9df736a
WD
19diff --git a/checksum.c b/checksum.c
20--- a/checksum.c
21+++ b/checksum.c
22@@ -100,7 +100,7 @@ void get_checksum2(char *buf, int32 len, char *sum)
23 }
24 }
25
26-void file_checksum(char *fname, char *sum, OFF_T size)
27+void file_checksum(const char *fname, OFF_T size, char *sum)
28 {
29 struct map_struct *buf;
30 OFF_T i, len = size;
31diff --git a/clientserver.c b/clientserver.c
32--- a/clientserver.c
33+++ b/clientserver.c
c0c7984e 34@@ -42,6 +42,8 @@ extern int numeric_ids;
f9df736a
WD
35 extern int filesfrom_fd;
36 extern int remote_protocol;
37 extern int protocol_version;
38+extern int always_checksum;
39+extern int checksum_files;
40 extern int io_timeout;
41 extern int no_detach;
42 extern int write_batch;
abd3adb8 43@@ -779,6 +781,9 @@ static int rsync_module(int f_in, int f_out, int i, char *addr, char *host)
c0c7984e 44 } else if (am_root < 0) /* Treat --fake-super from client as --super. */
f9df736a
WD
45 am_root = 2;
46
47+ checksum_files = always_checksum ? lp_checksum_files(i)
48+ : CSF_IGNORE_FILES;
49+
50 if (filesfrom_fd == 0)
51 filesfrom_fd = f_in;
52
cc3e685d
WD
53diff --git a/flist.c b/flist.c
54--- a/flist.c
55+++ b/flist.c
f9df736a
WD
56@@ -34,6 +34,7 @@ extern int am_generator;
57 extern int inc_recurse;
58 extern int do_progress;
59 extern int always_checksum;
60+extern int basis_dir_cnt;
61 extern int module_id;
62 extern int ignore_errors;
63 extern int numeric_ids;
64@@ -61,6 +62,7 @@ extern int file_extra_cnt;
65 extern int ignore_perishable;
66 extern int non_perishable_cnt;
67 extern int prune_empty_dirs;
68+extern int checksum_files;
69 extern int copy_links;
70 extern int copy_unsafe_links;
71 extern int protocol_version;
ae306a29 72@@ -69,6 +71,7 @@ extern int munge_symlinks;
f9df736a 73 extern int need_unsorted_flist;
ae306a29 74 extern int sender_symlink_iconv;
f9df736a
WD
75 extern int unsort_ndx;
76+extern char *basis_dir[];
77 extern struct stats stats;
78 extern char *filesfrom_host;
79
ae306a29 80@@ -84,6 +87,12 @@ extern int filesfrom_convert;
f9df736a
WD
81 extern iconv_t ic_send, ic_recv;
82 #endif
83
84+#define RSYNCSUMS_FILE ".rsyncsums"
c0c7984e 85+#define RSYNCSUMS_LEN (sizeof RSYNCSUMS_FILE-1)
f9df736a
WD
86+
87+#define CLEAN_STRIP_ROOT (1<<0)
88+#define CLEAN_KEEP_LAST (1<<1)
89+
90 #define PTR_SIZE (sizeof (struct file_struct *))
91
92 int io_error;
ae306a29 93@@ -125,7 +134,11 @@ static char empty_sum[MAX_DIGEST_LEN];
213d4328
WD
94 static int flist_count_offset; /* for --delete --progress */
95 static int dir_count = 0;
213d4328 96
f9df736a
WD
97-static void flist_sort_and_clean(struct file_list *flist, int strip_root);
98+static struct csum_cache {
99+ struct file_list *flist;
100+} *csum_cache = NULL;
101+
102+static void flist_sort_and_clean(struct file_list *flist, int flags);
213d4328 103 static void output_flist(struct file_list *flist);
f9df736a
WD
104
105 void init_flist(void)
ae306a29 106@@ -339,6 +352,238 @@ static void flist_done_allocating(struct file_list *flist)
213d4328
WD
107 flist->pool_boundary = ptr;
108 }
109
f9df736a
WD
110+void reset_checksum_cache()
111+{
112+ int slot, slots = am_sender ? 1 : basis_dir_cnt + 1;
113+
114+ if (!csum_cache) {
115+ csum_cache = new_array0(struct csum_cache, slots);
116+ if (!csum_cache)
117+ out_of_memory("reset_checksum_cache");
118+ }
119+
120+ for (slot = 0; slot < slots; slot++) {
121+ struct file_list *flist = csum_cache[slot].flist;
122+
123+ if (flist) {
124+ /* Reset the pool memory and empty the file-list array. */
125+ pool_free_old(flist->file_pool,
126+ pool_boundary(flist->file_pool, 0));
127+ flist->used = 0;
128+ } else
129+ flist = csum_cache[slot].flist = flist_new(FLIST_TEMP, "reset_checksum_cache");
130+
131+ flist->low = 0;
132+ flist->high = -1;
133+ flist->next = NULL;
134+ }
135+}
136+
c0c7984e 137+/* The basename_len count is the length of the basename + 1 for the '\0'. */
f9df736a
WD
138+static int add_checksum(struct file_list *flist, const char *dirname,
139+ const char *basename, int basename_len, OFF_T file_length,
140+ time_t mtime, uint32 ctime, uint32 inode,
141+ const char *sum)
213d4328
WD
142+{
143+ struct file_struct *file;
144+ int alloc_len, extra_len;
145+ char *bp;
146+
c0c7984e
WD
147+ if (basename_len == RSYNCSUMS_LEN+1 && *basename == '.'
148+ && strcmp(basename, RSYNCSUMS_FILE) == 0)
213d4328
WD
149+ return 0;
150+
f9df736a
WD
151+ /* "2" is for a 32-bit ctime num and a 32-bit inode num. */
152+ extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT + 2)
213d4328
WD
153+ * EXTRA_LEN;
154+#if EXTRA_ROUNDING > 0
155+ if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
156+ extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
157+#endif
f9df736a
WD
158+ alloc_len = FILE_STRUCT_LEN + extra_len + basename_len;
159+ bp = pool_alloc(flist->file_pool, alloc_len, "add_checksum");
213d4328
WD
160+
161+ memset(bp, 0, extra_len + FILE_STRUCT_LEN);
162+ bp += extra_len;
163+ file = (struct file_struct *)bp;
164+ bp += FILE_STRUCT_LEN;
165+
f9df736a 166+ memcpy(bp, basename, basename_len);
213d4328
WD
167+
168+ file->mode = S_IFREG;
169+ file->modtime = mtime;
170+ file->len32 = (uint32)file_length;
171+ if (file_length > 0xFFFFFFFFu) {
172+ file->flags |= FLAG_LENGTH64;
173+ OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
174+ }
175+ file->dirname = dirname;
f9df736a
WD
176+ F_CTIME(file) = ctime;
177+ F_INODE(file) = inode;
213d4328
WD
178+ bp = F_SUM(file);
179+ memcpy(bp, sum, checksum_len);
180+
f9df736a
WD
181+ flist_expand(flist, 1);
182+ flist->files[flist->used++] = file;
213d4328 183+
f9df736a 184+ flist->sorted = flist->files;
213d4328
WD
185+
186+ return 1;
187+}
188+
f9df736a
WD
189+/* The "dirname" arg's data must remain unchanged during the lifespan of
190+ * the created csum_cache[].flist object because we use it directly. */
191+static void read_checksums(int slot, struct file_list *flist, const char *dirname)
213d4328
WD
192+{
193+ char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
213d4328 194+ FILE *fp;
f9df736a
WD
195+ char *cp;
196+ int len, i;
197+ time_t mtime;
198+ OFF_T file_length;
199+ uint32 ctime, inode;
200+ int dlen = dirname ? strlcpy(fbuf, dirname, sizeof fbuf) : 0;
213d4328 201+
c0c7984e 202+ if (dlen >= (int)(sizeof fbuf - 1 - RSYNCSUMS_LEN))
213d4328
WD
203+ return;
204+ if (dlen)
205+ fbuf[dlen++] = '/';
206+ else
207+ dirname = NULL;
f9df736a
WD
208+ strlcpy(fbuf+dlen, RSYNCSUMS_FILE, sizeof fbuf - dlen);
209+ if (slot) {
210+ pathjoin(line, sizeof line, basis_dir[slot-1], fbuf);
211+ cp = line;
212+ } else
213+ cp = fbuf;
214+ if (!(fp = fopen(cp, "r")))
213d4328
WD
215+ return;
216+
217+ while (fgets(line, sizeof line, fp)) {
218+ cp = line;
219+ if (protocol_version >= 30) {
220+ char *alt_sum = cp;
221+ if (*cp == '=')
222+ while (*++cp == '=') {}
223+ else
224+ while (isXDigit(cp)) cp++;
225+ if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
226+ break;
227+ while (*++cp == ' ') {}
228+ }
229+
230+ if (*cp == '=') {
231+ continue;
232+ } else {
233+ for (i = 0; i < checksum_len*2; i++, cp++) {
234+ int x;
235+ if (isXDigit(cp)) {
236+ if (isDigit(cp))
237+ x = *cp - '0';
238+ else
239+ x = (*cp & 0xF) + 9;
240+ } else {
241+ cp = "";
242+ break;
243+ }
244+ if (i & 1)
245+ sum[i/2] |= x;
246+ else
247+ sum[i/2] = x << 4;
248+ }
249+ }
250+ if (*cp != ' ')
251+ break;
252+ while (*++cp == ' ') {}
253+
254+ if (protocol_version < 30) {
255+ char *alt_sum = cp;
256+ if (*cp == '=')
257+ while (*++cp == '=') {}
258+ else
259+ while (isXDigit(cp)) cp++;
260+ if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
261+ break;
262+ while (*++cp == ' ') {}
263+ }
264+
265+ file_length = 0;
266+ while (isDigit(cp))
267+ file_length = file_length * 10 + *cp++ - '0';
268+ if (*cp != ' ')
269+ break;
270+ while (*++cp == ' ') {}
271+
272+ mtime = 0;
273+ while (isDigit(cp))
274+ mtime = mtime * 10 + *cp++ - '0';
275+ if (*cp != ' ')
276+ break;
277+ while (*++cp == ' ') {}
278+
f9df736a 279+ ctime = 0;
213d4328 280+ while (isDigit(cp))
f9df736a 281+ ctime = ctime * 10 + *cp++ - '0';
213d4328
WD
282+ if (*cp != ' ')
283+ break;
284+ while (*++cp == ' ') {}
285+
f9df736a 286+ inode = 0;
213d4328 287+ while (isDigit(cp))
f9df736a 288+ inode = inode * 10 + *cp++ - '0';
213d4328
WD
289+ if (*cp != ' ')
290+ break;
291+ while (*++cp == ' ') {}
292+
293+ len = strlen(cp);
294+ while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
295+ len--;
296+ if (!len)
297+ break;
298+ cp[len++] = '\0'; /* len now counts the null */
299+ if (strchr(cp, '/'))
300+ break;
301+ if (len > MAXPATHLEN)
302+ continue;
303+
304+ strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
305+
f9df736a
WD
306+ add_checksum(flist, dirname, cp, len, file_length,
307+ mtime, ctime, inode,
308+ sum);
213d4328
WD
309+ }
310+ fclose(fp);
311+
f9df736a
WD
312+ flist_sort_and_clean(flist, CLEAN_KEEP_LAST);
313+}
314+
315+void get_cached_checksum(int slot, const char *fname, struct file_struct *file,
316+ STRUCT_STAT *stp, char *sum_buf)
317+{
318+ struct file_list *flist = csum_cache[slot].flist;
319+ int j;
320+
321+ if (!flist->next) {
322+ flist->next = cur_flist; /* next points from checksum flist to file flist */
323+ read_checksums(slot, flist, file->dirname);
324+ }
325+
326+ if ((j = flist_find(flist, file)) >= 0) {
327+ struct file_struct *fp = flist->sorted[j];
328+
329+ if (F_LENGTH(fp) == stp->st_size
330+ && fp->modtime == stp->st_mtime
331+ && (checksum_files & CSF_LAX
332+ || (F_CTIME(fp) == (uint32)stp->st_ctime
333+ && F_INODE(fp) == (uint32)stp->st_ino))) {
334+ memcpy(sum_buf, F_SUM(fp), MAX_DIGEST_LEN);
335+ return;
336+ }
337+ }
338+
339+ file_checksum(fname, stp->st_size, sum_buf);
213d4328
WD
340+}
341+
cbdf862c
WD
342 /* Call this with EITHER (1) "file, NULL, 0" to chdir() to the file's
343 * F_PATHNAME(), or (2) "NULL, dir, dirlen" to chdir() to the supplied dir,
344 * with dir == NULL taken to be the starting directory, and dirlen < 0
ae306a29 345@@ -1068,7 +1313,7 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
213d4328
WD
346 STRUCT_STAT *stp, int flags, int filter_level)
347 {
348 static char *lastdir;
349- static int lastdir_len = -1;
350+ static int lastdir_len = -2;
351 struct file_struct *file;
352 char thisname[MAXPATHLEN];
353 char linkname[MAXPATHLEN];
ae306a29 354@@ -1207,9 +1452,16 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
213d4328
WD
355 memcpy(lastdir, thisname, len);
356 lastdir[len] = '\0';
357 lastdir_len = len;
f9df736a
WD
358+ if (checksum_files && am_sender && flist)
359+ reset_checksum_cache();
213d4328
WD
360 }
361- } else
362+ } else {
363 basename = thisname;
f9df736a 364+ if (checksum_files && am_sender && flist && lastdir_len == -2) {
213d4328 365+ lastdir_len = -1;
f9df736a 366+ reset_checksum_cache();
213d4328
WD
367+ }
368+ }
369 basename_len = strlen(basename) + 1; /* count the '\0' */
370
371 #ifdef SUPPORT_LINKS
ae306a29 372@@ -1286,14 +1538,18 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
c0c7984e 373 memcpy(bp + basename_len, linkname, linkname_len);
213d4328
WD
374 #endif
375
376- if (always_checksum && am_sender && S_ISREG(st.st_mode))
377- file_checksum(thisname, tmp_sum, st.st_size);
378-
c0c7984e
WD
379 if (am_sender)
380 F_PATHNAME(file) = pathname;
381 else if (!pool)
382 F_DEPTH(file) = extra_len / EXTRA_LEN;
213d4328
WD
383
384+ if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
f9df736a
WD
385+ if (flist && checksum_files)
386+ get_cached_checksum(0, thisname, file, &st, tmp_sum);
387+ else
388+ file_checksum(thisname, st.st_size, tmp_sum);
213d4328
WD
389+ }
390+
abd3adb8
WD
391 if (basename_len == 0+1) {
392 if (!pool)
393 unmake_file(file);
bf1bd9d4 394@@ -2236,7 +2492,8 @@ struct file_list *send_file_list(int f, int argc, char *argv[])
6e9495c7
WD
395 * file-list to check if this is a 1-file xfer. */
396 send_extra_file_list(f, 1);
397 }
398- }
399+ } else
400+ flist_eof = 1;
6e9495c7
WD
401
402 return flist;
403 }
bf1bd9d4 404@@ -2342,7 +2599,7 @@ struct file_list *recv_file_list(int f)
f9df736a
WD
405 else if (f >= 0)
406 recv_id_list(f, flist);
407
408- flist_sort_and_clean(flist, relative_paths);
409+ flist_sort_and_clean(flist, relative_paths ? CLEAN_STRIP_ROOT : 0);
410
411 if (protocol_version < 30) {
412 /* Recv the io_error flag */
bf1bd9d4 413@@ -2562,7 +2819,7 @@ void flist_free(struct file_list *flist)
f9df736a
WD
414
415 /* This routine ensures we don't have any duplicate names in our file list.
416 * duplicate names can cause corruption because of the pipelining. */
417-static void flist_sort_and_clean(struct file_list *flist, int strip_root)
418+static void flist_sort_and_clean(struct file_list *flist, int flags)
419 {
420 char fbuf[MAXPATHLEN];
421 int i, prev_i;
bf1bd9d4 422@@ -2613,7 +2870,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
f9df736a
WD
423 /* If one is a dir and the other is not, we want to
424 * keep the dir because it might have contents in the
425 * list. Otherwise keep the first one. */
426- if (S_ISDIR(file->mode)) {
427+ if (S_ISDIR(file->mode) || flags & CLEAN_KEEP_LAST) {
428 struct file_struct *fp = flist->sorted[j];
429 if (!S_ISDIR(fp->mode))
430 keep = i, drop = j;
bf1bd9d4 431@@ -2629,8 +2886,8 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
f9df736a
WD
432 } else
433 keep = j, drop = i;
434
435- if (!am_sender) {
436- if (verbose > 1) {
437+ if (!am_sender || flags & CLEAN_KEEP_LAST) {
438+ if (verbose > 1 && !(flags & CLEAN_KEEP_LAST)) {
439 rprintf(FINFO,
440 "removing duplicate name %s from file list (%d)\n",
441 f_name(file, fbuf), drop + flist->ndx_start);
bf1bd9d4 442@@ -2652,7 +2909,7 @@ static void flist_sort_and_clean(struct file_list *flist, int strip_root)
f9df736a
WD
443 }
444 flist->high = prev_i;
445
446- if (strip_root) {
447+ if (flags & CLEAN_STRIP_ROOT) {
448 /* We need to strip off the leading slashes for relative
449 * paths, but this must be done _after_ the sorting phase. */
450 for (i = flist->low; i <= flist->high; i++) {
451diff --git a/generator.c b/generator.c
452--- a/generator.c
453+++ b/generator.c
c0c7984e 454@@ -52,6 +52,7 @@ extern int delete_during;
f9df736a
WD
455 extern int delete_after;
456 extern int msgdone_cnt;
457 extern int ignore_errors;
458+extern int checksum_files;
459 extern int remove_source_files;
460 extern int delay_updates;
461 extern int update_only;
abd3adb8 462@@ -715,7 +716,7 @@ void itemize(const char *fnamecmp, struct file_struct *file, int ndx, int statre
f9df736a
WD
463
464
465 /* Perform our quick-check heuristic for determining if a file is unchanged. */
466-int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
467+int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st, int slot)
468 {
469 if (st->st_size != F_LENGTH(file))
470 return 0;
abd3adb8 471@@ -724,7 +725,10 @@ int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
f9df736a
WD
472 of the file time to determine whether to sync */
473 if (always_checksum > 0 && S_ISREG(st->st_mode)) {
474 char sum[MAX_DIGEST_LEN];
475- file_checksum(fn, sum, st->st_size);
476+ if (checksum_files && slot >= 0)
477+ get_cached_checksum(slot, fn, file, st, sum);
478+ else
479+ file_checksum(fn, st->st_size, sum);
480 return memcmp(sum, F_SUM(file), checksum_len) == 0;
481 }
482
abd3adb8 483@@ -988,7 +992,7 @@ static int try_dests_reg(struct file_struct *file, char *fname, int ndx,
f9df736a
WD
484 match_level = 1;
485 /* FALL THROUGH */
486 case 1:
487- if (!unchanged_file(cmpbuf, file, &sxp->st))
488+ if (!unchanged_file(cmpbuf, file, &sxp->st, j+1))
489 continue;
490 best_match = j;
491 match_level = 2;
abd3adb8 492@@ -1261,7 +1265,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
c0c7984e
WD
493 * --ignore-non-existing, daemon exclude, or mkdir failure. */
494 static struct file_struct *skip_dir = NULL;
f9df736a
WD
495 static struct file_list *fuzzy_dirlist = NULL;
496- static int need_fuzzy_dirlist = 0;
497+ static int need_new_dirscan = 0;
498 struct file_struct *fuzzy_file = NULL;
499 int fd = -1, f_copy = -1;
500 stat_x sx, real_sx;
abd3adb8 501@@ -1350,8 +1354,8 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
f9df736a
WD
502 flist_free(fuzzy_dirlist);
503 fuzzy_dirlist = NULL;
504 }
505- if (fuzzy_basis)
506- need_fuzzy_dirlist = 1;
507+ if (fuzzy_basis || checksum_files)
508+ need_new_dirscan = 1;
509 #ifdef SUPPORT_ACLS
510 if (!preserve_perms)
511 dflt_perms = default_perms_for_dir(dn);
abd3adb8 512@@ -1359,10 +1363,15 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
f9df736a
WD
513 }
514 parent_dirname = dn;
515
516- if (need_fuzzy_dirlist && S_ISREG(file->mode)) {
517- strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
518- fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, 1);
519- need_fuzzy_dirlist = 0;
520+ if (need_new_dirscan && S_ISREG(file->mode)) {
521+ if (fuzzy_basis) {
522+ strlcpy(fnamecmpbuf, dn, sizeof fnamecmpbuf);
523+ fuzzy_dirlist = get_dirlist(fnamecmpbuf, -1, 1);
524+ }
525+ if (checksum_files) {
526+ reset_checksum_cache();
527+ }
528+ need_new_dirscan = 0;
529 }
530
531 statret = link_stat(fname, &sx.st, keep_dirlinks && is_dir);
abd3adb8 532@@ -1796,7 +1805,7 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
f9df736a
WD
533 ;
534 else if (fnamecmp_type == FNAMECMP_FUZZY)
535 ;
536- else if (unchanged_file(fnamecmp, file, &sx.st)) {
537+ else if (unchanged_file(fnamecmp, file, &sx.st, fnamecmp_type == FNAMECMP_FNAME ? 0 : -1)) {
538 if (partialptr) {
539 do_unlink(partialptr);
540 handle_partial_dir(partialptr, PDIR_DELETE);
541diff --git a/hlink.c b/hlink.c
542--- a/hlink.c
543+++ b/hlink.c
963ca808 544@@ -390,7 +390,7 @@ int hard_link_check(struct file_struct *file, int ndx, const char *fname,
f9df736a
WD
545 }
546 break;
547 }
548- if (!unchanged_file(cmpbuf, file, &alt_sx.st))
549+ if (!unchanged_file(cmpbuf, file, &alt_sx.st, j+1))
550 continue;
551 statret = 1;
552 if (unchanged_attrs(cmpbuf, file, &alt_sx))
cc3e685d
WD
553diff --git a/ifuncs.h b/ifuncs.h
554--- a/ifuncs.h
555+++ b/ifuncs.h
c0c7984e 556@@ -74,6 +74,12 @@ isDigit(const char *ptr)
213d4328
WD
557 }
558
559 static inline int
560+isXDigit(const char *ptr)
561+{
562+ return isxdigit(*(unsigned char *)ptr);
563+}
564+
565+static inline int
566 isPrint(const char *ptr)
567 {
568 return isprint(*(unsigned char *)ptr);
f9df736a
WD
569diff --git a/loadparm.c b/loadparm.c
570--- a/loadparm.c
571+++ b/loadparm.c
572@@ -149,6 +149,7 @@ typedef struct
573 char *temp_dir;
574 char *uid;
575
576+ int checksum_files;
577 int max_connections;
578 int max_verbosity;
579 int syslog_facility;
580@@ -200,6 +201,7 @@ static service sDefault =
581 /* temp_dir; */ NULL,
582 /* uid; */ NOBODY_USER,
583
584+ /* checksum_files; */ CSF_IGNORE_FILES,
585 /* max_connections; */ 0,
586 /* max_verbosity; */ 1,
587 /* syslog_facility; */ LOG_DAEMON,
588@@ -294,6 +296,12 @@ static struct enum_list enum_facilities[] = {
589 #endif
590 { -1, NULL }};
591
592+static struct enum_list enum_csum_modes[] = {
593+ { CSF_IGNORE_FILES, "none" },
594+ { CSF_LAX_MODE, "lax" },
595+ { CSF_STRICT_MODE, "strict" },
596+ { -1, NULL }
597+};
598
599 /* note that we do not initialise the defaults union - it is not allowed in ANSI C */
600 static struct parm_struct parm_table[] =
601@@ -306,6 +314,7 @@ static struct parm_struct parm_table[] =
602
603 {"auth users", P_STRING, P_LOCAL, &sDefault.auth_users, NULL,0},
604 {"charset", P_STRING, P_LOCAL, &sDefault.charset, NULL,0},
605+ {"checksum files", P_ENUM, P_LOCAL, &sDefault.checksum_files, enum_csum_modes,0},
606 {"comment", P_STRING, P_LOCAL, &sDefault.comment, NULL,0},
607 {"dont compress", P_STRING, P_LOCAL, &sDefault.dont_compress, NULL,0},
608 {"exclude from", P_STRING, P_LOCAL, &sDefault.exclude_from, NULL,0},
609@@ -423,6 +432,7 @@ FN_LOCAL_STRING(lp_secrets_file, secrets_file)
610 FN_LOCAL_STRING(lp_temp_dir, temp_dir)
611 FN_LOCAL_STRING(lp_uid, uid)
612
613+FN_LOCAL_INTEGER(lp_checksum_files, checksum_files)
614 FN_LOCAL_INTEGER(lp_max_connections, max_connections)
615 FN_LOCAL_INTEGER(lp_max_verbosity, max_verbosity)
616 FN_LOCAL_INTEGER(lp_syslog_facility, syslog_facility)
617diff --git a/options.c b/options.c
618--- a/options.c
619+++ b/options.c
c0c7984e 620@@ -113,6 +113,7 @@ size_t bwlimit_writemax = 0;
f9df736a
WD
621 int ignore_existing = 0;
622 int ignore_non_existing = 0;
623 int need_messages_from_generator = 0;
624+int checksum_files = CSF_IGNORE_FILES;
625 int max_delete = INT_MIN;
626 OFF_T max_size = 0;
627 OFF_T min_size = 0;
abd3adb8 628@@ -318,6 +319,7 @@ void usage(enum logcode F)
f9df736a
WD
629 rprintf(F," -q, --quiet suppress non-error messages\n");
630 rprintf(F," --no-motd suppress daemon-mode MOTD (see manpage caveat)\n");
631 rprintf(F," -c, --checksum skip based on checksum, not mod-time & size\n");
632+ rprintf(F," --sumfiles=MODE use .rsyncsums to speedup --checksum mode\n");
633 rprintf(F," -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)\n");
634 rprintf(F," --no-OPTION turn off an implied OPTION (e.g. --no-D)\n");
635 rprintf(F," -r, --recursive recurse into directories\n");
abd3adb8 636@@ -447,7 +449,7 @@ enum {OPT_VERSION = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
f9df736a
WD
637 OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST, OPT_HELP,
638 OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
639 OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
640- OPT_NO_D, OPT_APPEND, OPT_NO_ICONV,
641+ OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_SUMFILES,
642 OPT_SERVER, OPT_REFUSED_BASE = 9000};
643
644 static struct poptOption long_options[] = {
abd3adb8 645@@ -575,6 +577,7 @@ static struct poptOption long_options[] = {
f9df736a
WD
646 {"checksum", 'c', POPT_ARG_VAL, &always_checksum, 1, 0, 0 },
647 {"no-checksum", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
648 {"no-c", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
649+ {"sumfiles", 0, POPT_ARG_STRING, 0, OPT_SUMFILES, 0, 0 },
650 {"block-size", 'B', POPT_ARG_LONG, &block_size, 0, 0, 0 },
651 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
652 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
abd3adb8 653@@ -1229,6 +1232,23 @@ int parse_arguments(int *argc_p, const char ***argv_p)
f9df736a
WD
654 }
655 break;
656
657+ case OPT_SUMFILES:
658+ arg = poptGetOptArg(pc);
659+ checksum_files = 0;
660+ if (strcmp(arg, "lax") == 0)
661+ checksum_files |= CSF_LAX_MODE;
662+ else if (strcmp(arg, "strict") == 0)
663+ checksum_files |= CSF_STRICT_MODE;
664+ else if (strcmp(arg, "none") == 0)
665+ checksum_files = CSF_IGNORE_FILES;
666+ else {
667+ snprintf(err_buf, sizeof err_buf,
668+ "Invalid argument passed to --sumfiles (%s)\n",
669+ arg);
670+ return 0;
671+ }
672+ break;
673+
674 case OPT_HELP:
675 usage(FINFO);
676 exit_cleanup(0);
abd3adb8 677@@ -1333,6 +1353,9 @@ int parse_arguments(int *argc_p, const char ***argv_p)
f9df736a
WD
678 }
679 #endif
680
c0c7984e 681+ if (!always_checksum)
f9df736a
WD
682+ checksum_files = CSF_IGNORE_FILES;
683+
684 if (write_batch && read_batch) {
685 snprintf(err_buf, sizeof err_buf,
686 "--write-batch and --read-batch can not be used together\n");
687diff --git a/rsync.h b/rsync.h
688--- a/rsync.h
689+++ b/rsync.h
ae306a29 690@@ -692,6 +692,10 @@ extern int xattrs_ndx;
f9df736a
WD
691 #define F_SUM(f) ((char*)OPT_EXTRA(f, LEN64_BUMP(f) + HLINK_BUMP(f) \
692 + SUM_EXTRA_CNT - 1))
693
694+/* These are only valid on an entry read from a checksum file. */
695+#define F_CTIME(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT)->unum
696+#define F_INODE(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT + 1)->unum
697+
698 /* Some utility defines: */
699 #define F_IS_ACTIVE(f) (f)->basename[0]
700 #define F_IS_HLINKED(f) ((f)->flags & FLAG_HLINKED)
963ca808 701@@ -879,6 +883,13 @@ typedef struct {
f9df736a
WD
702 char fname[1]; /* has variable size */
703 } relnamecache;
704
705+#define CSF_ENABLE (1<<1)
706+#define CSF_LAX (1<<2)
707+
708+#define CSF_IGNORE_FILES 0
709+#define CSF_LAX_MODE (CSF_ENABLE|CSF_LAX)
710+#define CSF_STRICT_MODE (CSF_ENABLE)
711+
712 #include "byteorder.h"
713 #include "lib/mdigest.h"
714 #include "lib/wildmatch.h"
715diff --git a/rsync.yo b/rsync.yo
716--- a/rsync.yo
717+++ b/rsync.yo
abd3adb8 718@@ -321,6 +321,7 @@ to the detailed description below for a complete description. verb(
f9df736a
WD
719 -q, --quiet suppress non-error messages
720 --no-motd suppress daemon-mode MOTD (see caveat)
721 -c, --checksum skip based on checksum, not mod-time & size
722+ --sumfiles=MODE use .rsyncsums to speedup --checksum mode
723 -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)
724 --no-OPTION turn off an implied OPTION (e.g. --no-D)
725 -r, --recursive recurse into directories
abd3adb8 726@@ -520,9 +521,9 @@ uses a "quick check" that (by default) checks if each file's size and time
f9df736a
WD
727 of last modification match between the sender and receiver. This option
728 changes this to compare a 128-bit MD4 checksum for each file that has a
729 matching size. Generating the checksums means that both sides will expend
730-a lot of disk I/O reading all the data in the files in the transfer (and
731-this is prior to any reading that will be done to transfer changed files),
732-so this can slow things down significantly.
733+a lot of disk I/O reading the data in all the files in the transfer, so
734+this can slow things down significantly (and this is prior to any reading
735+that will be done to transfer the files that have changed).
736
737 The sending side generates its checksums while it is doing the file-system
738 scan that builds the list of the available files. The receiver generates
abd3adb8 739@@ -530,12 +531,44 @@ its checksums when it is scanning for changed files, and will checksum any
f9df736a
WD
740 file that has the same size as the corresponding sender's file: files with
741 either a changed size or a changed checksum are selected for transfer.
742
743+See also the bf(--sumfiles) option for a way to use cached checksum data.
744+
745 Note that rsync always verifies that each em(transferred) file was
746 correctly reconstructed on the receiving side by checking a whole-file
747 checksum that is generated as the file is transferred, but that
748 automatic after-the-transfer verification has nothing to do with this
749 option's before-the-transfer "Does this file need to be updated?" check.
750
751+dit(bf(--sumfiles=MODE)) This option tells rsync to make use of any cached
752+checksum information it finds in per-directory .rsyncsums files when the
753+current transfer is using the bf(--checksum) option. If the checksum data
754+is up-to-date, it is used instead of recomputing it, saving both disk I/O
755+and CPU time. If the checksum data is missing or outdated, the checksum is
756+computed just as it would be if bf(--sumfiles) was not specified.
757+
758+The MODE value is either "lax", for relaxed checking (which compares size
759+and mtime), "strict" (which also compares ctime and inode), or "none" to
760+ignore any .rsyncsums files ("none" is the default). Rsync does not create
761+or update these files, but there is a perl script in the support directory
762+named "rsyncsums" that can be used for that.
763+
764+This option has no effect unless bf(--checksum, -c) was also specified. It
765+also only affects the current side of the transfer, so if you want the
766+remote side to parse its own .rsyncsums files, specify the option via the
767+bf(--rsync-path) option (e.g. --rsync-path="rsync --sumfiles=lax").
768+
769+To avoid transferring the system's checksum files, you can use an exclude
770+(e.g. bf(--exclude=.rsyncsums)). To make this easier to type, you can use
771+a popt alias. For instance, adding the following line in your ~/.popt file
772+defines a bf(--cc) option that enables lax checksum files and excludes the
773+checksum files:
774+
775+verb( rsync alias --cc -c --sumfiles=lax --exclude=.rsyncsums)
776+
777+An rsync daemon does not allow the client to control this setting, so see
778+the "checksum files" daemon parameter for information on how to make a
779+daemon use cached checksum data.
780+
781 dit(bf(-a, --archive)) This is equivalent to bf(-rlptgoD). It is a quick
782 way of saying you want recursion and want to preserve almost
783 everything (with -H being a notable omission).
784diff --git a/rsyncd.conf.yo b/rsyncd.conf.yo
785--- a/rsyncd.conf.yo
786+++ b/rsyncd.conf.yo
787@@ -281,6 +281,17 @@ locking on this file to ensure that the max connections limit is not
788 exceeded for the modules sharing the lock file.
789 The default is tt(/var/run/rsyncd.lock).
790
e66d6d51 791+dit(bf(checksum files)) This parameter tells rsync to make use of any cached
f9df736a
WD
792+checksum information it finds in per-directory .rsyncsums files when the
793+current transfer is using the bf(--checksum) option. The value can be set
794+to either "lax", "strict", or "none" -- see the client's bf(--sumfiles)
795+option for what these choices do.
796+
797+Note also that the client's command-line option, bf(--sumfiles), has no
798+effect on a daemon. A daemon will only access checksum files if this
799+config option tells it to. See also the bf(exclude) directive for a way
800+to hide the .rsyncsums files from the user.
801+
e66d6d51 802 dit(bf(read only)) This parameter determines whether clients
f9df736a
WD
803 will be able to upload files or not. If "read only" is true then any
804 attempted uploads will fail. If "read only" is false then uploads will
cc3e685d 805diff --git a/support/rsyncsums b/support/rsyncsums
f9df736a 806new file mode 100755
cc3e685d
WD
807--- /dev/null
808+++ b/support/rsyncsums
0ef5abcb
WD
809@@ -0,0 +1,202 @@
810+#!/usr/bin/perl
213d4328 811+use strict;
0ef5abcb 812+use warnings;
213d4328
WD
813+
814+use Getopt::Long;
815+use Cwd qw(abs_path cwd);
816+use Digest::MD4;
817+use Digest::MD5;
818+
819+our $SUMS_FILE = '.rsyncsums';
820+
821+&Getopt::Long::Configure('bundling');
822+&usage if !&GetOptions(
213d4328 823+ 'recurse|r' => \( my $recurse_opt ),
f9df736a 824+ 'mode|m=s' => \( my $cmp_mode = 'strict' ),
d1a75c9f 825+ 'check|c' => \( my $check_opt ),
213d4328
WD
826+ 'verbose|v+' => \( my $verbosity = 0 ),
827+ 'help|h' => \( my $help_opt ),
828+);
f9df736a
WD
829+&usage if $help_opt || $cmp_mode !~ /^(lax|strict)$/;
830+
831+my $ignore_ctime_and_inode = $cmp_mode eq 'lax' ? 1 : 0; # lax compares only size+mtime; strict also checks ctime+inode
213d4328
WD
832+
833+my $start_dir = cwd();
834+
835+my @dirs = @ARGV;
836+@dirs = '.' unless @dirs;
837+foreach (@dirs) {
838+ $_ = abs_path($_);
839+}
840+
841+$| = 1;
842+
d1a75c9f
WD
843+my $exit_code = 0;
844+
213d4328
WD
845+my $md4 = Digest::MD4->new;
846+my $md5 = Digest::MD5->new;
847+
848+while (@dirs) {
849+ my $dir = shift @dirs;
850+
851+ if (!chdir($dir)) {
852+ warn "Unable to chdir to $dir: $!\n";
853+ next;
854+ }
855+ if (!opendir(DP, '.')) {
856+ warn "Unable to opendir $dir: $!\n";
857+ next;
858+ }
859+
d1a75c9f
WD
860+ my $reldir = $dir;
861+ $reldir =~ s#^\Q$start_dir\E(/|$)# $1 ? '' : '.' #eo; # \Q..\E: match the start dir literally, not as a regex
213d4328 862+ if ($verbosity) {
213d4328 863+ print "$reldir ... ";
d1a75c9f 864+ print "\n" if $check_opt;
213d4328
WD
865+ }
866+
213d4328 867+ my %cache;
d1a75c9f
WD
868+ my $f_cnt = 0;
869+ if (open(FP, '<', $SUMS_FILE)) {
870+ while (<FP>) {
871+ chomp;
872+ my($sum4, $sum5, $size, $mtime, $ctime, $inode, $fn) = split(' ', $_, 7);
873+ $cache{$fn} = [ 0, $sum4, $sum5, $size, $mtime, $ctime & 0xFFFFFFFF, $inode & 0xFFFFFFFF ];
874+ $f_cnt++;
875+ }
876+ close FP;
877+ }
878+
213d4328 879+ my @subdirs;
d1a75c9f
WD
880+ my $d_cnt = 0;
881+ my $update_cnt = 0;
213d4328
WD
882+ while (defined(my $fn = readdir(DP))) {
883+ next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn;
884+ if (-d _) {
885+ push(@subdirs, "$dir/$fn") unless $fn =~ /^(CVS|\.svn|\.git|\.bzr)$/;
886+ next;
887+ }
888+ next unless -f _;
889+
890+ my($size,$mtime,$ctime,$inode) = (stat(_))[7,9,10,1];
f9df736a
WD
891+ $ctime &= 0xFFFFFFFF;
892+ $inode &= 0xFFFFFFFF;
d1a75c9f 893+ my $ref = $cache{$fn};
d1a75c9f 894+ $d_cnt++;
213d4328 895+
d1a75c9f 896+ if (!$check_opt) {
213d4328 897+ if (defined $ref) {
d1a75c9f
WD
898+ $$ref[0] = 1;
899+ if ($$ref[3] == $size
900+ && $$ref[4] == $mtime
901+ && ($ignore_ctime_and_inode || ($$ref[5] == $ctime && $$ref[6] == $inode))
902+ && $$ref[1] !~ /=/ && $$ref[2] !~ /=/) {
903+ next;
213d4328 904+ }
d1a75c9f
WD
905+ }
906+ if (!$update_cnt++) {
907+ print "UPDATING\n" if $verbosity;
213d4328
WD
908+ }
909+ }
213d4328 910+
d1a75c9f
WD
911+ if (!open(IN, '<', $fn)) { # 3-arg open: a name like "x|" or ">x" must never be taken as a pipe or mode
912+ print STDERR "Unable to read $fn: $!\n";
913+ if (defined $ref) {
213d4328 914+ delete $cache{$fn};
d1a75c9f 915+ $f_cnt--;
213d4328 916+ }
d1a75c9f
WD
917+ next;
918+ }
213d4328 919+
d1a75c9f
WD
920+ my($sum4, $sum5);
921+ while (1) {
922+ while (sysread(IN, $_, 64*1024)) {
923+ $md4->add($_);
924+ $md5->add($_);
213d4328 925+ }
d1a75c9f
WD
926+ $sum4 = $md4->hexdigest;
927+ $sum5 = $md5->hexdigest;
928+ print " $sum4 $sum5" if $verbosity > 2;
929+ print " $fn" if $verbosity > 1;
930+ my($size2,$mtime2,$ctime2,$inode2) = (stat(IN))[7,9,10,1];
f9df736a
WD
931+ $ctime2 &= 0xFFFFFFFF;
932+ $inode2 &= 0xFFFFFFFF;
d1a75c9f
WD
933+ last if $size == $size2 && $mtime == $mtime2
934+ && ($ignore_ctime_and_inode || ($ctime == $ctime2 && $inode == $inode2));
935+ $size = $size2;
936+ $mtime = $mtime2;
937+ $ctime = $ctime2;
938+ $inode = $inode2;
939+ sysseek(IN, 0, 0);
940+ print " REREADING\n" if $verbosity > 1;
941+ }
213d4328 942+
d1a75c9f
WD
943+ close IN;
944+
945+ if ($check_opt) {
946+ my $dif;
947+ if (!defined $ref) {
948+ $dif = 'MISSING';
949+ } elsif ($sum4 ne $$ref[1] || $sum5 ne $$ref[2]) {
950+ $dif = 'FAILED';
951+ } else {
952+ print " OK\n" if $verbosity > 1;
953+ next;
954+ }
955+ if ($verbosity < 2) {
956+ print $verbosity ? ' ' : "$reldir/";
957+ print $fn;
213d4328 958+ }
d1a75c9f
WD
959+ print " $dif\n";
960+ $exit_code = 1;
961+ } else {
962+ print "\n" if $verbosity > 1;
f9df736a 963+ $cache{$fn} = [ 1, $sum4, $sum5, $size, $mtime, $ctime, $inode ];
d1a75c9f
WD
964+ }
965+ }
966+
967+ closedir DP;
213d4328 968+
d1a75c9f
WD
969+ unshift(@dirs, sort @subdirs) if $recurse_opt;
970+
971+ if ($check_opt) {
972+ ;
973+ } elsif ($d_cnt == 0) {
974+ if ($f_cnt) {
975+ print "(removed $SUMS_FILE) " if $verbosity;
976+ unlink($SUMS_FILE);
213d4328 977+ }
d1a75c9f
WD
978+ print "empty\n" if $verbosity;
979+ } elsif ($update_cnt || $d_cnt != $f_cnt) {
980+ print "UPDATING\n" if $verbosity && !$update_cnt;
981+ open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n";
213d4328 982+
213d4328
WD
983+ foreach my $fn (sort keys %cache) {
984+ my $ref = $cache{$fn};
d1a75c9f
WD
985+ my($found, $sum4, $sum5, $size, $mtime, $ctime, $inode) = @$ref;
986+ next unless $found;
213d4328
WD
987+ printf FP '%s %s %10d %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $inode, $fn;
988+ }
d1a75c9f 989+ close(FP) or die "Unable to write $dir/$SUMS_FILE: $!\n"; # buffered write errors surface at close
213d4328
WD
990+ } else {
991+ print "ok\n" if $verbosity;
992+ }
213d4328
WD
993+}
994+
d1a75c9f
WD
995+exit $exit_code;
996+
213d4328
WD
997+sub usage # Emit the help text below via die(), i.e. on STDERR with a non-zero exit status.
998+{
999+ die <<EOT;
1000+Usage: rsyncsums [OPTIONS] [DIRS]
1001+
1002+Options:
1003+ -r, --recurse Update $SUMS_FILE files in subdirectories too.
f9df736a
WD
1004+ -m, --mode=MODE Compare entries in either "lax" or "strict" mode. Using
1005+ "lax" compares size and mtime, while "strict" additionally
1006+ compares ctime and inode. Default: strict.
d1a75c9f 1007+ -c, --check Check if the checksums are right (doesn't update).
213d4328
WD
1008+ -v, --verbose Mention what we're doing. Repeat for more info.
1009+ -h, --help Display this help message.
1010+EOT
1011+}