1 This adds a sender optimization feature that allows a cache of checksums
2 to be used when the client specifies the --checksum option, and creates
3 and/or updates the .rsyncsums files when --checksum-updating is
6 To use this patch, run these commands for a successful build:
8 patch -p1 <patches/checksum-updating.diff
9 ./configure (optional if already run)
12 TODO: when sending individual files (as opposed to an entire directory),
13 we should still update the .rsyncsums file if we compute a new checksum.
14 (The file is currently only written if we send an entire dir.)
16 --- old/clientserver.c
17 +++ new/clientserver.c
18 @@ -37,6 +37,7 @@ extern int sanitize_paths;
19 extern int filesfrom_fd;
20 extern int remote_protocol;
21 extern int protocol_version;
22 +extern int checksum_updating;
23 extern int io_timeout;
25 extern int default_af_hint;
26 @@ -634,6 +635,8 @@ static int rsync_module(int f_in, int f_
27 else if (am_root < 0) /* Treat --fake-super from client as --super. */
30 + checksum_updating = lp_checksum_updating(i);
32 if (filesfrom_fd == 0)
45 @@ -57,6 +58,7 @@ extern int implied_dirs;
46 extern int file_extra_cnt;
47 extern int ignore_perishable;
48 extern int non_perishable_cnt;
49 +extern int checksum_updating;
50 extern int prune_empty_dirs;
51 extern int copy_links;
52 extern int copy_unsafe_links;
53 @@ -79,6 +81,9 @@ extern iconv_t ic_send, ic_recv;
55 #define PTR_SIZE (sizeof (struct file_struct *))
57 +#define FLAG_SUM_MISSING (1<<1)
58 +#define FLAG_SUM_FOUND (1<<2)
62 dev_t filesystem_dev; /* used to implement -x */
63 @@ -101,6 +106,8 @@ static char tmp_sum[MAX_DIGEST_LEN];
64 static char empty_sum[MAX_DIGEST_LEN];
65 static int flist_count_offset; /* for --delete --progress */
66 static int dir_count = 0;
67 +static struct file_list *checksum_flist = NULL;
68 +static int checksum_matches = 0;
70 static void clean_flist(struct file_list *flist, int strip_root);
71 static void output_flist(struct file_list *flist);
72 @@ -317,6 +324,259 @@ static void flist_done_allocating(struct
73 flist->pool_boundary = ptr;
76 +/* The len count is the length of the basename + 1 for the null. */
77 +static void add_checksum(const char *dirname, const char *basename, int len,
78 + OFF_T file_length, time_t mtime, const char *sum,
79 + const char *alt_sum, int flags)
81 + struct file_struct *file;
82 + int alloc_len, extra_len;
85 + if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0)
89 + len = strlen(basename) + 1;
91 + extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT)
93 +#if EXTRA_ROUNDING > 0
94 + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
95 + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
97 + alloc_len = FILE_STRUCT_LEN + extra_len + len + checksum_len*2 + 1;
98 + bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum");
100 + memset(bp, 0, extra_len + FILE_STRUCT_LEN);
102 + file = (struct file_struct *)bp;
103 + bp += FILE_STRUCT_LEN;
105 + memcpy(bp, basename, len);
107 + strlcpy(bp+len, alt_sum, checksum_len*2 + 1);
109 + memset(bp+len, '=', checksum_len*2);
110 + bp[len+checksum_len*2] = '\0';
113 + file->flags = flags;
114 + file->mode = S_IFREG;
115 + file->modtime = mtime;
116 + file->len32 = (uint32)file_length;
117 + if (file_length > 0xFFFFFFFFu) {
118 + file->flags |= FLAG_LENGTH64;
119 + OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32);
121 + file->dirname = dirname;
122 + bp = (char*)F_SUM(file);
123 + memcpy(bp, sum, checksum_len);
125 + flist_expand(checksum_flist, 1);
126 + checksum_flist->files[checksum_flist->count++] = file;
128 + checksum_flist->sorted = checksum_flist->files;
131 +/* The dirname value must remain unchanged during the lifespan of the
132 + * created checksum_flist object because we use it directly. */
133 +static void read_checksums(const char *dirname)
135 + char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
136 + const char *alt_sum = NULL;
139 + int len, dlen, i, flags;
143 + if (checksum_flist) {
144 + /* Reset the pool memory and empty the file-list array. */
145 + pool_free_old(checksum_flist->file_pool,
146 + pool_boundary(checksum_flist->file_pool, 0));
147 + checksum_flist->count = 0;
149 + checksum_flist = flist_new(FLIST_TEMP, "read_checksums");
151 + checksum_flist->low = 0;
152 + checksum_flist->high = -1;
153 + checksum_matches = 0;
156 + dlen = strlcpy(fbuf, dirname, sizeof fbuf);
157 + if (dlen >= (int)sizeof fbuf)
159 + fbuf[dlen++] = '/';
162 + strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen);
163 + if (!(fp = fopen(fbuf, "r")))
166 + while (fgets(line, sizeof line, fp)) {
168 + if (protocol_version >= 30) {
171 + while (*++cp == '=') {}
173 + while (isXDigit(cp)) cp++;
174 + if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ')
176 + while (*++cp == ' ') {}
180 + for (i = 0; i < checksum_len*2; i++, cp++) {
186 + memset(sum, 0, checksum_len);
187 + flags = FLAG_SUM_MISSING;
189 + for (i = 0; i < checksum_len*2; i++, cp++) {
191 + if (isXDigit(cp)) {
195 + x = (*cp & 0xF) + 9;
209 + while (*++cp == ' ') {}
211 + if (protocol_version < 30) {
214 + while (*++cp == '=') {}
216 + while (isXDigit(cp)) cp++;
217 + if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ')
219 + while (*++cp == ' ') {}
223 + while (isDigit(cp))
224 + file_length = file_length * 10 + *cp++ - '0';
227 + while (*++cp == ' ') {}
230 + while (isDigit(cp))
231 + mtime = mtime * 10 + *cp++ - '0';
234 + while (*++cp == ' ') {}
237 + while (len && (cp[len-1] == '\n' || cp[len-1] == '\r'))
241 + cp[len++] = '\0'; /* len now counts the null */
242 + if (strchr(cp, '/') || len > MAXPATHLEN)
245 + strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
246 + if (is_excluded(fbuf, 0, ALL_FILTERS)) {
247 + flags |= FLAG_SUM_FOUND;
248 + checksum_matches++;
251 + add_checksum(dirname, cp, len, file_length, mtime,
252 + sum, alt_sum, flags);
256 + clean_flist(checksum_flist, 0);
259 +static void write_checksums(const char *dirname)
261 + char buf[MAXPATHLEN+1024];
262 + int count = checksum_flist->count;
263 + int new_entries = count > checksum_flist->high + 1;
264 + int orphan_entires = count != checksum_matches;
269 + for (i = checksum_flist->high + 1; i < count; i++) {
270 + struct file_struct *file = checksum_flist->sorted[i];
271 + file->flags |= FLAG_SUM_FOUND;
274 + clean_flist(checksum_flist, 0);
275 + checksum_flist->count = 0;
276 + checksum_matches = 0;
282 + if (pathjoin(buf, sizeof buf, dirname, ".rsyncsums") >= sizeof buf)
285 + strlcpy(buf, ".rsyncsums", sizeof buf);
287 + if (checksum_flist->high - checksum_flist->low < 0) {
292 + if (!new_entries && !orphan_entires)
295 + if (!(out_fp = fopen(buf, "w")))
298 + for (i = checksum_flist->low; i <= checksum_flist->high; i++) {
299 + struct file_struct *file = checksum_flist->sorted[i];
300 + const char *cp = F_SUM(file);
301 + const char *end = cp + checksum_len;
302 + if (!(file->flags & FLAG_SUM_FOUND))
304 + if (protocol_version >= 30) {
305 + fprintf(out_fp, "%s ",
306 + file->basename + strlen(file->basename) + 1);
308 + if (file->flags & FLAG_SUM_MISSING) {
310 + fprintf(out_fp, "==");
311 + } while (++cp != end);
314 + fprintf(out_fp, "%02x", CVAL(cp, 0));
315 + } while (++cp != end);
317 + if (protocol_version < 30) {
318 + fprintf(out_fp, " %s",
319 + file->basename + strlen(file->basename) + 1);
321 + fprintf(out_fp, " %10.0f %10ld %s\n",
322 + (double)F_LENGTH(file), (long)file->modtime,
329 int push_pathname(const char *dir, int len)
332 @@ -973,34 +1233,24 @@ static struct file_struct *recv_file_ent
337 - * Create a file_struct for a named file by reading its stat()
338 - * information and performing extensive checks against global
341 - * @return the new file, or NULL if there was an error or this file
342 - * should be excluded.
343 +/* Create a file_struct for a named file by reading its stat() information
344 + * and performing extensive checks against global options.
346 - * @todo There is a small optimization opportunity here to avoid
347 - * stat()ing the file in some circumstances, which has a certain cost.
348 - * We are called immediately after doing readdir(), and so we may
349 - * already know the d_type of the file. We could for example avoid
350 - * statting directories if we're not recursing, but this is not a very
351 - * important case. Some systems may not have d_type.
353 + * Returns a pointer to the new file struct, or NULL if there was an error
354 + * or this file should be excluded. */
355 struct file_struct *make_file(const char *fname, struct file_list *flist,
356 STRUCT_STAT *stp, int flags, int filter_level)
358 static char *lastdir;
359 - static int lastdir_len = -1;
360 + static int lastdir_len = -2;
361 struct file_struct *file;
363 char thisname[MAXPATHLEN];
364 char linkname[MAXPATHLEN];
365 int alloc_len, basename_len, linkname_len;
366 int extra_len = file_extra_cnt * EXTRA_LEN;
367 const char *basename;
372 if (strlcpy(thisname, fname, sizeof thisname)
373 @@ -1115,9 +1365,16 @@ struct file_struct *make_file(const char
374 memcpy(lastdir, thisname, len);
377 + if (always_checksum && am_sender && flist)
378 + read_checksums(lastdir);
383 + if (always_checksum && am_sender && flist && lastdir_len == -2) {
385 + read_checksums(NULL);
388 basename_len = strlen(basename) + 1; /* count the '\0' */
391 @@ -1193,11 +1450,36 @@ struct file_struct *make_file(const char
395 - if (always_checksum && am_sender && S_ISREG(st.st_mode))
396 - file_checksum(thisname, tmp_sum, st.st_size);
398 F_PATHNAME(file) = pathname;
400 + if (always_checksum && am_sender && S_ISREG(st.st_mode)) {
402 + if (flist && (j = flist_find(checksum_flist, file)) >= 0) {
403 + struct file_struct *fp = checksum_flist->sorted[j];
404 + if (fp->modtime == st.st_mtime && F_LENGTH(fp) == st.st_size) {
405 + if (fp->flags & FLAG_SUM_MISSING) {
406 + fp->flags &= ~FLAG_SUM_MISSING;
407 + file_checksum(thisname, tmp_sum, st.st_size);
408 + memcpy((char*)F_SUM(fp), tmp_sum, MAX_DIGEST_LEN);
410 + checksum_matches++;
411 + memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN);
413 + fp->flags |= FLAG_SUM_FOUND;
416 + goto compute_checksum;
420 + file_checksum(thisname, tmp_sum, st.st_size);
421 + if (checksum_updating && flist) {
422 + add_checksum(file->dirname, basename, basename_len,
423 + st.st_size, st.st_mtime, tmp_sum, NULL, 0);
428 /* This code is only used by the receiver when it is building
429 * a list of files for a delete pass. */
430 if (keep_dirlinks && linkname_len && flist) {
431 @@ -1241,14 +1523,14 @@ void unmake_file(struct file_struct *fil
433 static struct file_struct *send_file_name(int f, struct file_list *flist,
434 char *fname, STRUCT_STAT *stp,
435 - int flags, int filter_flags)
436 + int flags, int filter_level)
438 struct file_struct *file;
439 #if defined SUPPORT_ACLS || defined SUPPORT_XATTRS
443 - file = make_file(fname, flist, stp, flags, filter_flags);
444 + file = make_file(fname, flist, stp, flags, filter_level);
448 @@ -1442,7 +1724,7 @@ static void send_directory(int f, struct
450 int divert_dirs = (flags & FLAG_DIVERT_DIRS) != 0;
451 int start = flist->count;
452 - int filter_flags = f == -2 ? SERVER_FILTERS : ALL_FILTERS;
453 + int filter_level = f == -2 ? SERVER_FILTERS : ALL_FILTERS;
455 assert(flist != NULL);
457 @@ -1471,7 +1753,7 @@ static void send_directory(int f, struct
461 - send_file_name(f, flist, fbuf, NULL, flags, filter_flags);
462 + send_file_name(f, flist, fbuf, NULL, flags, filter_level);
466 @@ -1483,6 +1765,9 @@ static void send_directory(int f, struct
470 + if (checksum_updating && always_checksum && am_sender && f >= 0)
471 + write_checksums(fbuf);
473 if (f >= 0 && recurse && !divert_dirs) {
474 int i, end = flist->count - 1;
475 /* send_if_directory() bumps flist->count, so use "end". */
476 @@ -2206,7 +2491,7 @@ void flist_free(struct file_list *flist)
478 if (!flist->prev || !flist_cnt)
479 pool_destroy(flist->file_pool);
481 + else if (flist->pool_boundary)
482 pool_free_old(flist->file_pool, flist->pool_boundary);
484 if (flist->sorted && flist->sorted != flist->files)
485 @@ -2225,6 +2510,7 @@ static void clean_flist(struct file_list
488 if (flist->count == 0) {
495 @@ -149,6 +149,7 @@ typedef struct
499 + BOOL checksum_updating;
502 BOOL ignore_nonreadable;
503 @@ -197,6 +198,7 @@ static service sDefault =
504 /* syslog_facility; */ LOG_DAEMON,
507 + /* checksum_updating; */ False,
508 /* fake_super; */ False,
509 /* ignore_errors; */ False,
510 /* ignore_nonreadable; */ False,
511 @@ -313,6 +315,7 @@ static struct parm_struct parm_table[] =
512 {"lock file", P_STRING, P_LOCAL, &sDefault.lock_file, NULL,0},
513 {"log file", P_STRING, P_LOCAL, &sDefault.log_file, NULL,0},
514 {"log format", P_STRING, P_LOCAL, &sDefault.log_format, NULL,0},
515 + {"checksum updating", P_BOOL, P_LOCAL, &sDefault.checksum_updating, NULL,0},
516 {"max connections", P_INTEGER,P_LOCAL, &sDefault.max_connections, NULL,0},
517 {"max verbosity", P_INTEGER,P_LOCAL, &sDefault.max_verbosity, NULL,0},
518 {"name", P_STRING, P_LOCAL, &sDefault.name, NULL,0},
519 @@ -418,6 +421,7 @@ FN_LOCAL_BOOL(lp_fake_super, fake_super)
520 FN_LOCAL_BOOL(lp_ignore_errors, ignore_errors)
521 FN_LOCAL_BOOL(lp_ignore_nonreadable, ignore_nonreadable)
522 FN_LOCAL_BOOL(lp_list, list)
523 +FN_LOCAL_BOOL(lp_checksum_updating, checksum_updating)
524 FN_LOCAL_BOOL(lp_read_only, read_only)
525 FN_LOCAL_BOOL(lp_strict_modes, strict_modes)
526 FN_LOCAL_BOOL(lp_transfer_logging, transfer_logging)
529 @@ -109,6 +109,7 @@ size_t bwlimit_writemax = 0;
530 int ignore_existing = 0;
531 int ignore_non_existing = 0;
532 int need_messages_from_generator = 0;
533 +int checksum_updating = 0;
537 @@ -302,6 +303,7 @@ void usage(enum logcode F)
538 rprintf(F," -q, --quiet suppress non-error messages\n");
539 rprintf(F," --no-motd suppress daemon-mode MOTD (see manpage caveat)\n");
540 rprintf(F," -c, --checksum skip based on checksum, not mod-time & size\n");
541 + rprintf(F," --checksum-updating sender updates .rsyncsums files\n");
542 rprintf(F," -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)\n");
543 rprintf(F," --no-OPTION turn off an implied OPTION (e.g. --no-D)\n");
544 rprintf(F," -r, --recursive recurse into directories\n");
545 @@ -542,6 +544,7 @@ static struct poptOption long_options[]
546 {"checksum", 'c', POPT_ARG_VAL, &always_checksum, 1, 0, 0 },
547 {"no-checksum", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
548 {"no-c", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
549 + {"checksum-updating",0, POPT_ARG_NONE, &checksum_updating, 0, 0, 0 },
550 {"block-size", 'B', POPT_ARG_LONG, &block_size, 0, 0, 0 },
551 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
552 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
553 @@ -1896,7 +1899,9 @@ void server_options(char **args,int *arg
554 args[ac++] = basis_dir[i];
558 + } else if (checksum_updating)
559 + args[ac++] = "--checksum-updating";
563 args[ac++] = "--append";
566 @@ -1070,6 +1070,12 @@ isDigit(const char *ptr)
570 +isXDigit(const char *ptr)
572 + return isxdigit(*(unsigned char *)ptr);
576 isPrint(const char *ptr)
578 return isprint(*(unsigned char *)ptr);
581 @@ -307,6 +307,7 @@ to the detailed description below for a
582 -q, --quiet suppress non-error messages
583 --no-motd suppress daemon-mode MOTD (see caveat)
584 -c, --checksum skip based on checksum, not mod-time & size
585 + --checksum-updating sender updates .rsyncsums files
586 -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)
587 --no-OPTION turn off an implied OPTION (e.g. --no-D)
588 -r, --recursive recurse into directories
589 @@ -502,9 +503,9 @@ uses a "quick check" that (by default) c
590 of last modification match between the sender and receiver. This option
591 changes this to compare a 128-bit MD4 checksum for each file that has a
592 matching size. Generating the checksums means that both sides will expend
593 -a lot of disk I/O reading all the data in the files in the transfer (and
594 -this is prior to any reading that will be done to transfer changed files),
595 -so this can slow things down significantly.
596 +a lot of disk I/O reading the data in all the files in the transfer, so
597 +this can slow things down significantly (and this is prior to any reading
598 +that will be done to transfer the files that have changed).
600 The sending side generates its checksums while it is doing the file-system
601 scan that builds the list of the available files. The receiver generates
602 @@ -512,12 +513,42 @@ its checksums when it is scanning for ch
603 file that has the same size as the corresponding sender's file: files with
604 either a changed size or a changed checksum are selected for transfer.
606 +Starting with version 3.0.0, the sending side will look for a checksum
607 +summary file and use a pre-generated checksum that it reads out of the file
608 +(as long as it matches the file's size and modified time). This allows a
609 +server to support the --checksum option to clients without having to
610 +recompute the checksums for each client. See the bf(--checksum-updating)
611 +option for a way to have rsync create/update these checksum files.
613 Note that rsync always verifies that each em(transferred) file was
614 correctly reconstructed on the receiving side by checking a whole-file
615 checksum that is generated when as the file is transferred, but that
616 automatic after-the-transfer verification has nothing to do with this
617 option's before-the-transfer "Does this file need to be updated?" check.
619 +dit(bf(--checksum-updating)) This option tells the sending side to create
620 +and/or update per-directory checksum files that are used by the
621 +bf(--checksum) option. The file that is updated is named .rsyncsums. If
622 +pre-transfer checksums are not being computed, this option has no effect.
624 +The checksum file stores the computed checksum, last-known size,
625 +modification time, and name for each file in the current directory. If a
626 +later transfer finds that a file matches its prior size and modification
627 +time, the checksum is assumed to still be correct. Otherwise it is
628 +recomputed and updated in the file.
630 +To avoid transferring the system's checksum files, you can use an exclude
631 +(e.g. bf(--exclude=.rsyncsums)). To make this easier to type, you can use
632 +a popt alias. For instance, adding the following line in your ~/.popt file
633 +defines a bf(--cc) option that enables checksum updating and excludes the
636 +verb( rsync alias --cc --checksum-updating --exclude=.rsyncsums)
638 +An rsync daemon does not allow the client to control this setting, so see
639 +the "checksum updating" daemon config option for information on how to make
640 +a daemon maintain these checksum files.
642 dit(bf(-a, --archive)) This is equivalent to bf(-rlptgoD). It is a quick
643 way of saying you want recursion and want to preserve almost
644 everything (with -H being a notable omission).
645 --- old/rsyncd.conf.yo
646 +++ new/rsyncd.conf.yo
647 @@ -198,6 +198,20 @@ locking on this file to ensure that the
648 exceeded for the modules sharing the lock file.
649 The default is tt(/var/run/rsyncd.lock).
651 +dit(bf(checksum updating)) This option tells rsync to update/create the
652 +checksum information in the per-directory checksum files when users copy
653 +files using the bf(--checksum) option. Any file that has changed since it
654 +was last checksummed (or is not mentioned) has its data updated in the
657 +Note that this updating will occur even if the module is listed as being
658 +read-only. If you want to hide these files (and you will almost always
659 +want to), add ".rsyncsums" to the module's exclude setting.
661 +Note also that the client's command-line option, bf(--checksum-updating),
662 +has no effect on a daemon. A daemon will only update/create checksum files
663 +if this config option is true.
665 dit(bf(read only)) The "read only" option determines whether clients
666 will be able to upload files or not. If "read only" is true then any
667 attempted uploads will fail. If "read only" is false then uploads will