The patches for 3.0.0pre8.
[rsync/rsync-patches.git] / checksum-updating.diff
... / ...
CommitLineData
1This builds on the sender optimization feature of the checksum-reading
2patch and adds the ability to create and/or update the .rsyncsums files
3when --checksum-updating (or "checksum updating = true") is specified.
4
5To use this patch, run these commands for a successful build:
6
7 patch -p1 <patches/checksum-reading.diff
8 patch -p1 <patches/checksum-updating.diff
9 ./configure (optional if already run)
10 make
11
12diff --git a/clientserver.c b/clientserver.c
13--- a/clientserver.c
14+++ b/clientserver.c
15@@ -38,6 +38,7 @@ extern int sanitize_paths;
16 extern int filesfrom_fd;
17 extern int remote_protocol;
18 extern int protocol_version;
19+extern int checksum_updating;
20 extern int io_timeout;
21 extern int no_detach;
22 extern int write_batch;
23@@ -708,6 +709,8 @@ static int rsync_module(int f_in, int f_out, int i, char *addr, char *host)
24 else if (am_root < 0) /* Treat --fake-super from client as --super. */
25 am_root = 2;
26
27+ checksum_updating = lp_checksum_updating(i);
28+
29 if (filesfrom_fd == 0)
30 filesfrom_fd = f_in;
31
32diff --git a/flist.c b/flist.c
33--- a/flist.c
34+++ b/flist.c
35@@ -26,6 +26,7 @@
36 #include "io.h"
37
38 extern int verbose;
39+extern int dry_run;
40 extern int am_root;
41 extern int am_server;
42 extern int am_daemon;
43@@ -58,6 +59,7 @@ extern int implied_dirs;
44 extern int file_extra_cnt;
45 extern int ignore_perishable;
46 extern int non_perishable_cnt;
47+extern int checksum_updating;
48 extern int prune_empty_dirs;
49 extern int copy_links;
50 extern int copy_unsafe_links;
51@@ -83,6 +85,9 @@ extern iconv_t ic_send, ic_recv;
52
53 #define PTR_SIZE (sizeof (struct file_struct *))
54
55+#define FLAG_SUM_MISSING (1<<1) /* F_SUM() data is undefined */
56+#define FLAG_SUM_KEEP (1<<2) /* keep entry when rewriting */
57+
58 int io_error;
59 int checksum_len;
60 dev_t filesystem_dev; /* used to implement -x */
61@@ -121,6 +126,9 @@ static char tmp_sum[MAX_DIGEST_LEN];
62 static char empty_sum[MAX_DIGEST_LEN];
63 static int flist_count_offset; /* for --delete --progress */
64 static int dir_count = 0;
65+static int checksum_matches = 0;
66+static int checksum_updates = 0;
67+static int regular_skipped = 0;
68 static struct file_list *checksum_flist = NULL;
69
70 static void flist_sort_and_clean(struct file_list *flist, int strip_root);
71@@ -316,7 +324,8 @@ static void flist_done_allocating(struct file_list *flist)
72
73 /* The len count is the length of the basename + 1 for the null. */
74 static int add_checksum(const char *dirname, const char *basename, int len,
75- OFF_T file_length, time_t mtime, const char *sum)
76+ OFF_T file_length, time_t mtime, int32 ctime, int32 inode,
77+ const char *sum, const char *alt_sum, int flags)
78 {
79 struct file_struct *file;
80 int alloc_len, extra_len;
81@@ -327,13 +336,14 @@ static int add_checksum(const char *dirname, const char *basename, int len,
82 if (file_length == 0)
83 return 0;
84
85- extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT)
86+ /* "2" is for a 32-bit ctime num and a 32-bit inode num. */
87+ extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT + 2)
88 * EXTRA_LEN;
89 #if EXTRA_ROUNDING > 0
90 if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
91 extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
92 #endif
93- alloc_len = FILE_STRUCT_LEN + extra_len + len;
94+ alloc_len = FILE_STRUCT_LEN + extra_len + len + checksum_len*2 + 1;
95 bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum");
96
97 memset(bp, 0, extra_len + FILE_STRUCT_LEN);
98@@ -342,7 +352,14 @@ static int add_checksum(const char *dirname, const char *basename, int len,
99 bp += FILE_STRUCT_LEN;
100
101 memcpy(bp, basename, len);
102+ if (alt_sum)
103+ strlcpy(bp+len, alt_sum, checksum_len*2 + 1);
104+ else {
105+ memset(bp+len, '=', checksum_len*2);
106+ bp[len+checksum_len*2] = '\0';
107+ }
108
109+ file->flags = flags;
110 file->mode = S_IFREG;
111 file->modtime = mtime;
112 file->len32 = (uint32)file_length;
113@@ -353,6 +370,8 @@ static int add_checksum(const char *dirname, const char *basename, int len,
114 file->dirname = dirname;
115 bp = F_SUM(file);
116 memcpy(bp, sum, checksum_len);
117+ F_CTIME(file) = ctime;
118+ F_INODE(file) = inode;
119
120 flist_expand(checksum_flist, 1);
121 checksum_flist->files[checksum_flist->used++] = file;
122@@ -362,17 +381,104 @@ static int add_checksum(const char *dirname, const char *basename, int len,
123 return 1;
124 }
125
126+static void write_checksums(const char *next_dirname, int whole_dir)
127+{
128+ static const char *dirname_save;
129+ char fbuf[MAXPATHLEN];
130+ const char *dirname;
131+ int used, new_entries, counts_match, no_skipped;
132+ FILE *out_fp;
133+ int i;
134+
135+ dirname = dirname_save;
136+ dirname_save = next_dirname;
137+
138+ if (!dirname)
139+ return;
140+
141+ used = checksum_flist->used;
142+ new_entries = checksum_updates != 0;
143+ counts_match = used == checksum_matches;
144+ no_skipped = whole_dir && regular_skipped == 0;
145+
146+ clean_flist(checksum_flist, 0);
147+
148+ checksum_flist->used = 0;
149+ checksum_matches = 0;
150+ checksum_updates = 0;
151+ regular_skipped = 0;
152+
153+ if (dry_run)
154+ return;
155+
156+ if (*dirname) {
157+ if (pathjoin(fbuf, sizeof fbuf, dirname, ".rsyncsums") >= sizeof fbuf)
158+ return;
159+ } else
160+ strlcpy(fbuf, ".rsyncsums", sizeof fbuf);
161+
162+ if (checksum_flist->high - checksum_flist->low < 0 && no_skipped) {
163+ unlink(fbuf);
164+ return;
165+ }
166+
167+ if (!new_entries && (counts_match || !whole_dir))
168+ return;
169+
170+ if (!(out_fp = fopen(fbuf, "w")))
171+ return;
172+
173+ new_entries = 0;
174+ for (i = checksum_flist->low; i <= checksum_flist->high; i++) {
175+ struct file_struct *file = checksum_flist->sorted[i];
176+ const char *cp = F_SUM(file);
177+ const char *end = cp + checksum_len;
178+ const char *alt_sum = file->basename + strlen(file->basename) + 1;
179+ int32 ctime, inode;
180+ if (whole_dir && !(file->flags & FLAG_SUM_KEEP))
181+ continue;
182+ ctime = F_CTIME(file);
183+ inode = F_INODE(file);
184+ if (protocol_version >= 30)
185+ fprintf(out_fp, "%s ", alt_sum);
186+ if (file->flags & FLAG_SUM_MISSING) {
187+ new_entries++;
188+ do {
189+ fprintf(out_fp, "==");
190+ } while (++cp != end);
191+ } else {
192+ do {
193+ fprintf(out_fp, "%02x", (int)CVAL(cp, 0));
194+ } while (++cp != end);
195+ }
196+ if (protocol_version < 30)
197+ fprintf(out_fp, " %s", alt_sum);
198+ if (*alt_sum == '=')
199+ new_entries++;
200+ fprintf(out_fp, " %10.0f %10.0f %10lu %10lu %s\n",
201+ (double)F_LENGTH(file), (double)file->modtime,
202+ (long)ctime, (long)inode, file->basename);
203+ }
204+
205+ fclose(out_fp);
206+}
207+
208 /* The direname value must remain unchanged during the lifespan of the
209 * created checksum_flist object because we use it directly. */
210 static void read_checksums(const char *dirname)
211 {
212 char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
213+ const char *alt_sum = NULL;
214 OFF_T file_length;
215 time_t mtime;
216- int len, dlen, i;
217+ int32 ctime, inode;
218+ int len, dlen, i, flags;
219 char *cp;
220 FILE *fp;
221
222+ if (checksum_updating)
223+ write_checksums(dirname, 0);
224+
225 if (checksum_flist) {
226 /* Reset the pool memory and empty the file-list array. */
227 pool_free_old(checksum_flist->file_pool,
228@@ -383,6 +489,9 @@ static void read_checksums(const char *dirname)
229
230 checksum_flist->low = 0;
231 checksum_flist->high = -1;
232+ checksum_matches = 0;
233+ checksum_updates = 0;
234+ regular_skipped = 0;
235
236 if (!dirname)
237 return;
238@@ -401,7 +510,7 @@ static void read_checksums(const char *dirname)
239 while (fgets(line, sizeof line, fp)) {
240 cp = line;
241 if (protocol_version >= 30) {
242- char *alt_sum = cp;
243+ alt_sum = cp;
244 if (*cp == '=')
245 while (*++cp == '=') {}
246 else
247@@ -412,7 +521,14 @@ static void read_checksums(const char *dirname)
248 }
249
250 if (*cp == '=') {
251- continue;
252+ for (i = 0; i < checksum_len*2; i++, cp++) {
253+ if (*cp != '=') {
254+ cp = "";
255+ break;
256+ }
257+ }
258+ memset(sum, 0, checksum_len);
259+ flags = FLAG_SUM_MISSING;
260 } else {
261 for (i = 0; i < checksum_len*2; i++, cp++) {
262 int x;
263@@ -430,13 +546,14 @@ static void read_checksums(const char *dirname)
264 else
265 sum[i/2] = x << 4;
266 }
267+ flags = 0;
268 }
269 if (*cp != ' ')
270 break;
271 while (*++cp == ' ') {}
272
273 if (protocol_version < 30) {
274- char *alt_sum = cp;
275+ alt_sum = cp;
276 if (*cp == '=')
277 while (*++cp == '=') {}
278 else
279@@ -460,16 +577,16 @@ static void read_checksums(const char *dirname)
280 break;
281 while (*++cp == ' ') {}
282
283- /* Ignore ctime. */
284+ ctime = 0;
285 while (isDigit(cp))
286- cp++;
287+ ctime = ctime * 10 + *cp++ - '0';
288 if (*cp != ' ')
289 break;
290 while (*++cp == ' ') {}
291
292- /* Ignore inode. */
293+ inode = 0;
294 while (isDigit(cp))
295- cp++;
296+ inode = inode * 10 + *cp++ - '0';
297 if (*cp != ' ')
298 break;
299 while (*++cp == ' ') {}
300@@ -486,8 +603,13 @@ static void read_checksums(const char *dirname)
301 continue;
302
303 strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
304+ if (is_excluded(fbuf, 0, ALL_FILTERS)) {
305+ flags |= FLAG_SUM_KEEP;
306+ checksum_matches++;
307+ }
308
309- add_checksum(dirname, cp, len, file_length, mtime, sum);
310+ add_checksum(dirname, cp, len, file_length, mtime, ctime, inode,
311+ sum, alt_sum, flags);
312 }
313 fclose(fp);
314
315@@ -1272,6 +1394,8 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
316 if (is_excluded(thisname, S_ISDIR(st.st_mode) != 0, filter_level)) {
317 if (ignore_perishable)
318 non_perishable_cnt++;
319+ if (S_ISREG(st.st_mode))
320+ regular_skipped++;
321 return NULL;
322 }
323
324@@ -1402,13 +1526,36 @@ struct file_struct *make_file(const char *fname, struct file_list *flist,
325 int j;
326 if (flist && (j = flist_find(checksum_flist, file)) >= 0) {
327 struct file_struct *fp = checksum_flist->sorted[j];
328+ int32 ctime = F_CTIME(fp);
329+ int32 inode = F_INODE(fp);
330 if (F_LENGTH(fp) == st.st_size
331- && fp->modtime == st.st_mtime)
332- memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN);
333- else
334- file_checksum(thisname, tmp_sum, st.st_size);
335- } else
336+ && fp->modtime == st.st_mtime
337+ && ctime == (int32)st.st_ctime
338+ && inode == (int32)st.st_ino) {
339+ if (fp->flags & FLAG_SUM_MISSING) {
340+ fp->flags &= ~FLAG_SUM_MISSING;
341+ checksum_updates++;
342+ file_checksum(thisname, tmp_sum, st.st_size);
343+ memcpy(F_SUM(fp), tmp_sum, MAX_DIGEST_LEN);
344+ } else {
345+ checksum_matches++;
346+ memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN);
347+ }
348+ fp->flags |= FLAG_SUM_KEEP;
349+ } else {
350+ clear_file(fp);
351+ goto compute_new_checksum;
352+ }
353+ } else {
354+ compute_new_checksum:
355 file_checksum(thisname, tmp_sum, st.st_size);
356+ if (checksum_updating && flist) {
357+ checksum_updates +=
358+ add_checksum(file->dirname, basename, basename_len,
359+ st.st_size, st.st_mtime, st.st_ctime,
360+ st.st_ino, tmp_sum, NULL, FLAG_SUM_KEEP);
361+ }
362+ }
363 }
364
365 /* This code is only used by the receiver when it is building
366@@ -1703,6 +1850,9 @@ static void send_directory(int f, struct file_list *flist, char *fbuf, int len,
367
368 closedir(d);
369
370+ if (checksum_updating && always_checksum && am_sender && f >= 0)
371+ write_checksums(NULL, 1);
372+
373 if (f >= 0 && recurse && !divert_dirs) {
374 int i, end = flist->used - 1;
375 /* send_if_directory() bumps flist->used, so use "end". */
376@@ -2265,7 +2415,7 @@ struct file_list *send_file_list(int f, int argc, char *argv[])
377 flist_eof = 1;
378
379 if (checksum_updating && always_checksum && flist_eof)
380- read_checksums(NULL);
381+ read_checksums(NULL); /* writes any last updates */
382
383 return flist;
384 }
385diff --git a/loadparm.c b/loadparm.c
386--- a/loadparm.c
387+++ b/loadparm.c
388@@ -153,6 +153,7 @@ typedef struct
389 int syslog_facility;
390 int timeout;
391
392+ BOOL checksum_updating;
393 BOOL fake_super;
394 BOOL ignore_errors;
395 BOOL ignore_nonreadable;
396@@ -202,6 +203,7 @@ static service sDefault =
397 /* syslog_facility; */ LOG_DAEMON,
398 /* timeout; */ 0,
399
400+ /* checksum_updating; */ False,
401 /* fake_super; */ False,
402 /* ignore_errors; */ False,
403 /* ignore_nonreadable; */ False,
404@@ -301,6 +303,7 @@ static struct parm_struct parm_table[] =
405 {"socket options", P_STRING, P_GLOBAL,&Globals.socket_options, NULL,0},
406
407 {"auth users", P_STRING, P_LOCAL, &sDefault.auth_users, NULL,0},
408+ {"checksum updating", P_BOOL, P_LOCAL, &sDefault.checksum_updating, NULL,0},
409 {"comment", P_STRING, P_LOCAL, &sDefault.comment, NULL,0},
410 {"dont compress", P_STRING, P_LOCAL, &sDefault.dont_compress, NULL,0},
411 {"exclude from", P_STRING, P_LOCAL, &sDefault.exclude_from, NULL,0},
412@@ -421,6 +424,7 @@ FN_LOCAL_INTEGER(lp_max_connections, max_connections)
413 FN_LOCAL_INTEGER(lp_max_verbosity, max_verbosity)
414 FN_LOCAL_INTEGER(lp_timeout, timeout)
415
416+FN_LOCAL_BOOL(lp_checksum_updating, checksum_updating)
417 FN_LOCAL_BOOL(lp_fake_super, fake_super)
418 FN_LOCAL_BOOL(lp_ignore_errors, ignore_errors)
419 FN_LOCAL_BOOL(lp_ignore_nonreadable, ignore_nonreadable)
420diff --git a/options.c b/options.c
421--- a/options.c
422+++ b/options.c
423@@ -112,6 +112,7 @@ size_t bwlimit_writemax = 0;
424 int ignore_existing = 0;
425 int ignore_non_existing = 0;
426 int need_messages_from_generator = 0;
427+int checksum_updating = 0;
428 int max_delete = INT_MIN;
429 OFF_T max_size = 0;
430 OFF_T min_size = 0;
431@@ -312,6 +313,7 @@ void usage(enum logcode F)
432 rprintf(F," -q, --quiet suppress non-error messages\n");
433 rprintf(F," --no-motd suppress daemon-mode MOTD (see manpage caveat)\n");
434 rprintf(F," -c, --checksum skip based on checksum, not mod-time & size\n");
435+ rprintf(F," --checksum-updating sender updates .rsyncsums files\n");
436 rprintf(F," -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)\n");
437 rprintf(F," --no-OPTION turn off an implied OPTION (e.g. --no-D)\n");
438 rprintf(F," -r, --recursive recurse into directories\n");
439@@ -560,6 +562,7 @@ static struct poptOption long_options[] = {
440 {"checksum", 'c', POPT_ARG_VAL, &always_checksum, 1, 0, 0 },
441 {"no-checksum", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
442 {"no-c", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
443+ {"checksum-updating",0, POPT_ARG_NONE, &checksum_updating, 0, 0, 0 },
444 {"block-size", 'B', POPT_ARG_LONG, &block_size, 0, 0, 0 },
445 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
446 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
447@@ -1965,7 +1968,9 @@ void server_options(char **args, int *argc_p)
448 args[ac++] = basis_dir[i];
449 }
450 }
451- }
452+ } else if (checksum_updating)
453+ args[ac++] = "--checksum-updating";
454+
455
456 if (append_mode) {
457 if (append_mode > 1)
458diff --git a/rsync.h b/rsync.h
459--- a/rsync.h
460+++ b/rsync.h
461@@ -678,6 +678,10 @@ extern int xattrs_ndx;
462 #define F_SUM(f) ((char*)OPT_EXTRA(f, LEN64_BUMP(f) + HLINK_BUMP(f) \
463 + SUM_EXTRA_CNT - 1))
464
465+/* These are only valid on an entry read from a checksum file. */
466+#define F_CTIME(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT)->num
467+#define F_INODE(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT + 1)->num
468+
469 /* Some utility defines: */
470 #define F_IS_ACTIVE(f) (f)->basename[0]
471 #define F_IS_HLINKED(f) ((f)->flags & FLAG_HLINKED)
472diff --git a/rsync.yo b/rsync.yo
473--- a/rsync.yo
474+++ b/rsync.yo
475@@ -317,6 +317,7 @@ to the detailed description below for a complete description. verb(
476 -q, --quiet suppress non-error messages
477 --no-motd suppress daemon-mode MOTD (see caveat)
478 -c, --checksum skip based on checksum, not mod-time & size
479+ --checksum-updating sender updates .rsyncsums files
480 -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)
481 --no-OPTION turn off an implied OPTION (e.g. --no-D)
482 -r, --recursive recurse into directories
483@@ -516,9 +517,9 @@ uses a "quick check" that (by default) checks if each file's size and time
484 of last modification match between the sender and receiver. This option
485 changes this to compare a 128-bit MD4 checksum for each file that has a
486 matching size. Generating the checksums means that both sides will expend
487-a lot of disk I/O reading all the data in the files in the transfer (and
488-this is prior to any reading that will be done to transfer changed files),
489-so this can slow things down significantly.
490+a lot of disk I/O reading the data in all the files in the transfer, so
491+this can slow things down significantly (and this is prior to any reading
492+that will be done to transfer the files that have changed).
493
494 The sending side generates its checksums while it is doing the file-system
495 scan that builds the list of the available files. The receiver generates
496@@ -526,12 +527,42 @@ its checksums when it is scanning for changed files, and will checksum any
497 file that has the same size as the corresponding sender's file: files with
498 either a changed size or a changed checksum are selected for transfer.
499
500+Starting with version 3.0.0, the sending side will look for a checksum
501+summary file and use a pre-generated checksum that it reads out of the file
502+(as long as it matches the file's size and modified time). This allows a
503+server to support the --checksum option to clients without having to
504+recompute the checksums for each client. See the bf(--checksum-updating)
505+option for a way to have rsync create/update these checksum files.
506+
507 Note that rsync always verifies that each em(transferred) file was
508 correctly reconstructed on the receiving side by checking a whole-file
509 checksum that is generated when as the file is transferred, but that
510 automatic after-the-transfer verification has nothing to do with this
511 option's before-the-transfer "Does this file need to be updated?" check.
512
513+dit(bf(--checksum-updating)) This option tells the sending side to create
514+and/or update per-directory checksum files that are used by the
515+bf(--checksum) option. The file that is updated is named .rsyncsums. If
516+pre-transfer checksums are not being computed, this option has no effect.
517+
518+The checksum file stores the computed checksum, last-known size,
519+modification time, and name for each file in the current directory. If a
520+later transfer finds that a file matches its prior size and modification
521+time, the checksum is assumed to still be correct. Otherwise it is
522+recomputed and updated in the file.
523+
524+To avoid transferring the system's checksum files, you can use an exclude
525+(e.g. bf(--exclude=.rsyncsums)). To make this easier to type, you can use
526+a popt alias. For instance, adding the following line in your ~/.popt file
527+defines a bf(--cc) option that enables checksum updating and excludes the
528+checksum files:
529+
530+verb( rsync alias --cc --checksum-updating --exclude=.rsyncsums)
531+
532+An rsync daemon does not allow the client to control this setting, so see
533+the "checksum updating" daemon config option for information on how to make
534+a daemon maintain these checksum files.
535+
536 dit(bf(-a, --archive)) This is equivalent to bf(-rlptgoD). It is a quick
537 way of saying you want recursion and want to preserve almost
538 everything (with -H being a notable omission).
539diff --git a/rsyncd.conf.yo b/rsyncd.conf.yo
540--- a/rsyncd.conf.yo
541+++ b/rsyncd.conf.yo
542@@ -241,6 +241,20 @@ locking on this file to ensure that the max connections limit is not
543 exceeded for the modules sharing the lock file.
544 The default is tt(/var/run/rsyncd.lock).
545
546+dit(bf(checksum updating)) This option tells rsync to update/create the
547+checksum information in the per-directory checksum files when users copy
548+files using the bf(--checksum) option. Any file that has changed since it
549+was last checksummed (or is not mentioned) has its data updated in the
550+.rsyncsums file.
551+
552+Note that this updating will occur even if the module is listed as being
553+read-only. If you want to hide these files (and you will almost always
554+want to do so), add ".rsyncsums" to the module's exclude setting.
555+
556+Note also that the client's command-line option, bf(--checksum-updating),
557+has no effect on a daemon. A daemon will only update/create checksum files
558+if this config option is true.
559+
560 dit(bf(read only)) The "read only" option determines whether clients
561 will be able to upload files or not. If "read only" is true then any
562 attempted uploads will fail. If "read only" is false then uploads will