Allow for more -p info.
[rsync/rsync-patches.git] / checksum-updating.diff
... / ...
CommitLineData
1This builds on the sender optimization feature of the checksum4mirrors
2patch and adds the ability to create and/or update the .rsyncsums files
3when --checksum-updating (or "checksum updating = true") is specified.
4
5To use this patch, run these commands for a successful build:
6
7 patch -p1 <patches/checksum4mirrors.diff
8 patch -p1 <patches/checksum-updating.diff
9 ./configure (optional if already run)
10 make
11
12--- old/clientserver.c
13+++ new/clientserver.c
14@@ -37,6 +37,7 @@ extern int sanitize_paths;
15 extern int filesfrom_fd;
16 extern int remote_protocol;
17 extern int protocol_version;
18+extern int checksum_updating;
19 extern int io_timeout;
20 extern int no_detach;
21 extern int default_af_hint;
22@@ -687,6 +688,8 @@ static int rsync_module(int f_in, int f_
23 else if (am_root < 0) /* Treat --fake-super from client as --super. */
24 am_root = 2;
25
26+ checksum_updating = lp_checksum_updating(i);
27+
28 if (filesfrom_fd == 0)
29 filesfrom_fd = f_in;
30
31--- old/flist.c
32+++ new/flist.c
33@@ -26,6 +26,7 @@
34 #include "io.h"
35
36 extern int verbose;
37+extern int dry_run;
38 extern int am_root;
39 extern int am_server;
40 extern int am_daemon;
41@@ -58,6 +59,7 @@ extern int implied_dirs;
42 extern int file_extra_cnt;
43 extern int ignore_perishable;
44 extern int non_perishable_cnt;
45+extern int checksum_updating;
46 extern int prune_empty_dirs;
47 extern int copy_links;
48 extern int copy_unsafe_links;
49@@ -82,6 +84,9 @@ extern iconv_t ic_send, ic_recv;
50
51 #define PTR_SIZE (sizeof (struct file_struct *))
52
53+#define FLAG_SUM_MISSING (1<<1) /* F_SUM() data is undefined */
54+#define FLAG_SUM_KEEP (1<<2) /* keep entry when rewriting */
55+
56 int io_error;
57 int checksum_len;
58 dev_t filesystem_dev; /* used to implement -x */
59@@ -117,6 +122,9 @@ static char empty_sum[MAX_DIGEST_LEN];
60 static int flist_count_offset; /* for --delete --progress */
61 static int dir_count = 0;
62 static int high_hlink_ndx;
63+static int checksum_matches = 0;
64+static int checksum_updates = 0;
65+static int regular_skipped = 0;
66 static struct file_list *checksum_flist = NULL;
67
68 static void clean_flist(struct file_list *flist, int strip_root);
69@@ -307,7 +315,8 @@ static void flist_done_allocating(struct
70
71 /* The len count is the length of the basename + 1 for the null. */
72 static int add_checksum(const char *dirname, const char *basename, int len,
73- OFF_T file_length, time_t mtime, const char *sum)
74+ OFF_T file_length, time_t mtime, int32 ctime, int32 inode,
75+ const char *sum, const char *alt_sum, int flags)
76 {
77 struct file_struct *file;
78 int alloc_len, extra_len;
79@@ -318,13 +327,14 @@ static int add_checksum(const char *dirn
80 if (file_length == 0)
81 return 0;
82
83- extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT)
84+ /* "2" is for a 32-bit ctime num and a 32-bit inode num. */
85+ extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + SUM_EXTRA_CNT + 2)
86 * EXTRA_LEN;
87 #if EXTRA_ROUNDING > 0
88 if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN))
89 extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN;
90 #endif
91- alloc_len = FILE_STRUCT_LEN + extra_len + len;
92+ alloc_len = FILE_STRUCT_LEN + extra_len + len + checksum_len*2 + 1;
93 bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum");
94
95 memset(bp, 0, extra_len + FILE_STRUCT_LEN);
96@@ -333,7 +343,14 @@ static int add_checksum(const char *dirn
97 bp += FILE_STRUCT_LEN;
98
99 memcpy(bp, basename, len);
100+ if (alt_sum)
101+ strlcpy(bp+len, alt_sum, checksum_len*2 + 1);
102+ else {
103+ memset(bp+len, '=', checksum_len*2);
104+ bp[len+checksum_len*2] = '\0';
105+ }
106
107+ file->flags = flags;
108 file->mode = S_IFREG;
109 file->modtime = mtime;
110 file->len32 = (uint32)file_length;
111@@ -344,6 +361,8 @@ static int add_checksum(const char *dirn
112 file->dirname = dirname;
113 bp = F_SUM(file);
114 memcpy(bp, sum, checksum_len);
115+ F_CTIME(file) = ctime;
116+ F_INODE(file) = inode;
117
118 flist_expand(checksum_flist, 1);
119 checksum_flist->files[checksum_flist->used++] = file;
120@@ -353,17 +372,104 @@ static int add_checksum(const char *dirn
121 return 1;
122 }
123
124+static void write_checksums(const char *next_dirname, int whole_dir)
125+{
126+ static const char *dirname_save;
127+ char fbuf[MAXPATHLEN];
128+ const char *dirname;
129+ int used, new_entries, counts_match, no_skipped;
130+ FILE *out_fp;
131+ int i;
132+
133+ dirname = dirname_save;
134+ dirname_save = next_dirname;
135+
136+ if (!dirname)
137+ return;
138+
139+ used = checksum_flist->used;
140+ new_entries = checksum_updates != 0;
141+ counts_match = used == checksum_matches;
142+ no_skipped = whole_dir && regular_skipped == 0;
143+
144+ clean_flist(checksum_flist, 0);
145+
146+ checksum_flist->used = 0;
147+ checksum_matches = 0;
148+ checksum_updates = 0;
149+ regular_skipped = 0;
150+
151+ if (dry_run)
152+ return;
153+
154+ if (*dirname) {
155+ if (pathjoin(fbuf, sizeof fbuf, dirname, ".rsyncsums") >= sizeof fbuf)
156+ return;
157+ } else
158+ strlcpy(fbuf, ".rsyncsums", sizeof fbuf);
159+
160+ if (checksum_flist->high - checksum_flist->low < 0 && no_skipped) {
161+ unlink(fbuf);
162+ return;
163+ }
164+
165+ if (!new_entries && (counts_match || !whole_dir))
166+ return;
167+
168+ if (!(out_fp = fopen(fbuf, "w")))
169+ return;
170+
171+ new_entries = 0;
172+ for (i = checksum_flist->low; i <= checksum_flist->high; i++) {
173+ struct file_struct *file = checksum_flist->sorted[i];
174+ const char *cp = F_SUM(file);
175+ const char *end = cp + checksum_len;
176+ const char *alt_sum = file->basename + strlen(file->basename) + 1;
177+ int32 ctime, inode;
178+ if (whole_dir && !(file->flags & FLAG_SUM_KEEP))
179+ continue;
180+ ctime = F_CTIME(file);
181+ inode = F_INODE(file);
182+ if (protocol_version >= 30)
183+ fprintf(out_fp, "%s ", alt_sum);
184+ if (file->flags & FLAG_SUM_MISSING) {
185+ new_entries++;
186+ do {
187+ fprintf(out_fp, "==");
188+ } while (++cp != end);
189+ } else {
190+ do {
191+ fprintf(out_fp, "%02x", (int)CVAL(cp, 0));
192+ } while (++cp != end);
193+ }
194+ if (protocol_version < 30)
195+ fprintf(out_fp, " %s", alt_sum);
196+ if (*alt_sum == '=')
197+ new_entries++;
198+ fprintf(out_fp, " %10.0f %10.0f %10lu %10lu %s\n",
199+ (double)F_LENGTH(file), (double)file->modtime,
200+ (long)ctime, (long)inode, file->basename);
201+ }
202+
203+ fclose(out_fp);
204+}
205+
206 /* The direname value must remain unchanged during the lifespan of the
207 * created checksum_flist object because we use it directly. */
208 static void read_checksums(const char *dirname)
209 {
210 char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN];
211+ const char *alt_sum = NULL;
212 OFF_T file_length;
213 time_t mtime;
214- int len, dlen, i;
215+ int32 ctime, inode;
216+ int len, dlen, i, flags;
217 char *cp;
218 FILE *fp;
219
220+ if (checksum_updating)
221+ write_checksums(dirname, 0);
222+
223 if (checksum_flist) {
224 /* Reset the pool memory and empty the file-list array. */
225 pool_free_old(checksum_flist->file_pool,
226@@ -374,6 +480,9 @@ static void read_checksums(const char *d
227
228 checksum_flist->low = 0;
229 checksum_flist->high = -1;
230+ checksum_matches = 0;
231+ checksum_updates = 0;
232+ regular_skipped = 0;
233
234 if (!dirname)
235 return;
236@@ -392,7 +501,7 @@ static void read_checksums(const char *d
237 while (fgets(line, sizeof line, fp)) {
238 cp = line;
239 if (protocol_version >= 30) {
240- char *alt_sum = cp;
241+ alt_sum = cp;
242 if (*cp == '=')
243 while (*++cp == '=') {}
244 else
245@@ -403,7 +512,14 @@ static void read_checksums(const char *d
246 }
247
248 if (*cp == '=') {
249- continue;
250+ for (i = 0; i < checksum_len*2; i++, cp++) {
251+ if (*cp != '=') {
252+ cp = "";
253+ break;
254+ }
255+ }
256+ memset(sum, 0, checksum_len);
257+ flags = FLAG_SUM_MISSING;
258 } else {
259 for (i = 0; i < checksum_len*2; i++, cp++) {
260 int x;
261@@ -421,13 +537,14 @@ static void read_checksums(const char *d
262 else
263 sum[i/2] = x << 4;
264 }
265+ flags = 0;
266 }
267 if (*cp != ' ')
268 break;
269 while (*++cp == ' ') {}
270
271 if (protocol_version < 30) {
272- char *alt_sum = cp;
273+ alt_sum = cp;
274 if (*cp == '=')
275 while (*++cp == '=') {}
276 else
277@@ -451,16 +568,16 @@ static void read_checksums(const char *d
278 break;
279 while (*++cp == ' ') {}
280
281- /* Ignore ctime. */
282+ ctime = 0;
283 while (isDigit(cp))
284- cp++;
285+ ctime = ctime * 10 + *cp++ - '0';
286 if (*cp != ' ')
287 break;
288 while (*++cp == ' ') {}
289
290- /* Ignore inode. */
291+ inode = 0;
292 while (isDigit(cp))
293- cp++;
294+ inode = inode * 10 + *cp++ - '0';
295 if (*cp != ' ')
296 break;
297 while (*++cp == ' ') {}
298@@ -477,8 +594,13 @@ static void read_checksums(const char *d
299 continue;
300
301 strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen);
302+ if (is_excluded(fbuf, 0, ALL_FILTERS)) {
303+ flags |= FLAG_SUM_KEEP;
304+ checksum_matches++;
305+ }
306
307- add_checksum(dirname, cp, len, file_length, mtime, sum);
308+ add_checksum(dirname, cp, len, file_length, mtime, ctime, inode,
309+ sum, alt_sum, flags);
310 }
311 fclose(fp);
312
313@@ -1260,6 +1382,8 @@ struct file_struct *make_file(const char
314 if (is_excluded(thisname, S_ISDIR(st.st_mode) != 0, filter_level)) {
315 if (ignore_perishable)
316 non_perishable_cnt++;
317+ if (S_ISREG(st.st_mode))
318+ regular_skipped++;
319 return NULL;
320 }
321
322@@ -1391,13 +1515,36 @@ struct file_struct *make_file(const char
323 int j;
324 if (flist && (j = flist_find(checksum_flist, file)) >= 0) {
325 struct file_struct *fp = checksum_flist->sorted[j];
326+ int32 ctime = F_CTIME(fp);
327+ int32 inode = F_INODE(fp);
328 if (F_LENGTH(fp) == st.st_size
329- && fp->modtime == st.st_mtime)
330- memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN);
331- else
332- file_checksum(thisname, tmp_sum, st.st_size);
333- } else
334+ && fp->modtime == st.st_mtime
335+ && ctime == (int32)st.st_ctime
336+ && inode == (int32)st.st_ino) {
337+ if (fp->flags & FLAG_SUM_MISSING) {
338+ fp->flags &= ~FLAG_SUM_MISSING;
339+ checksum_updates++;
340+ file_checksum(thisname, tmp_sum, st.st_size);
341+ memcpy(F_SUM(fp), tmp_sum, MAX_DIGEST_LEN);
342+ } else {
343+ checksum_matches++;
344+ memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN);
345+ }
346+ fp->flags |= FLAG_SUM_KEEP;
347+ } else {
348+ clear_file(fp);
349+ goto compute_new_checksum;
350+ }
351+ } else {
352+ compute_new_checksum:
353 file_checksum(thisname, tmp_sum, st.st_size);
354+ if (checksum_updating && flist) {
355+ checksum_updates +=
356+ add_checksum(file->dirname, basename, basename_len,
357+ st.st_size, st.st_mtime, st.st_ctime,
358+ st.st_ino, tmp_sum, NULL, FLAG_SUM_KEEP);
359+ }
360+ }
361 }
362
363 /* This code is only used by the receiver when it is building
364@@ -1688,6 +1835,9 @@ static void send_directory(int f, struct
365
366 closedir(d);
367
368+ if (checksum_updating && always_checksum && am_sender && f >= 0)
369+ write_checksums(NULL, 1);
370+
371 if (f >= 0 && recurse && !divert_dirs) {
372 int i, end = flist->used - 1;
373 /* send_if_directory() bumps flist->used, so use "end". */
374@@ -2249,7 +2399,11 @@ struct file_list *send_file_list(int f,
375 * file-list to check if this is a 1-file xfer. */
376 send_extra_file_list(f, 1);
377 }
378- }
379+ } else
380+ flist_eof = 1;
381+
382+ if (checksum_updating && always_checksum && flist_eof)
383+ read_checksums(NULL); /* writes any last updates */
384
385 return flist;
386 }
387@@ -2535,7 +2689,7 @@ void flist_free(struct file_list *flist)
388
389 if (!flist->prev || !flist_cnt)
390 pool_destroy(flist->file_pool);
391- else
392+ else if (flist->pool_boundary)
393 pool_free_old(flist->file_pool, flist->pool_boundary);
394
395 if (flist->sorted && flist->sorted != flist->files)
396--- old/loadparm.c
397+++ new/loadparm.c
398@@ -153,6 +153,7 @@ typedef struct
399 int syslog_facility;
400 int timeout;
401
402+ BOOL checksum_updating;
403 BOOL fake_super;
404 BOOL ignore_errors;
405 BOOL ignore_nonreadable;
406@@ -201,6 +202,7 @@ static service sDefault =
407 /* syslog_facility; */ LOG_DAEMON,
408 /* timeout; */ 0,
409
410+ /* checksum_updating; */ False,
411 /* fake_super; */ False,
412 /* ignore_errors; */ False,
413 /* ignore_nonreadable; */ False,
414@@ -317,6 +319,7 @@ static struct parm_struct parm_table[] =
415 {"lock file", P_STRING, P_LOCAL, &sDefault.lock_file, NULL,0},
416 {"log file", P_STRING, P_LOCAL, &sDefault.log_file, NULL,0},
417 {"log format", P_STRING, P_LOCAL, &sDefault.log_format, NULL,0},
418+ {"checksum updating", P_BOOL, P_LOCAL, &sDefault.checksum_updating, NULL,0},
419 {"max connections", P_INTEGER,P_LOCAL, &sDefault.max_connections, NULL,0},
420 {"max verbosity", P_INTEGER,P_LOCAL, &sDefault.max_verbosity, NULL,0},
421 {"name", P_STRING, P_LOCAL, &sDefault.name, NULL,0},
422@@ -422,6 +425,7 @@ FN_LOCAL_BOOL(lp_fake_super, fake_super)
423 FN_LOCAL_BOOL(lp_ignore_errors, ignore_errors)
424 FN_LOCAL_BOOL(lp_ignore_nonreadable, ignore_nonreadable)
425 FN_LOCAL_BOOL(lp_list, list)
426+FN_LOCAL_BOOL(lp_checksum_updating, checksum_updating)
427 FN_LOCAL_BOOL(lp_read_only, read_only)
428 FN_LOCAL_BOOL(lp_strict_modes, strict_modes)
429 FN_LOCAL_BOOL(lp_transfer_logging, transfer_logging)
430--- old/options.c
431+++ new/options.c
432@@ -110,6 +110,7 @@ size_t bwlimit_writemax = 0;
433 int ignore_existing = 0;
434 int ignore_non_existing = 0;
435 int need_messages_from_generator = 0;
436+int checksum_updating = 0;
437 int max_delete = INT_MIN;
438 OFF_T max_size = 0;
439 OFF_T min_size = 0;
440@@ -310,6 +311,7 @@ void usage(enum logcode F)
441 rprintf(F," -q, --quiet suppress non-error messages\n");
442 rprintf(F," --no-motd suppress daemon-mode MOTD (see manpage caveat)\n");
443 rprintf(F," -c, --checksum skip based on checksum, not mod-time & size\n");
444+ rprintf(F," --checksum-updating sender updates .rsyncsums files\n");
445 rprintf(F," -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)\n");
446 rprintf(F," --no-OPTION turn off an implied OPTION (e.g. --no-D)\n");
447 rprintf(F," -r, --recursive recurse into directories\n");
448@@ -557,6 +559,7 @@ static struct poptOption long_options[]
449 {"checksum", 'c', POPT_ARG_VAL, &always_checksum, 1, 0, 0 },
450 {"no-checksum", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
451 {"no-c", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 },
452+ {"checksum-updating",0, POPT_ARG_NONE, &checksum_updating, 0, 0, 0 },
453 {"block-size", 'B', POPT_ARG_LONG, &block_size, 0, 0, 0 },
454 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
455 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
456@@ -1954,7 +1957,9 @@ void server_options(char **args, int *ar
457 args[ac++] = basis_dir[i];
458 }
459 }
460- }
461+ } else if (checksum_updating)
462+ args[ac++] = "--checksum-updating";
463+
464
465 if (append_mode) {
466 if (append_mode > 1)
467--- old/rsync.h
468+++ new/rsync.h
469@@ -662,6 +662,10 @@ extern int xattrs_ndx;
470 #define F_SUM(f) ((char*)OPT_EXTRA(f, LEN64_BUMP(f) + HLINK_BUMP(f) \
471 + SUM_EXTRA_CNT - 1))
472
473+/* These are only valid on an entry read from a checksum file. */
474+#define F_CTIME(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT)->num
475+#define F_INODE(f) OPT_EXTRA(f, LEN64_BUMP(f) + SUM_EXTRA_CNT + 1)->num
476+
477 /* Some utility defines: */
478 #define F_IS_ACTIVE(f) (f)->basename[0]
479 #define F_IS_HLINKED(f) ((f)->flags & FLAG_HLINKED)
480--- old/rsync.yo
481+++ new/rsync.yo
482@@ -317,6 +317,7 @@ to the detailed description below for a
483 -q, --quiet suppress non-error messages
484 --no-motd suppress daemon-mode MOTD (see caveat)
485 -c, --checksum skip based on checksum, not mod-time & size
486+ --checksum-updating sender updates .rsyncsums files
487 -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)
488 --no-OPTION turn off an implied OPTION (e.g. --no-D)
489 -r, --recursive recurse into directories
490@@ -515,9 +516,9 @@ uses a "quick check" that (by default) c
491 of last modification match between the sender and receiver. This option
492 changes this to compare a 128-bit MD4 checksum for each file that has a
493 matching size. Generating the checksums means that both sides will expend
494-a lot of disk I/O reading all the data in the files in the transfer (and
495-this is prior to any reading that will be done to transfer changed files),
496-so this can slow things down significantly.
497+a lot of disk I/O reading the data in all the files in the transfer, so
498+this can slow things down significantly (and this is prior to any reading
499+that will be done to transfer the files that have changed).
500
501 The sending side generates its checksums while it is doing the file-system
502 scan that builds the list of the available files. The receiver generates
503@@ -525,12 +526,42 @@ its checksums when it is scanning for ch
504 file that has the same size as the corresponding sender's file: files with
505 either a changed size or a changed checksum are selected for transfer.
506
507+Starting with version 3.0.0, the sending side will look for a checksum
508+summary file and use a pre-generated checksum that it reads out of the file
509+(as long as it matches the file's size, modified time, ctime, and inode).
510+This allows a server to support the --checksum option to clients without
511+recomputing the checksums for each client. See the bf(--checksum-updating)
512+option for a way to have rsync create/update these checksum files.
513+
514 Note that rsync always verifies that each em(transferred) file was
515 correctly reconstructed on the receiving side by checking a whole-file
516 checksum that is generated when as the file is transferred, but that
517 automatic after-the-transfer verification has nothing to do with this
518 option's before-the-transfer "Does this file need to be updated?" check.
519
520+dit(bf(--checksum-updating)) This option tells the sending side to create
521+and/or update per-directory checksum files that are used by the
522+bf(--checksum) option. The file that is updated is named .rsyncsums. If
523+pre-transfer checksums are not being computed, this option has no effect.
524+
525+Each checksum file stores the computed checksum, last-known size,
526+modification time, ctime, inode number, and name for each file in its
527+directory. If a later transfer finds that a file still matches the
528+recorded size, times, and inode, the checksum is assumed to still be
529+correct. Otherwise it is recomputed and updated in the file.
530+
531+To avoid transferring the system's checksum files, you can use an exclude
532+(e.g. bf(--exclude=.rsyncsums)). To make this easier to type, you can use
533+a popt alias. For instance, adding the following line in your ~/.popt file
534+defines a bf(--cc) option that enables checksum updating and excludes the
535+checksum files:
536+
537+verb( rsync alias --cc --checksum-updating --exclude=.rsyncsums)
538+
539+An rsync daemon does not allow the client to control this setting, so see
540+the "checksum updating" daemon config option for information on how to make
541+a daemon maintain these checksum files.
542+
543 dit(bf(-a, --archive)) This is equivalent to bf(-rlptgoD). It is a quick
544 way of saying you want recursion and want to preserve almost
545 everything (with -H being a notable omission).
546--- old/rsyncd.conf.yo
547+++ new/rsyncd.conf.yo
548@@ -200,6 +200,20 @@ locking on this file to ensure that the
549 exceeded for the modules sharing the lock file.
550 The default is tt(/var/run/rsyncd.lock).
551
552+dit(bf(checksum updating)) This option tells rsync to update/create the
553+checksum information in the per-directory checksum files when users copy
554+files using the bf(--checksum) option. Any file that has changed since it
555+was last checksummed (or is not mentioned) has its data updated in the
556+.rsyncsums file.
557+
558+Note that this updating will occur even if the module is listed as being
559+read-only. If you want to hide these files (and you will almost always
560+want to do so), add ".rsyncsums" to the module's exclude setting.
561+
562+Note also that the client's command-line option, bf(--checksum-updating),
563+has no effect on a daemon. A daemon will only update/create checksum files
564+if this config option is true.
565+
566 dit(bf(read only)) The "read only" option determines whether clients
567 will be able to upload files or not. If "read only" is true then any
568 attempted uploads will fail. If "read only" is false then uploads will