Applying uid/gid fix from trunk.
[rsync/rsync-patches.git] / detect-renamed.diff
... / ...
CommitLineData
1This patch adds the --detect-renamed option which makes rsync notice files
2that either (1) match in size & modify-time (plus the basename, if possible)
3or (2) match in size & checksum (when --checksum was also specified) and use
4each match as an alternate basis file to speed up the transfer.
5
6The algorithm attempts to scan the receiving-side's files in an efficient
7manner. If --delete[-before] is enabled, we'll take advantage of the
8pre-transfer delete pass to prepare any alternate-basis-file matches we
9might find. If --delete-before is not enabled, rsync does the rename scan
10during the regular file-sending scan (scanning each directory right before
11the generator starts updating files from that dir). In this latter mode,
12rsync might delay the updating of a file (if no alternate-basis match was
13yet found) until the full scan of the receiving side is complete, at which
14point any delayed files are processed.
15
16I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that
17takes advantage of rsync's pre-existing partial-dir logic. This uses less
18memory than trying to keep track of the matches internally, and also allows
19any deletions or file-updates to occur normally without interfering with
20these alternate-basis discoveries.
21
22To use this patch, run these commands for a successful build:
23
24 patch -p1 <patches/detect-renamed.diff
25 ./configure (optional if already run)
26 make
27
28TODO:
29
30 We need to never return a match from fattr_find() that has a basis
31 file. This will ensure that we don't try to give a renamed file to
32 a file that can't use it, while missing out on giving it to a file
33 that could use it.
34
35diff --git a/compat.c b/compat.c
36--- a/compat.c
37+++ b/compat.c
38@@ -41,6 +41,7 @@ extern int checksum_seed;
39 extern int basis_dir_cnt;
40 extern int prune_empty_dirs;
41 extern int protocol_version;
42+extern int detect_renamed;
43 extern int protect_args;
44 extern int preserve_uid;
45 extern int preserve_gid;
46@@ -120,6 +121,7 @@ void set_allow_inc_recurse(void)
47 allow_inc_recurse = 0;
48 else if (!am_sender
49 && (delete_before || delete_after
50+ || detect_renamed
51 || delay_updates || prune_empty_dirs))
52 allow_inc_recurse = 0;
53 else if (am_server && !local_server
54diff --git a/flist.c b/flist.c
55--- a/flist.c
56+++ b/flist.c
57@@ -63,6 +63,7 @@ extern int non_perishable_cnt;
58 extern int prune_empty_dirs;
59 extern int copy_links;
60 extern int copy_unsafe_links;
61+extern int detect_renamed;
62 extern int protocol_version;
63 extern int sanitize_paths;
64 extern int munge_symlinks;
65@@ -121,6 +122,8 @@ static int64 tmp_dev, tmp_ino;
66 #endif
67 static char tmp_sum[MAX_DIGEST_LEN];
68
69+struct file_list the_fattr_list;
70+
71 static char empty_sum[MAX_DIGEST_LEN];
72 static int flist_count_offset; /* for --delete --progress */
73 static int dir_count = 0;
74@@ -288,6 +291,45 @@ static int is_excluded(const char *fname, int is_dir, int filter_level)
75 return 0;
76 }
77
78+static int fattr_compare(struct file_struct **file1, struct file_struct **file2)
79+{
80+ struct file_struct *f1 = *file1;
81+ struct file_struct *f2 = *file2;
82+ int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2);
83+ int diff;
84+
85+ if (!f1->basename || !S_ISREG(f1->mode) || !len1) {
86+ if (!f2->basename || !S_ISREG(f2->mode) || !len2)
87+ return 0;
88+ return 1;
89+ }
90+ if (!f2->basename || !S_ISREG(f2->mode) || !len2)
91+ return -1;
92+
93+ /* Don't use diff for values that are longer than an int. */
94+ if (len1 != len2)
95+ return len1 < len2 ? -1 : 1;
96+
97+ if (always_checksum) {
98+ diff = u_memcmp(F_SUM(f1), F_SUM(f2), checksum_len);
99+ if (diff)
100+ return diff;
101+ } else if (f1->modtime != f2->modtime)
102+ return f1->modtime < f2->modtime ? -1 : 1;
103+
104+ diff = u_strcmp(f1->basename, f2->basename);
105+ if (diff)
106+ return diff;
107+
108+ if (f1->dirname == f2->dirname)
109+ return 0;
110+ if (!f1->dirname)
111+ return -1;
112+ if (!f2->dirname)
113+ return 1;
114+ return u_strcmp(f1->dirname, f2->dirname);
115+}
116+
117 static void send_directory(int f, struct file_list *flist,
118 char *fbuf, int len, int flags);
119
120@@ -2344,6 +2386,25 @@ struct file_list *recv_file_list(int f)
121
122 flist_sort_and_clean(flist, relative_paths);
123
124+ if (detect_renamed) {
125+ int j = flist->used;
126+ the_fattr_list.used = j;
127+ the_fattr_list.files = new_array(struct file_struct *, j);
128+ if (!the_fattr_list.files)
129+ out_of_memory("recv_file_list");
130+ memcpy(the_fattr_list.files, flist->files,
131+ j * sizeof (struct file_struct *));
132+ qsort(the_fattr_list.files, j,
133+ sizeof the_fattr_list.files[0], (int (*)())fattr_compare);
134+ the_fattr_list.low = 0;
135+ while (j-- > 0) {
136+ struct file_struct *fp = the_fattr_list.files[j];
137+ if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp))
138+ break;
139+ }
140+ the_fattr_list.high = j;
141+ }
142+
143 if (protocol_version < 30) {
144 /* Recv the io_error flag */
145 if (ignore_errors)
146diff --git a/generator.c b/generator.c
147--- a/generator.c
148+++ b/generator.c
149@@ -80,6 +80,7 @@ extern char *basis_dir[];
150 extern int compare_dest;
151 extern int copy_dest;
152 extern int link_dest;
153+extern int detect_renamed;
154 extern int whole_file;
155 extern int list_only;
156 extern int read_batch;
157@@ -98,6 +99,7 @@ extern char *backup_suffix;
158 extern int backup_suffix_len;
159 extern struct file_list *cur_flist, *first_flist, *dir_flist;
160 extern struct filter_list_struct daemon_filter_list;
161+extern struct file_list the_fattr_list;
162
163 int ignore_perishable = 0;
164 int non_perishable_cnt = 0;
165@@ -105,6 +107,7 @@ int maybe_ATTRS_REPORT = 0;
166
167 static dev_t dev_zero;
168 static int deletion_count = 0; /* used to implement --max-delete */
169+static int unexplored_dirs = 1;
170 static int deldelay_size = 0, deldelay_cnt = 0;
171 static char *deldelay_buf = NULL;
172 static int deldelay_fd = -1;
173@@ -115,7 +118,7 @@ static int need_retouch_dir_times;
174 static int need_retouch_dir_perms;
175 static const char *solo_file = NULL;
176
177-/* For calling delete_item() and delete_dir_contents(). */
178+/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */
179 #define DEL_NO_UID_WRITE (1<<0) /* file/dir has our uid w/o write perm */
180 #define DEL_RECURSE (1<<1) /* if dir, delete all contents */
181 #define DEL_DIR_IS_EMPTY (1<<2) /* internal delete_FUNCTIONS use only */
182@@ -124,6 +127,7 @@ static const char *solo_file = NULL;
183 #define DEL_FOR_SYMLINK (1<<5) /* making room for a replacement symlink */
184 #define DEL_FOR_DEVICE (1<<6) /* making room for a replacement device */
185 #define DEL_FOR_SPECIAL (1<<7) /* making room for a replacement special */
186+#define DEL_NO_DELETIONS (1<<9) /* just check for renames w/o deleting */
187
188 #define DEL_MAKE_ROOM (DEL_FOR_FILE|DEL_FOR_DIR|DEL_FOR_SYMLINK|DEL_FOR_DEVICE|DEL_FOR_SPECIAL)
189
190@@ -148,11 +152,121 @@ static int is_backup_file(char *fn)
191 return k > 0 && strcmp(fn+k, backup_suffix) == 0;
192 }
193
194+/* Search for a regular file that matches either (1) the size & modified
195+ * time (plus the basename, if possible) or (2) the size & checksum. If
196+ * we find an exact match down to the dirname, return -1 because we found
197+ * an up-to-date file in the transfer, not a renamed file. */
198+static int fattr_find(struct file_struct *f, char *fname)
199+{
200+ int low = the_fattr_list.low, high = the_fattr_list.high;
201+ int mid, ok_match = -1, good_match = -1;
202+ struct file_struct *fmid;
203+ int diff;
204+
205+ while (low <= high) {
206+ mid = (low + high) / 2;
207+ fmid = the_fattr_list.files[mid];
208+ if (F_LENGTH(fmid) != F_LENGTH(f)) {
209+ if (F_LENGTH(fmid) < F_LENGTH(f))
210+ low = mid + 1;
211+ else
212+ high = mid - 1;
213+ continue;
214+ }
215+ if (always_checksum) {
216+ /* We use the FLAG_FILE_SENT flag to indicate when we
217+ * have computed the checksum for an entry. */
218+ if (!(f->flags & FLAG_FILE_SENT)) {
219+ if (fmid->modtime == f->modtime
220+ && f_name_cmp(fmid, f) == 0)
221+ return -1; /* assume we can't help */
222+ file_checksum(fname, F_SUM(f), F_LENGTH(f));
223+ f->flags |= FLAG_FILE_SENT;
224+ }
225+ diff = u_memcmp(F_SUM(fmid), F_SUM(f), checksum_len);
226+ if (diff) {
227+ if (diff < 0)
228+ low = mid + 1;
229+ else
230+ high = mid - 1;
231+ continue;
232+ }
233+ } else {
234+ if (fmid->modtime != f->modtime) {
235+ if (fmid->modtime < f->modtime)
236+ low = mid + 1;
237+ else
238+ high = mid - 1;
239+ continue;
240+ }
241+ }
242+ ok_match = mid;
243+ diff = u_strcmp(fmid->basename, f->basename);
244+ if (diff == 0) {
245+ good_match = mid;
246+ if (fmid->dirname == f->dirname)
247+ return -1; /* file is up-to-date */
248+ if (!fmid->dirname) {
249+ low = mid + 1;
250+ continue;
251+ }
252+ if (!f->dirname) {
253+ high = mid - 1;
254+ continue;
255+ }
256+ diff = u_strcmp(fmid->dirname, f->dirname);
257+ if (diff == 0)
258+ return -1; /* file is up-to-date */
259+ }
260+ if (diff < 0)
261+ low = mid + 1;
262+ else
263+ high = mid - 1;
264+ }
265+
266+ return good_match >= 0 ? good_match : ok_match;
267+}
268+
269+static void look_for_rename(struct file_struct *file, char *fname)
270+{
271+ struct file_struct *fp;
272+ char *partialptr, *fn;
273+ STRUCT_STAT st;
274+ int ndx;
275+
276+ if (!partial_dir || (ndx = fattr_find(file, fname)) < 0)
277+ return;
278+
279+ fp = the_fattr_list.files[ndx];
280+ fn = f_name(fp, NULL);
281+ /* We don't provide an alternate-basis file if there is a basis file. */
282+ if (link_stat(fn, &st, 0) == 0)
283+ return;
284+
285+ if (!dry_run) {
286+ if ((partialptr = partial_dir_fname(fn)) == NULL
287+ || !handle_partial_dir(partialptr, PDIR_CREATE))
288+ return;
289+ /* We only use the file if we can hard-link it into our tmp dir. */
290+ if (link(fname, partialptr) != 0) {
291+ if (errno != EEXIST)
292+ handle_partial_dir(partialptr, PDIR_DELETE);
293+ return;
294+ }
295+ }
296+
297+ /* I think this falls into the -vv category with "%s is uptodate", etc. */
298+ if (verbose > 1)
299+ rprintf(FINFO, "found renamed: %s => %s\n", fname, fn);
300+}
301+
302 /* Delete a file or directory. If DEL_RECURSE is set in the flags, this will
303 * delete recursively.
304 *
305 * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's
306 * a directory! (The buffer is used for recursion, but returned unchanged.)
307+ *
308+ * Also note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
309 */
310 static enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
311 {
312@@ -186,6 +300,8 @@ static enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
313 goto check_ret;
314 /* OK: try to delete the directory. */
315 }
316+ if (flags & DEL_NO_DELETIONS)
317+ return DR_SUCCESS;
318
319 if (!(flags & DEL_MAKE_ROOM) && max_delete >= 0 && ++deletion_count > max_delete)
320 return DR_AT_LIMIT;
321@@ -241,6 +357,8 @@ static enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
322 * its contents, otherwise just checks for content. Returns DR_SUCCESS or
323 * DR_NOT_EMPTY. Note that fname must point to a MAXPATHLEN buffer! (The
324 * buffer is used for recursion, but returned unchanged.)
325+ *
326+ * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
327 */
328 static enum delret delete_dir_contents(char *fname, uint16 flags)
329 {
330@@ -260,7 +378,9 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
331 save_filters = push_local_filters(fname, dlen);
332
333 non_perishable_cnt = 0;
334+ file_extra_cnt += SUM_EXTRA_CNT;
335 dirlist = get_dirlist(fname, dlen, 0);
336+ file_extra_cnt -= SUM_EXTRA_CNT;
337 ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS;
338
339 if (!dirlist->used)
340@@ -300,7 +420,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
341 if (S_ISDIR(fp->mode)) {
342 if (delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS)
343 ret = DR_NOT_EMPTY;
344- }
345+ } else if (detect_renamed && S_ISREG(fp->mode))
346+ look_for_rename(fp, fname);
347 if (delete_item(fname, fp->mode, flags) != DR_SUCCESS)
348 ret = DR_NOT_EMPTY;
349 }
350@@ -465,13 +586,18 @@ static void do_delayed_deletions(char *delbuf)
351 * all the --delete-WHEN options. Note that the fbuf pointer must point to a
352 * MAXPATHLEN buffer with the name of the directory in it (the functions we
353 * call will append names onto the end, but the old dir value will be restored
354- * on exit). */
355-static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
356+ * on exit).
357+ *
358+ * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
359+ */
360+static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev,
361+ int del_flags)
362 {
363 static int already_warned = 0;
364 struct file_list *dirlist;
365- char delbuf[MAXPATHLEN];
366- int dlen, i;
367+ char *p, delbuf[MAXPATHLEN];
368+ unsigned remainder;
369+ int dlen, i, restore_dot = 0;
370 int save_uid_ndx = uid_ndx;
371
372 if (!fbuf) {
373@@ -486,17 +612,22 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
374 maybe_send_keepalive();
375
376 if (io_error && !ignore_errors) {
377- if (already_warned)
378+ if (!already_warned) {
379+ rprintf(FINFO,
380+ "IO error encountered -- skipping file deletion\n");
381+ already_warned = 1;
382+ }
383+ if (!detect_renamed)
384 return;
385- rprintf(FINFO,
386- "IO error encountered -- skipping file deletion\n");
387- already_warned = 1;
388- return;
389+ del_flags |= DEL_NO_DELETIONS;
390 }
391
392 dlen = strlen(fbuf);
393 change_local_filter_dir(fbuf, dlen, F_DEPTH(file));
394
395+ if (detect_renamed)
396+ unexplored_dirs--;
397+
398 if (one_file_system) {
399 if (file->flags & FLAG_TOP_DIR)
400 filesystem_dev = *fs_dev;
401@@ -509,6 +640,14 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
402
403 dirlist = get_dirlist(fbuf, dlen, 0);
404
405+ p = fbuf + dlen;
406+ if (dlen == 1 && *fbuf == '.') {
407+ restore_dot = 1;
408+ p = fbuf;
409+ } else if (dlen != 1 || *fbuf != '/')
410+ *p++ = '/';
411+ remainder = MAXPATHLEN - (p - fbuf);
412+
413 /* If an item in dirlist is not found in flist, delete it
414 * from the filesystem. */
415 for (i = dirlist->used; i--; ) {
416@@ -521,6 +660,10 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
417 f_name(fp, NULL));
418 continue;
419 }
420+ if (detect_renamed && S_ISREG(fp->mode)) {
421+ strlcpy(p, fp->basename, remainder);
422+ look_for_rename(fp, fbuf);
423+ }
424 /* Here we want to match regardless of file type. Replacement
425 * of a file with one of another type is handled separately by
426 * a delete_item call with a DEL_MAKE_ROOM flag. */
427@@ -529,14 +672,19 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
428 if (!(fp->mode & S_IWUSR) && !am_root && (uid_t)F_OWNER(fp) == our_uid)
429 flags |= DEL_NO_UID_WRITE;
430 f_name(fp, delbuf);
431- if (delete_during == 2) {
432- if (!remember_delete(fp, delbuf, flags))
433+ if (delete_during == 2 && !(del_flags & DEL_NO_DELETIONS)) {
434+ if (!remember_delete(fp, delbuf, del_flags | flags))
435 break;
436 } else
437- delete_item(delbuf, fp->mode, flags);
438- }
439+ delete_item(delbuf, fp->mode, del_flags | flags);
440+ } else if (detect_renamed && S_ISDIR(fp->mode))
441+ unexplored_dirs++;
442 }
443
444+ if (restore_dot)
445+ fbuf[0] = '.';
446+ fbuf[dlen] = '\0';
447+
448 flist_free(dirlist);
449
450 if (!save_uid_ndx) {
451@@ -574,9 +722,9 @@ static void do_delete_pass(void)
452 || !S_ISDIR(st.st_mode))
453 continue;
454
455- delete_in_dir(fbuf, file, &st.st_dev);
456+ delete_in_dir(fbuf, file, &st.st_dev, 0);
457 }
458- delete_in_dir(NULL, NULL, &dev_zero);
459+ delete_in_dir(NULL, NULL, &dev_zero, 0);
460
461 if (do_progress && !am_server)
462 rprintf(FINFO, " \r");
463@@ -1229,6 +1377,7 @@ static void list_file_entry(struct file_struct *f)
464 }
465 }
466
467+static struct bitbag *delayed_bits = NULL;
468 static int phase = 0;
469 static int dflt_perms;
470
471@@ -1505,9 +1654,12 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
472 }
473 else if (delete_during && f_out != -1 && !phase
474 && !(file->flags & FLAG_MISSING_DIR)) {
475- if (file->flags & FLAG_CONTENT_DIR)
476- delete_in_dir(fname, file, &real_sx.st.st_dev);
477- else
478+ if (file->flags & FLAG_CONTENT_DIR) {
479+ if (detect_renamed && real_ret != 0)
480+ unexplored_dirs++;
481+ delete_in_dir(fname, file, &real_sx.st.st_dev,
482+ delete_during < 0 ? DEL_NO_DELETIONS : 0);
483+ } else
484 change_local_filter_dir(fname, strlen(fname), F_DEPTH(file));
485 }
486 goto cleanup;
487@@ -1785,8 +1937,14 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
488 goto cleanup;
489 }
490 #endif
491- if (stat_errno == ENOENT)
492+ if (stat_errno == ENOENT) {
493+ if (detect_renamed && unexplored_dirs > 0
494+ && F_LENGTH(file)) {
495+ bitbag_set_bit(delayed_bits, ndx);
496+ return;
497+ }
498 goto notify_others;
499+ }
500 rsyserr(FERROR_XFER, stat_errno, "recv_generator: failed to stat %s",
501 full_fname(fname));
502 goto cleanup;
503@@ -2187,6 +2345,12 @@ void generate_files(int f_out, const char *local_name)
504 if (verbose > 2)
505 rprintf(FINFO, "generator starting pid=%ld\n", (long)getpid());
506
507+ if (detect_renamed) {
508+ delayed_bits = bitbag_create(cur_flist->used);
509+ if (!delete_before && !delete_during)
510+ delete_during = -1;
511+ }
512+
513 if (delete_before && !solo_file && cur_flist->used > 0)
514 do_delete_pass();
515 if (delete_during == 2) {
516@@ -2197,7 +2361,7 @@ void generate_files(int f_out, const char *local_name)
517 }
518 do_progress = 0;
519
520- if (append_mode > 0 || whole_file < 0)
521+ if (append_mode > 0 || detect_renamed || whole_file < 0)
522 whole_file = 0;
523 if (verbose >= 2) {
524 rprintf(FINFO, "delta-transmission %s\n",
525@@ -2239,7 +2403,7 @@ void generate_files(int f_out, const char *local_name)
526 dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp));
527 } else
528 dirdev = MAKEDEV(0, 0);
529- delete_in_dir(fbuf, fp, &dirdev);
530+ delete_in_dir(fbuf, fp, &dirdev, 0);
531 } else
532 change_local_filter_dir(fbuf, strlen(fbuf), F_DEPTH(fp));
533 }
534@@ -2289,7 +2453,21 @@ void generate_files(int f_out, const char *local_name)
535 write_ndx(f_out, NDX_DONE);
536
537 if (delete_during)
538- delete_in_dir(NULL, NULL, &dev_zero);
539+ delete_in_dir(NULL, NULL, &dev_zero, 0);
540+ if (detect_renamed) {
541+ if (delete_during < 0)
542+ delete_during = 0;
543+ detect_renamed = 0;
544+
545+ for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) {
546+ struct file_struct *file = cur_flist->files[i];
547+ if (local_name)
548+ strlcpy(fbuf, local_name, sizeof fbuf);
549+ else
550+ f_name(file, fbuf);
551+ recv_generator(fbuf, file, i, itemizing, code, f_out);
552+ }
553+ }
554 phase++;
555 if (verbose > 2)
556 rprintf(FINFO, "generate_files phase=%d\n", phase);
557diff --git a/options.c b/options.c
558--- a/options.c
559+++ b/options.c
560@@ -82,6 +82,7 @@ int am_generator = 0;
561 int am_starting_up = 1;
562 int relative_paths = -1;
563 int implied_dirs = 1;
564+int detect_renamed = 0;
565 int numeric_ids = 0;
566 int allow_8bit_chars = 0;
567 int force_delete = 0;
568@@ -392,6 +393,7 @@ void usage(enum logcode F)
569 rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n");
570 rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n");
571 rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n");
572+ rprintf(F," --detect-renamed try to find renamed files to speed up the transfer\n");
573 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
574 rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
575 rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
576@@ -579,6 +581,7 @@ static struct poptOption long_options[] = {
577 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
578 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
579 {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
580+ {"detect-renamed", 0, POPT_ARG_NONE, &detect_renamed, 0, 0, 0 },
581 {"fuzzy", 'y', POPT_ARG_VAL, &fuzzy_basis, 1, 0, 0 },
582 {"no-fuzzy", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
583 {"no-y", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
584@@ -1591,7 +1594,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
585 inplace = 1;
586 }
587
588- if (delay_updates && !partial_dir)
589+ if ((delay_updates || detect_renamed) && !partial_dir)
590 partial_dir = tmp_partialdir;
591
592 if (inplace) {
593@@ -1600,6 +1603,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
594 snprintf(err_buf, sizeof err_buf,
595 "--%s cannot be used with --%s\n",
596 append_mode ? "append" : "inplace",
597+ detect_renamed ? "detect-renamed" :
598 delay_updates ? "delay-updates" : "partial-dir");
599 return 0;
600 }
601@@ -1961,6 +1965,8 @@ void server_options(char **args, int *argc_p)
602 args[ac++] = "--super";
603 if (size_only)
604 args[ac++] = "--size-only";
605+ if (detect_renamed)
606+ args[ac++] = "--detect-renamed";
607 } else {
608 if (skip_compress) {
609 if (asprintf(&arg, "--skip-compress=%s", skip_compress) < 0)
610diff --git a/rsync.yo b/rsync.yo
611--- a/rsync.yo
612+++ b/rsync.yo
613@@ -389,6 +389,7 @@ to the detailed description below for a complete description. verb(
614 --modify-window=NUM compare mod-times with reduced accuracy
615 -T, --temp-dir=DIR create temporary files in directory DIR
616 -y, --fuzzy find similar file for basis if no dest file
617+ --detect-renamed try to find renamed files to speed the xfer
618 --compare-dest=DIR also compare received files relative to DIR
619 --copy-dest=DIR ... and include copies of unchanged files
620 --link-dest=DIR hardlink to files in DIR when unchanged
621@@ -1491,6 +1492,21 @@ Note that the use of the bf(--delete) option might get rid of any potential
622 fuzzy-match files, so either use bf(--delete-after) or specify some
623 filename exclusions if you need to prevent this.
624
625+dit(bf(--detect-renamed)) With this option, for each new source file
626+(call it em(src/S)), rsync looks for a file em(dest/D) anywhere in the
627+destination that passes the quick check with em(src/S). If such a em(dest/D)
628+is found, rsync uses it as an alternate basis for transferring em(S). The
629+idea is that if em(src/S) was renamed from em(src/D) (as opposed to em(src/S)
630+passing the quick check with em(dest/D) by coincidence), the delta-transfer
631+algorithm will find that all the data matches between em(src/S) and em(dest/D),
632+and the transfer will be really fast.
633+
634+By default, alternate-basis files are hard-linked into a directory named
635+".~tmp~" in each file's destination directory, but if you've specified
636+the bf(--partial-dir) option, that directory will be used instead. These
637+potential alternate-basis files will be removed as the transfer progresses.
638+This option conflicts with bf(--inplace) and bf(--append).
639+
640 dit(bf(--compare-dest=DIR)) This option instructs rsync to use em(DIR) on
641 the destination machine as an additional hierarchy to compare destination
642 files against doing transfers (if the files are missing in the destination
643diff --git a/util.c b/util.c
644--- a/util.c
645+++ b/util.c
646@@ -1168,6 +1168,32 @@ int handle_partial_dir(const char *fname, int create)
647 return 1;
648 }
649
/* A strcmp() replacement for file-list comparisons that always compares
 * the characters as unsigned values, so that the resulting order does not
 * depend on whether plain char is signed on a given machine.
 *
 * Returns 0 for equal strings, otherwise the difference between the first
 * pair of differing bytes (negative when p1 sorts first). */
int u_strcmp(const char *p1, const char *p2)
{
	const unsigned char *s1 = (const unsigned char *)p1;
	const unsigned char *s2 = (const unsigned char *)p2;

	/* Stop at the end of p1 or at the first mismatch; a shorter p2 shows
	 * up as a mismatch against its terminating NUL. */
	while (*s1 && *s1 == *s2) {
		s1++;
		s2++;
	}

	return (int)*s1 - (int)*s2;
}
661+
/* We need a memcmp function that compares unsigned-byte values.
 *
 * Compares the first len bytes of p1 and p2.  Returns 0 when they are all
 * equal (which includes len == 0); otherwise returns the difference between
 * the first pair of bytes that differ, which is negative when p1 sorts
 * first and positive when p2 sorts first. */
int u_memcmp(const void *p1, const void *p2, size_t len)
{
	const unsigned char *u1 = p1;
	const unsigned char *u2 = p2;

	while (len--) {
		/* Both pointers must advance on every pass; otherwise only the
		 * first byte pair would ever be examined. */
		int diff = (int)*u1++ - (int)*u2++;
		if (diff)
			return diff;
	}

	return 0;
}
674+}
675+
676 /**
677 * Determine if a symlink points outside the current directory tree.
678 * This is considered "unsafe" because e.g. when mirroring somebody