Fixes from Matt.
[rsync/rsync-patches.git] / detect-renamed.diff
CommitLineData
1fffd582
WD
1This patch adds the --detect-renamed option which makes rsync notice files
2that either (1) match in size & modify-time (plus the basename, if possible)
3or (2) match in size & checksum (when --checksum was also specified) and use
4each match as an alternate basis file to speed up the transfer.
5
6The algorithm attempts to scan the receiving-side's files in an efficient
7manner. If --delete[-before] is enabled, we'll take advantage of the
8pre-transfer delete pass to prepare any alternate-basis-file matches we
9might find. If --delete-before is not enabled, rsync does the rename scan
10during the regular file-sending scan (scanning each directory right before
11the generator starts updating files from that dir). In this latter mode,
12rsync might delay the updating of a file (if no alternate-basis match was
13yet found) until the full scan of the receiving side is complete, at which
14point any delayed files are processed.
15
16I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that
17takes advantage of rsync's pre-existing partial-dir logic. This uses less
18memory than trying to keep track of the matches internally, and also allows
19any deletions or file-updates to occur normally without interfering with
20these alternate-basis discoveries.
21
03019e41 22To use this patch, run these commands for a successful build:
1fffd582 23
03019e41
WD
24 patch -p1 <patches/detect-renamed.diff
25 ./configure (optional if already run)
1fffd582
WD
26 make
27
28TODO:
29
30 We need to never return a match from fattr_find() that has a basis
31 file. This will ensure that we don't try to give a renamed file to
32 a file that can't use it, while missing out on giving it to a file
33 that could use it.
34
9bcaf4de
WD
35--- old/compat.c
36+++ new/compat.c
cdcd2137
WD
37@@ -41,6 +41,7 @@ extern int checksum_seed;
38 extern int basis_dir_cnt;
39 extern int prune_empty_dirs;
40 extern int protocol_version;
9bcaf4de 41+extern int detect_renamed;
cdcd2137
WD
42 extern int protect_args;
43 extern int preserve_uid;
44 extern int preserve_gid;
45@@ -218,7 +219,7 @@ void setup_protocol(int f_out,int f_in)
ccdb48f6
WD
46 } else if (protocol_version >= 30) {
47 if (recurse && allow_inc_recurse
37be07bf 48 && !delete_before && !delete_after && !delay_updates
ccdb48f6
WD
49- && !use_qsort && !prune_empty_dirs)
50+ && !use_qsort && !prune_empty_dirs && !detect_renamed)
9bcaf4de
WD
51 inc_recurse = 1;
52 need_messages_from_generator = 1;
53 }
1fffd582
WD
54--- old/flist.c
55+++ new/flist.c
ccdb48f6 56@@ -61,6 +61,7 @@ extern int non_perishable_cnt;
1fffd582
WD
57 extern int prune_empty_dirs;
58 extern int copy_links;
59 extern int copy_unsafe_links;
60+extern int detect_renamed;
61 extern int protocol_version;
62 extern int sanitize_paths;
03019e41 63 extern struct stats stats;
ccdb48f6 64@@ -113,6 +114,8 @@ static int64 tmp_dev, tmp_ino;
7b80cd0e 65 #endif
87a38eea 66 static char tmp_sum[MAX_DIGEST_LEN];
1fffd582
WD
67
68+struct file_list the_fattr_list;
69+
87a38eea 70 static char empty_sum[MAX_DIGEST_LEN];
a47d1f86 71 static int flist_count_offset; /* for --delete --progress */
6cbbe66d 72 static int dir_count = 0;
cdcd2137 73@@ -252,6 +255,45 @@ static int is_excluded(char *fname, int
73adde61 74 return 0;
1fffd582
WD
75 }
76
77+static int fattr_compare(struct file_struct **file1, struct file_struct **file2)
78+{
79+ struct file_struct *f1 = *file1;
80+ struct file_struct *f2 = *file2;
a47d1f86 81+ int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2);
1fffd582
WD
82+ int diff;
83+
a47d1f86
WD
84+ if (!f1->basename || !S_ISREG(f1->mode) || !len1) {
85+ if (!f2->basename || !S_ISREG(f2->mode) || !len2)
1fffd582
WD
86+ return 0;
87+ return 1;
88+ }
a47d1f86 89+ if (!f2->basename || !S_ISREG(f2->mode) || !len2)
1fffd582
WD
90+ return -1;
91+
92+ /* Don't use diff for values that are longer than an int. */
a47d1f86
WD
93+ if (len1 != len2)
94+ return len1 < len2 ? -1 : 1;
1fffd582
WD
95+
96+ if (always_checksum) {
70891d26 97+ diff = u_memcmp(F_SUM(f1), F_SUM(f2), checksum_len);
1fffd582
WD
98+ if (diff)
99+ return diff;
100+ } else if (f1->modtime != f2->modtime)
101+ return f1->modtime < f2->modtime ? -1 : 1;
102+
103+ diff = u_strcmp(f1->basename, f2->basename);
104+ if (diff)
105+ return diff;
106+
107+ if (f1->dirname == f2->dirname)
108+ return 0;
109+ if (!f1->dirname)
110+ return -1;
111+ if (!f2->dirname)
112+ return 1;
113+ return u_strcmp(f1->dirname, f2->dirname);
114+}
115+
73adde61 116 static void send_directory(int f, struct file_list *flist,
fc068916 117 char *fbuf, int len, int flags);
1fffd582 118
6fa0767f
WD
119@@ -2154,6 +2196,25 @@ struct file_list *recv_file_list(int f)
120
121 clean_flist(flist, relative_paths);
1fffd582
WD
122
123+ if (detect_renamed) {
7bfcb297
WD
124+ int j = flist->used;
125+ the_fattr_list.used = j;
1fffd582
WD
126+ the_fattr_list.files = new_array(struct file_struct *, j);
127+ if (!the_fattr_list.files)
70891d26 128+ out_of_memory("recv_file_list");
1fffd582
WD
129+ memcpy(the_fattr_list.files, flist->files,
130+ j * sizeof (struct file_struct *));
131+ qsort(the_fattr_list.files, j,
fc068916 132+ sizeof the_fattr_list.files[0], (int (*)())fattr_compare);
1fffd582
WD
133+ the_fattr_list.low = 0;
134+ while (j-- > 0) {
135+ struct file_struct *fp = the_fattr_list.files[j];
a47d1f86 136+ if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp))
1fffd582
WD
137+ break;
138+ }
139+ the_fattr_list.high = j;
140+ }
141+
6fa0767f
WD
142 if (protocol_version < 30) {
143 /* Recv the io_error flag */
144 if (ignore_errors)
1fffd582
WD
145--- old/generator.c
146+++ new/generator.c
ccdb48f6 147@@ -79,6 +79,7 @@ extern char *basis_dir[];
1fffd582
WD
148 extern int compare_dest;
149 extern int copy_dest;
150 extern int link_dest;
151+extern int detect_renamed;
152 extern int whole_file;
153 extern int list_only;
ccdb48f6
WD
154 extern int read_batch;
155@@ -95,6 +96,7 @@ extern char *backup_suffix;
1fffd582 156 extern int backup_suffix_len;
fc068916 157 extern struct file_list *cur_flist, *first_flist, *dir_flist;
1fffd582 158 extern struct filter_list_struct server_filter_list;
fc068916 159+extern struct file_list the_fattr_list;
73adde61
WD
160 #ifdef ICONV_OPTION
161 extern int ic_ndx;
162 #endif
ccdb48f6 163@@ -105,6 +107,7 @@ int maybe_ATTRS_REPORT = 0;
d16b5fd6 164
fc068916 165 static dev_t dev_zero;
1fffd582
WD
166 static int deletion_count = 0; /* used to implement --max-delete */
167+static int unexplored_dirs = 1;
1071853f
WD
168 static int deldelay_size = 0, deldelay_cnt = 0;
169 static char *deldelay_buf = NULL;
170 static int deldelay_fd = -1;
ccdb48f6 171@@ -114,7 +117,8 @@ static int need_retouch_dir_times;
9c85142a 172 static int need_retouch_dir_perms;
2dbc45e7 173 static const char *solo_file = NULL;
1fffd582 174
d16b5fd6
WD
175-/* For calling delete_item() and delete_dir_contents(). */
176+/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */
177+#define DEL_NO_DELETIONS (1<<0)
87d0091c 178 #define DEL_RECURSE (1<<1) /* recurse */
d16b5fd6 179 #define DEL_DIR_IS_EMPTY (1<<2) /* internal delete_FUNCTIONS use only */
1fffd582 180
6fa0767f 181@@ -136,11 +140,121 @@ static int is_backup_file(char *fn)
1fffd582
WD
182 return k > 0 && strcmp(fn+k, backup_suffix) == 0;
183 }
184
185+/* Search for a regular file that matches either (1) the size & modified
186+ * time (plus the basename, if possible) or (2) the size & checksum. If
187+ * we find an exact match down to the dirname, return -1 because we found
188+ * an up-to-date file in the transfer, not a renamed file. */
a47d1f86 189+static int fattr_find(struct file_struct *f, char *fname)
1fffd582
WD
190+{
191+ int low = the_fattr_list.low, high = the_fattr_list.high;
192+ int mid, ok_match = -1, good_match = -1;
193+ struct file_struct *fmid;
194+ int diff;
195+
196+ while (low <= high) {
197+ mid = (low + high) / 2;
198+ fmid = the_fattr_list.files[mid];
a47d1f86
WD
199+ if (F_LENGTH(fmid) != F_LENGTH(f)) {
200+ if (F_LENGTH(fmid) < F_LENGTH(f))
1fffd582
WD
201+ low = mid + 1;
202+ else
203+ high = mid - 1;
204+ continue;
205+ }
206+ if (always_checksum) {
9bcaf4de 207+ /* We use the FLAG_FILE_SENT flag to indicate when we
a47d1f86 208+ * have computed the checksum for an entry. */
9bcaf4de 209+ if (!(f->flags & FLAG_FILE_SENT)) {
1fffd582
WD
210+ if (fmid->modtime == f->modtime
211+ && f_name_cmp(fmid, f) == 0)
212+ return -1; /* assume we can't help */
d7d6347c 213+ file_checksum(fname, F_SUM(f), F_LENGTH(f));
9bcaf4de 214+ f->flags |= FLAG_FILE_SENT;
1fffd582 215+ }
70891d26 216+ diff = u_memcmp(F_SUM(fmid), F_SUM(f), checksum_len);
1fffd582
WD
217+ if (diff) {
218+ if (diff < 0)
219+ low = mid + 1;
220+ else
221+ high = mid - 1;
222+ continue;
223+ }
224+ } else {
225+ if (fmid->modtime != f->modtime) {
226+ if (fmid->modtime < f->modtime)
227+ low = mid + 1;
228+ else
229+ high = mid - 1;
230+ continue;
231+ }
232+ }
233+ ok_match = mid;
234+ diff = u_strcmp(fmid->basename, f->basename);
235+ if (diff == 0) {
236+ good_match = mid;
237+ if (fmid->dirname == f->dirname)
238+ return -1; /* file is up-to-date */
239+ if (!fmid->dirname) {
240+ low = mid + 1;
241+ continue;
242+ }
243+ if (!f->dirname) {
244+ high = mid - 1;
245+ continue;
246+ }
247+ diff = u_strcmp(fmid->dirname, f->dirname);
248+ if (diff == 0)
249+ return -1; /* file is up-to-date */
250+ }
251+ if (diff < 0)
252+ low = mid + 1;
253+ else
254+ high = mid - 1;
255+ }
256+
257+ return good_match >= 0 ? good_match : ok_match;
258+}
259+
a47d1f86 260+static void look_for_rename(struct file_struct *file, char *fname)
1fffd582
WD
261+{
262+ struct file_struct *fp;
263+ char *partialptr, *fn;
264+ STRUCT_STAT st;
265+ int ndx;
266+
6fa0767f 267+ if (!partial_dir || (ndx = fattr_find(file, fname)) < 0)
1fffd582
WD
268+ return;
269+
270+ fp = the_fattr_list.files[ndx];
271+ fn = f_name(fp, NULL);
272+ /* We don't provide an alternate-basis file if there is a basis file. */
273+ if (link_stat(fn, &st, 0) == 0)
274+ return;
1fffd582 275+
6fa0767f
WD
276+ if (!dry_run) {
277+ if ((partialptr = partial_dir_fname(fn)) == NULL
278+ || !handle_partial_dir(partialptr, PDIR_CREATE))
279+ return;
280+ /* We only use the file if we can hard-link it into our tmp dir. */
281+ if (link(fname, partialptr) != 0) {
282+ if (errno != EEXIST)
283+ handle_partial_dir(partialptr, PDIR_DELETE);
284+ return;
1fffd582 285+ }
1fffd582
WD
286+ }
287+
6fa0767f
WD
288+ /* I think this falls into the -vv category with "%s is uptodate", etc. */
289+ if (verbose > 1)
290+ rprintf(FINFO, "found renamed: %s => %s\n", fname, fn);
1fffd582 291+}
87d0091c
WD
292+
293 /* Delete a file or directory. If DEL_RECURSE is set in the flags, this will
294 * delete recursively.
1fffd582 295 *
f813befd 296 * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's
15894839
WD
297 * a directory! (The buffer is used for recursion, but returned unchanged.)
298+ *
299+ * Also note: --detect-rename may use this routine with DEL_NO_DELETIONS set!
300 */
f813befd 301 static enum delret delete_item(char *fbuf, int mode, char *replace, int flags)
15894839 302 {
6fa0767f 303@@ -162,6 +276,8 @@ static enum delret delete_item(char *fbu
15894839
WD
304 goto check_ret;
305 /* OK: try to delete the directory. */
306 }
307+ if (flags & DEL_NO_DELETIONS)
308+ return DR_SUCCESS;
309
310 if (!replace && max_delete >= 0 && ++deletion_count > max_delete)
311 return DR_AT_LIMIT;
6fa0767f 312@@ -208,6 +324,8 @@ static enum delret delete_item(char *fbu
d16b5fd6
WD
313 * its contents, otherwise just checks for content. Returns DR_SUCCESS or
314 * DR_NOT_EMPTY. Note that fname must point to a MAXPATHLEN buffer! (The
315 * buffer is used for recursion, but returned unchanged.)
1fffd582 316+ *
87d0091c 317+ * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
1fffd582 318 */
87d0091c 319 static enum delret delete_dir_contents(char *fname, int flags)
1fffd582 320 {
6fa0767f 321@@ -227,7 +345,9 @@ static enum delret delete_dir_contents(c
a47d1f86
WD
322 save_filters = push_local_filters(fname, dlen);
323
324 non_perishable_cnt = 0;
7e27b6c0 325+ file_extra_cnt += SUM_EXTRA_CNT;
a47d1f86 326 dirlist = get_dirlist(fname, dlen, 0);
7e27b6c0 327+ file_extra_cnt -= SUM_EXTRA_CNT;
a47d1f86
WD
328 ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS;
329
9c85142a 330 if (!dirlist->used)
6fa0767f 331@@ -264,6 +384,8 @@ static enum delret delete_dir_contents(c
d16b5fd6
WD
332 if (S_ISDIR(fp->mode)
333 && delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS)
334 ret = DR_NOT_EMPTY;
335+ if (detect_renamed && S_ISREG(fp->mode))
a47d1f86 336+ look_for_rename(fp, fname);
d16b5fd6
WD
337 if (delete_item(fname, fp->mode, NULL, flags) != DR_SUCCESS)
338 ret = DR_NOT_EMPTY;
339 }
6fa0767f 340@@ -416,13 +538,18 @@ static void do_delayed_deletions(char *d
1fffd582
WD
341 * all the --delete-WHEN options. Note that the fbuf pointer must point to a
342 * MAXPATHLEN buffer with the name of the directory in it (the functions we
343 * call will append names onto the end, but the old dir value will be restored
344- * on exit). */
73adde61 345-static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
1fffd582
WD
346+ * on exit).
347+ *
348+ * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
349+ */
73adde61
WD
350+static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev,
351+ int flags)
1fffd582 352 {
1fffd582
WD
353 static int already_warned = 0;
354 struct file_list *dirlist;
355- char delbuf[MAXPATHLEN];
6fa0767f 356- int dlen, i;
1fffd582
WD
357+ char *p, delbuf[MAXPATHLEN];
358+ unsigned remainder;
6fa0767f 359+ int dlen, i, restore_dot = 0;
1fffd582 360
73adde61 361 if (!fbuf) {
6fa0767f
WD
362 change_local_filter_dir(NULL, 0, 0);
363@@ -432,21 +559,28 @@ static void delete_in_dir(char *fbuf, st
1fffd582
WD
364 if (verbose > 2)
365 rprintf(FINFO, "delete_in_dir(%s)\n", fbuf);
366
87d0091c 367+ flags |= DEL_RECURSE;
1fffd582
WD
368+
369 if (allowed_lull)
370 maybe_send_keepalive();
371
041d67b8 372 if (io_error && !ignore_errors) {
1fffd582
WD
373- if (already_warned)
374+ if (!already_warned) {
375+ rprintf(FINFO,
376+ "IO error encountered -- skipping file deletion\n");
377+ already_warned = 1;
378+ }
379+ if (!detect_renamed)
380 return;
381- rprintf(FINFO,
382- "IO error encountered -- skipping file deletion\n");
383- already_warned = 1;
384- return;
385+ flags |= DEL_NO_DELETIONS;
386 }
387
1fffd582 388 dlen = strlen(fbuf);
fc068916 389 change_local_filter_dir(fbuf, dlen, F_DEPTH(file));
1fffd582
WD
390
391+ if (detect_renamed)
392+ unexplored_dirs--;
393+
394 if (one_file_system) {
395 if (file->flags & FLAG_TOP_DIR)
fc068916 396 filesystem_dev = *fs_dev;
6fa0767f 397@@ -456,6 +590,14 @@ static void delete_in_dir(char *fbuf, st
1fffd582
WD
398
399 dirlist = get_dirlist(fbuf, dlen, 0);
400
401+ p = fbuf + dlen;
6fa0767f
WD
402+ if (dlen == 1 && *fbuf == '.') {
403+ restore_dot = 1;
404+ p = fbuf;
405+ } else if (dlen != 1 || *fbuf != '/')
1fffd582
WD
406+ *p++ = '/';
407+ remainder = MAXPATHLEN - (p - fbuf);
408+
409 /* If an item in dirlist is not found in flist, delete it
410 * from the filesystem. */
9c85142a 411 for (i = dirlist->used; i--; ) {
6fa0767f 412@@ -468,16 +610,25 @@ static void delete_in_dir(char *fbuf, st
87d0091c 413 f_name(fp, NULL));
1fffd582 414 continue;
87d0091c 415 }
1fffd582
WD
416+ if (detect_renamed && S_ISREG(fp->mode)) {
417+ strlcpy(p, fp->basename, remainder);
a47d1f86 418+ look_for_rename(fp, fbuf);
1fffd582 419+ }
73adde61 420 if (flist_find(cur_flist, fp) < 0) {
1fffd582 421 f_name(fp, delbuf);
1071853f 422- if (delete_during == 2) {
a47d1f86 423+ if (delete_during == 2 && !(flags & DEL_NO_DELETIONS)) {
1071853f
WD
424 if (!remember_delete(fp, delbuf))
425 break;
426 } else
f813befd 427- delete_item(delbuf, fp->mode, NULL, DEL_RECURSE);
1fffd582 428- }
f813befd 429+ delete_item(delbuf, fp->mode, NULL, flags);
1fffd582
WD
430+ } else if (detect_renamed && S_ISDIR(fp->mode))
431+ unexplored_dirs++;
432 }
433
6fa0767f
WD
434+ if (restore_dot)
435+ fbuf[0] = '.';
1fffd582
WD
436+ fbuf[dlen] = '\0';
437+
438 flist_free(dirlist);
439 }
440
6fa0767f 441@@ -507,9 +658,9 @@ static void do_delete_pass(void)
1fffd582
WD
442 || !S_ISDIR(st.st_mode))
443 continue;
444
73adde61
WD
445- delete_in_dir(fbuf, file, &st.st_dev);
446+ delete_in_dir(fbuf, file, &st.st_dev, 0);
1fffd582 447 }
73adde61
WD
448- delete_in_dir(NULL, NULL, &dev_zero);
449+ delete_in_dir(NULL, NULL, &dev_zero, 0);
1fffd582
WD
450
451 if (do_progress && !am_server)
452 rprintf(FINFO, " \r");
6fa0767f 453@@ -1101,6 +1252,7 @@ static void list_file_entry(struct file_
cdcd2137 454 }
1fffd582
WD
455 }
456
457+static struct bitbag *delayed_bits = NULL;
458 static int phase = 0;
ffc18846 459 static int dflt_perms;
1fffd582 460
6fa0767f 461@@ -1345,8 +1497,12 @@ static void recv_generator(char *fname,
fc068916
WD
462 }
463 }
464 else if (delete_during && f_out != -1 && !phase && dry_run < 2
9668a39c 465- && (file->flags & FLAG_CONTENT_DIR))
73adde61 466- delete_in_dir(fname, file, &real_sx.st.st_dev);
9668a39c 467+ && (file->flags & FLAG_CONTENT_DIR)) {
9a70b743 468+ if (detect_renamed && real_ret != 0)
1fffd582 469+ unexplored_dirs++;
73adde61 470+ delete_in_dir(fname, file, &real_sx.st.st_dev,
1fffd582
WD
471+ delete_during < 0 ? DEL_NO_DELETIONS : 0);
472+ }
ffc18846 473 goto cleanup;
1fffd582
WD
474 }
475
6fa0767f 476@@ -1624,8 +1780,14 @@ static void recv_generator(char *fname,
ffc18846 477 goto cleanup;
ccdb48f6 478 }
81172142 479 #endif
1fffd582
WD
480- if (stat_errno == ENOENT)
481+ if (stat_errno == ENOENT) {
482+ if (detect_renamed && unexplored_dirs > 0
a47d1f86 483+ && F_LENGTH(file)) {
1fffd582
WD
484+ bitbag_set_bit(delayed_bits, ndx);
485+ return;
486+ }
487 goto notify_others;
488+ }
489 rsyserr(FERROR, stat_errno, "recv_generator: failed to stat %s",
490 full_fname(fname));
ffc18846 491 goto cleanup;
6fa0767f 492@@ -1961,6 +2123,12 @@ void generate_files(int f_out, const cha
fc068916
WD
493 if (verbose > 2)
494 rprintf(FINFO, "generator starting pid=%ld\n", (long)getpid());
1fffd582
WD
495
496+ if (detect_renamed) {
7bfcb297 497+ delayed_bits = bitbag_create(cur_flist->used);
1fffd582
WD
498+ if (!delete_before && !delete_during)
499+ delete_during = -1;
500+ }
501+
9c85142a 502 if (delete_before && !solo_file && cur_flist->used > 0)
73adde61 503 do_delete_pass();
1071853f 504 if (delete_during == 2) {
6fa0767f 505@@ -1971,7 +2139,7 @@ void generate_files(int f_out, const cha
1071853f 506 }
1fffd582
WD
507 do_progress = 0;
508
fc068916
WD
509- if (append_mode > 0 || whole_file < 0)
510+ if (append_mode > 0 || detect_renamed || whole_file < 0)
1fffd582
WD
511 whole_file = 0;
512 if (verbose >= 2) {
513 rprintf(FINFO, "delta-transmission %s\n",
6fa0767f 514@@ -2009,7 +2177,7 @@ void generate_files(int f_out, const cha
9c85142a
WD
515 dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp));
516 } else
517 dirdev = MAKEDEV(0, 0);
518- delete_in_dir(f_name(fp, fbuf), fp, &dirdev);
519+ delete_in_dir(f_name(fp, fbuf), fp, &dirdev, 0);
520 }
fc068916
WD
521 }
522 }
6fa0767f 523@@ -2054,7 +2222,21 @@ void generate_files(int f_out, const cha
2dbc45e7 524 } while ((cur_flist = cur_flist->next) != NULL);
fc068916 525
2dbc45e7 526 if (delete_during)
73adde61
WD
527- delete_in_dir(NULL, NULL, &dev_zero);
528+ delete_in_dir(NULL, NULL, &dev_zero, 0);
2dbc45e7
WD
529+ if (detect_renamed) {
530+ if (delete_during < 0)
531+ delete_during = 0;
532+ detect_renamed = 0;
1fffd582 533+
2dbc45e7 534+ for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) {
9bcaf4de 535+ struct file_struct *file = cur_flist->files[i];
2dbc45e7
WD
536+ if (local_name)
537+ strlcpy(fbuf, local_name, sizeof fbuf);
538+ else
539+ f_name(file, fbuf);
540+ recv_generator(fbuf, file, i, itemizing, code, f_out);
541+ }
542+ }
543 phase++;
544 if (verbose > 2)
545 rprintf(FINFO, "generate_files phase=%d\n", phase);
1fffd582
WD
546--- old/options.c
547+++ new/options.c
cdcd2137 548@@ -80,6 +80,7 @@ int am_generator = 0;
a94141d9 549 int am_starting_up = 1;
1fffd582
WD
550 int relative_paths = -1;
551 int implied_dirs = 1;
552+int detect_renamed = 0;
553 int numeric_ids = 0;
554 int allow_8bit_chars = 0;
555 int force_delete = 0;
cdcd2137 556@@ -383,6 +384,7 @@ void usage(enum logcode F)
1fffd582
WD
557 rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n");
558 rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n");
559 rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n");
560+ rprintf(F," --detect-renamed try to find renamed files to speed up the transfer\n");
561 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
562 rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
563 rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
ccdb48f6 564@@ -561,6 +563,7 @@ static struct poptOption long_options[]
1fffd582
WD
565 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
566 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
567 {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
568+ {"detect-renamed", 0, POPT_ARG_NONE, &detect_renamed, 0, 0, 0 },
569 {"fuzzy", 'y', POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 },
570 {"compress", 'z', POPT_ARG_NONE, 0, 'z', 0, 0 },
6cbbe66d 571 {"no-compress", 0, POPT_ARG_VAL, &do_compression, 0, 0, 0 },
9668a39c 572@@ -1532,7 +1535,7 @@ int parse_arguments(int *argc_p, const c
1fffd582
WD
573 inplace = 1;
574 }
575
576- if (delay_updates && !partial_dir)
577+ if ((delay_updates || detect_renamed) && !partial_dir)
578 partial_dir = tmp_partialdir;
579
580 if (inplace) {
9668a39c 581@@ -1541,6 +1544,7 @@ int parse_arguments(int *argc_p, const c
1fffd582
WD
582 snprintf(err_buf, sizeof err_buf,
583 "--%s cannot be used with --%s\n",
584 append_mode ? "append" : "inplace",
585+ detect_renamed ? "detect-renamed" :
586 delay_updates ? "delay-updates" : "partial-dir");
587 return 0;
588 }
6fa0767f
WD
589@@ -1890,6 +1894,9 @@ void server_options(char **args, int *ar
590 args[ac++] = arg;
591 }
592 }
593+ /* Both sides need to know in case this disables incremental recursion. */
594+ if (detect_renamed)
595+ args[ac++] = "--detect-renamed";
596
597 if (modify_window_set) {
598 if (asprintf(&arg, "--modify-window=%d", modify_window) < 0)
1fffd582
WD
599--- old/rsync.yo
600+++ new/rsync.yo
6fa0767f 601@@ -384,6 +384,7 @@ to the detailed description below for a
1fffd582
WD
602 --modify-window=NUM compare mod-times with reduced accuracy
603 -T, --temp-dir=DIR create temporary files in directory DIR
604 -y, --fuzzy find similar file for basis if no dest file
605+ --detect-renamed try to find renamed files to speed the xfer
606 --compare-dest=DIR also compare received files relative to DIR
607 --copy-dest=DIR ... and include copies of unchanged files
608 --link-dest=DIR hardlink to files in DIR when unchanged
6fa0767f 609@@ -1424,6 +1425,21 @@ Note that the use of the bf(--delete) op
1fffd582
WD
610 fuzzy-match files, so either use bf(--delete-after) or specify some
611 filename exclusions if you need to prevent this.
612
6fa0767f
WD
613+dit(bf(--detect-renamed)) With this option, for each new source file
614+(call it em(src/S)), rsync looks for a file em(dest/D) anywhere in the
615+destination that passes the quick check with em(src/S). If such a em(dest/D)
616+is found, rsync uses it as an alternate basis for transferring em(S). The
617+idea is that if em(src/S) was renamed from em(src/D) (as opposed to em(src/S)
618+passing the quick check with em(dest/D) by coincidence), the delta-transfer
619+algorithm will find that all the data matches between em(src/S) and em(dest/D),
620+and the transfer will be really fast.
621+
1fffd582
WD
622+By default, alternate-basis files are hard-linked into a directory named
623+".~tmp~" in each file's destination directory, but if you've specified
624+the bf(--partial-dir) option, that directory will be used instead. These
625+potential alternate-basis files will be removed as the transfer progresses.
626+This option conflicts with bf(--inplace) and bf(--append).
627+
628 dit(bf(--compare-dest=DIR)) This option instructs rsync to use em(DIR) on
629 the destination machine as an additional hierarchy to compare destination
630 files against doing transfers (if the files are missing in the destination
631--- old/util.c
632+++ new/util.c
cdcd2137 633@@ -1030,6 +1030,32 @@ int handle_partial_dir(const char *fname
1fffd582
WD
634 return 1;
635 }
636
637+/* We need to supply our own strcmp function for file list comparisons
638+ * to ensure that signed/unsigned usage is consistent between machines. */
639+int u_strcmp(const char *p1, const char *p2)
640+{
641+ for ( ; *p1; p1++, p2++) {
642+ if (*p1 != *p2)
643+ break;
644+ }
645+
646+ return (int)*(uchar*)p1 - (int)*(uchar*)p2;
647+}
648+
649+/* We need a memcmp function that compares unsigned-byte values. */
650+int u_memcmp(const void *p1, const void *p2, size_t len)
651+{
652+ const uchar *u1 = p1;
653+ const uchar *u2 = p2;
654+
655+ for ( ; len--; u1++, u2++) {
656+ if (*u1 != *u2)
657+ return (int)*u1 - (int)*u2;
658+ }
659+
660+ return 0;
661+}
662+
663 /**
664 * Determine if a symlink points outside the current directory tree.
665 * This is considered "unsafe" because e.g. when mirroring somebody