The patches for 3.0.0pre7.
[rsync/rsync-patches.git] / detect-renamed.diff
CommitLineData
1fffd582
WD
1This patch adds the --detect-renamed option which makes rsync notice files
2that either (1) match in size & modify-time (plus the basename, if possible)
3or (2) match in size & checksum (when --checksum was also specified) and use
4each match as an alternate basis file to speed up the transfer.
5
6The algorithm attempts to scan the receiving-side's files in an efficient
7manner. If --delete[-before] is enabled, we'll take advantage of the
8pre-transfer delete pass to prepare any alternate-basis-file matches we
9might find. If --delete-before is not enabled, rsync does the rename scan
10during the regular file-sending scan (scanning each directory right before
11the generator starts updating files from that dir). In this latter mode,
12rsync might delay the updating of a file (if no alternate-basis match was
13yet found) until the full scan of the receiving side is complete, at which
14point any delayed files are processed.
15
16I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that
17takes advantage of rsync's pre-existing partial-dir logic. This uses less
18memory than trying to keep track of the matches internally, and also allows
19any deletions or file-updates to occur normally without interfering with
20these alternate-basis discoveries.
21
03019e41 22To use this patch, run these commands for a successful build:
1fffd582 23
03019e41
WD
24 patch -p1 <patches/detect-renamed.diff
25 ./configure (optional if already run)
1fffd582
WD
26 make
27
28TODO:
29
30 We need to never return a match from fattr_find() that has a basis
31 file. This will ensure that we don't try to give a renamed file to
32 a file that can't use it, while missing out on giving it to a file
33 that could use it.
34
cc3e685d
WD
35diff --git a/compat.c b/compat.c
36--- a/compat.c
37+++ b/compat.c
cdcd2137
WD
38@@ -41,6 +41,7 @@ extern int checksum_seed;
39 extern int basis_dir_cnt;
40 extern int prune_empty_dirs;
41 extern int protocol_version;
9bcaf4de 42+extern int detect_renamed;
cdcd2137
WD
43 extern int protect_args;
44 extern int preserve_uid;
45 extern int preserve_gid;
d4dd2dd5 46@@ -107,6 +108,7 @@ void set_allow_inc_recurse(void)
9aab301c
WD
47 allow_inc_recurse = 0;
48 else if (!am_sender
49 && (delete_before || delete_after
50+ || detect_renamed
51 || delay_updates || prune_empty_dirs))
51bc0e89
WD
52 allow_inc_recurse = 0;
53 else if (am_server && !local_server
cc3e685d
WD
54diff --git a/flist.c b/flist.c
55--- a/flist.c
56+++ b/flist.c
ccdb48f6 57@@ -61,6 +61,7 @@ extern int non_perishable_cnt;
1fffd582
WD
58 extern int prune_empty_dirs;
59 extern int copy_links;
60 extern int copy_unsafe_links;
61+extern int detect_renamed;
62 extern int protocol_version;
63 extern int sanitize_paths;
cc3e685d
WD
64 extern int munge_symlinks;
65@@ -118,6 +119,8 @@ static int64 tmp_dev, tmp_ino;
7b80cd0e 66 #endif
87a38eea 67 static char tmp_sum[MAX_DIGEST_LEN];
1fffd582
WD
68
69+struct file_list the_fattr_list;
70+
87a38eea 71 static char empty_sum[MAX_DIGEST_LEN];
a47d1f86 72 static int flist_count_offset; /* for --delete --progress */
6cbbe66d 73 static int dir_count = 0;
f2863bc0 74@@ -261,6 +264,45 @@ static int is_excluded(char *fname, int is_dir, int filter_level)
73adde61 75 return 0;
1fffd582
WD
76 }
77
78+static int fattr_compare(struct file_struct **file1, struct file_struct **file2)
79+{
80+ struct file_struct *f1 = *file1;
81+ struct file_struct *f2 = *file2;
a47d1f86 82+ int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2);
1fffd582
WD
83+ int diff;
84+
a47d1f86
WD
85+ if (!f1->basename || !S_ISREG(f1->mode) || !len1) {
86+ if (!f2->basename || !S_ISREG(f2->mode) || !len2)
1fffd582
WD
87+ return 0;
88+ return 1;
89+ }
a47d1f86 90+ if (!f2->basename || !S_ISREG(f2->mode) || !len2)
1fffd582
WD
91+ return -1;
92+
93+ /* Don't use diff for values that are longer than an int. */
a47d1f86
WD
94+ if (len1 != len2)
95+ return len1 < len2 ? -1 : 1;
1fffd582
WD
96+
97+ if (always_checksum) {
70891d26 98+ diff = u_memcmp(F_SUM(f1), F_SUM(f2), checksum_len);
1fffd582
WD
99+ if (diff)
100+ return diff;
101+ } else if (f1->modtime != f2->modtime)
102+ return f1->modtime < f2->modtime ? -1 : 1;
103+
104+ diff = u_strcmp(f1->basename, f2->basename);
105+ if (diff)
106+ return diff;
107+
108+ if (f1->dirname == f2->dirname)
109+ return 0;
110+ if (!f1->dirname)
111+ return -1;
112+ if (!f2->dirname)
113+ return 1;
114+ return u_strcmp(f1->dirname, f2->dirname);
115+}
116+
73adde61 117 static void send_directory(int f, struct file_list *flist,
fc068916 118 char *fbuf, int len, int flags);
1fffd582 119
f2863bc0 120@@ -2167,6 +2209,25 @@ struct file_list *recv_file_list(int f)
6fa0767f 121
f2863bc0 122 flist_sort_and_clean(flist, relative_paths);
1fffd582
WD
123
124+ if (detect_renamed) {
7bfcb297
WD
125+ int j = flist->used;
126+ the_fattr_list.used = j;
1fffd582
WD
127+ the_fattr_list.files = new_array(struct file_struct *, j);
128+ if (!the_fattr_list.files)
70891d26 129+ out_of_memory("recv_file_list");
1fffd582
WD
130+ memcpy(the_fattr_list.files, flist->files,
131+ j * sizeof (struct file_struct *));
132+ qsort(the_fattr_list.files, j,
fc068916 133+ sizeof the_fattr_list.files[0], (int (*)())fattr_compare);
1fffd582
WD
134+ the_fattr_list.low = 0;
135+ while (j-- > 0) {
136+ struct file_struct *fp = the_fattr_list.files[j];
a47d1f86 137+ if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp))
1fffd582
WD
138+ break;
139+ }
140+ the_fattr_list.high = j;
141+ }
142+
6fa0767f
WD
143 if (protocol_version < 30) {
144 /* Recv the io_error flag */
145 if (ignore_errors)
cc3e685d
WD
146diff --git a/generator.c b/generator.c
147--- a/generator.c
148+++ b/generator.c
ccdb48f6 149@@ -79,6 +79,7 @@ extern char *basis_dir[];
1fffd582
WD
150 extern int compare_dest;
151 extern int copy_dest;
152 extern int link_dest;
153+extern int detect_renamed;
154 extern int whole_file;
155 extern int list_only;
ccdb48f6 156 extern int read_batch;
d4dd2dd5 157@@ -96,6 +97,7 @@ extern char *backup_suffix;
1fffd582 158 extern int backup_suffix_len;
fc068916 159 extern struct file_list *cur_flist, *first_flist, *dir_flist;
1fffd582 160 extern struct filter_list_struct server_filter_list;
fc068916 161+extern struct file_list the_fattr_list;
d4dd2dd5
WD
162
163 int ignore_perishable = 0;
164 int non_perishable_cnt = 0;
165@@ -103,6 +105,7 @@ int maybe_ATTRS_REPORT = 0;
d16b5fd6 166
fc068916 167 static dev_t dev_zero;
1fffd582
WD
168 static int deletion_count = 0; /* used to implement --max-delete */
169+static int unexplored_dirs = 1;
1071853f
WD
170 static int deldelay_size = 0, deldelay_cnt = 0;
171 static char *deldelay_buf = NULL;
172 static int deldelay_fd = -1;
d4dd2dd5 173@@ -112,7 +115,8 @@ static int need_retouch_dir_times;
9c85142a 174 static int need_retouch_dir_perms;
2dbc45e7 175 static const char *solo_file = NULL;
1fffd582 176
d16b5fd6
WD
177-/* For calling delete_item() and delete_dir_contents(). */
178+/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */
179+#define DEL_NO_DELETIONS (1<<0)
87d0091c 180 #define DEL_RECURSE (1<<1) /* recurse */
d16b5fd6 181 #define DEL_DIR_IS_EMPTY (1<<2) /* internal delete_FUNCTIONS use only */
1fffd582 182
d4dd2dd5 183@@ -134,11 +138,121 @@ static int is_backup_file(char *fn)
1fffd582
WD
184 return k > 0 && strcmp(fn+k, backup_suffix) == 0;
185 }
186
187+/* Search for a regular file that matches either (1) the size & modified
188+ * time (plus the basename, if possible) or (2) the size & checksum. If
189+ * we find an exact match down to the dirname, return -1 because we found
190+ * an up-to-date file in the transfer, not a renamed file. */
a47d1f86 191+static int fattr_find(struct file_struct *f, char *fname)
1fffd582
WD
192+{
193+ int low = the_fattr_list.low, high = the_fattr_list.high;
194+ int mid, ok_match = -1, good_match = -1;
195+ struct file_struct *fmid;
196+ int diff;
197+
198+ while (low <= high) {
199+ mid = (low + high) / 2;
200+ fmid = the_fattr_list.files[mid];
a47d1f86
WD
201+ if (F_LENGTH(fmid) != F_LENGTH(f)) {
202+ if (F_LENGTH(fmid) < F_LENGTH(f))
1fffd582
WD
203+ low = mid + 1;
204+ else
205+ high = mid - 1;
206+ continue;
207+ }
208+ if (always_checksum) {
9bcaf4de 209+ /* We use the FLAG_FILE_SENT flag to indicate when we
a47d1f86 210+ * have computed the checksum for an entry. */
9bcaf4de 211+ if (!(f->flags & FLAG_FILE_SENT)) {
1fffd582
WD
212+ if (fmid->modtime == f->modtime
213+ && f_name_cmp(fmid, f) == 0)
214+ return -1; /* assume we can't help */
d7d6347c 215+ file_checksum(fname, F_SUM(f), F_LENGTH(f));
9bcaf4de 216+ f->flags |= FLAG_FILE_SENT;
1fffd582 217+ }
70891d26 218+ diff = u_memcmp(F_SUM(fmid), F_SUM(f), checksum_len);
1fffd582
WD
219+ if (diff) {
220+ if (diff < 0)
221+ low = mid + 1;
222+ else
223+ high = mid - 1;
224+ continue;
225+ }
226+ } else {
227+ if (fmid->modtime != f->modtime) {
228+ if (fmid->modtime < f->modtime)
229+ low = mid + 1;
230+ else
231+ high = mid - 1;
232+ continue;
233+ }
234+ }
235+ ok_match = mid;
236+ diff = u_strcmp(fmid->basename, f->basename);
237+ if (diff == 0) {
238+ good_match = mid;
239+ if (fmid->dirname == f->dirname)
240+ return -1; /* file is up-to-date */
241+ if (!fmid->dirname) {
242+ low = mid + 1;
243+ continue;
244+ }
245+ if (!f->dirname) {
246+ high = mid - 1;
247+ continue;
248+ }
249+ diff = u_strcmp(fmid->dirname, f->dirname);
250+ if (diff == 0)
251+ return -1; /* file is up-to-date */
252+ }
253+ if (diff < 0)
254+ low = mid + 1;
255+ else
256+ high = mid - 1;
257+ }
258+
259+ return good_match >= 0 ? good_match : ok_match;
260+}
261+
a47d1f86 262+static void look_for_rename(struct file_struct *file, char *fname)
1fffd582
WD
263+{
264+ struct file_struct *fp;
265+ char *partialptr, *fn;
266+ STRUCT_STAT st;
267+ int ndx;
268+
6fa0767f 269+ if (!partial_dir || (ndx = fattr_find(file, fname)) < 0)
1fffd582
WD
270+ return;
271+
272+ fp = the_fattr_list.files[ndx];
273+ fn = f_name(fp, NULL);
274+ /* We don't provide an alternate-basis file if there is a basis file. */
275+ if (link_stat(fn, &st, 0) == 0)
276+ return;
1fffd582 277+
6fa0767f
WD
278+ if (!dry_run) {
279+ if ((partialptr = partial_dir_fname(fn)) == NULL
280+ || !handle_partial_dir(partialptr, PDIR_CREATE))
281+ return;
282+ /* We only use the file if we can hard-link it into our tmp dir. */
283+ if (link(fname, partialptr) != 0) {
284+ if (errno != EEXIST)
285+ handle_partial_dir(partialptr, PDIR_DELETE);
286+ return;
1fffd582 287+ }
1fffd582
WD
288+ }
289+
6fa0767f
WD
290+ /* I think this falls into the -vv category with "%s is uptodate", etc. */
291+ if (verbose > 1)
292+ rprintf(FINFO, "found renamed: %s => %s\n", fname, fn);
1fffd582 293+}
87d0091c
WD
294+
295 /* Delete a file or directory. If DEL_RECURSE is set in the flags, this will
296 * delete recursively.
1fffd582 297 *
f813befd 298 * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's
15894839
WD
299 * a directory! (The buffer is used for recursion, but returned unchanged.)
300+ *
301+ * Also note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
302 */
f813befd 303 static enum delret delete_item(char *fbuf, int mode, char *replace, int flags)
15894839 304 {
cc3e685d 305@@ -160,6 +274,8 @@ static enum delret delete_item(char *fbuf, int mode, char *replace, int flags)
15894839
WD
306 goto check_ret;
307 /* OK: try to delete the directory. */
308 }
309+ if (flags & DEL_NO_DELETIONS)
310+ return DR_SUCCESS;
311
312 if (!replace && max_delete >= 0 && ++deletion_count > max_delete)
313 return DR_AT_LIMIT;
cc3e685d 314@@ -206,6 +322,8 @@ static enum delret delete_item(char *fbuf, int mode, char *replace, int flags)
d16b5fd6
WD
315 * its contents, otherwise just checks for content. Returns DR_SUCCESS or
316 * DR_NOT_EMPTY. Note that fname must point to a MAXPATHLEN buffer! (The
317 * buffer is used for recursion, but returned unchanged.)
1fffd582 318+ *
87d0091c 319+ * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
1fffd582 320 */
87d0091c 321 static enum delret delete_dir_contents(char *fname, int flags)
1fffd582 322 {
cc3e685d 323@@ -225,7 +343,9 @@ static enum delret delete_dir_contents(char *fname, int flags)
a47d1f86
WD
324 save_filters = push_local_filters(fname, dlen);
325
326 non_perishable_cnt = 0;
7e27b6c0 327+ file_extra_cnt += SUM_EXTRA_CNT;
a47d1f86 328 dirlist = get_dirlist(fname, dlen, 0);
7e27b6c0 329+ file_extra_cnt -= SUM_EXTRA_CNT;
a47d1f86
WD
330 ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS;
331
9c85142a 332 if (!dirlist->used)
cc3e685d 333@@ -262,6 +382,8 @@ static enum delret delete_dir_contents(char *fname, int flags)
d16b5fd6
WD
334 if (S_ISDIR(fp->mode)
335 && delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS)
336 ret = DR_NOT_EMPTY;
337+ if (detect_renamed && S_ISREG(fp->mode))
a47d1f86 338+ look_for_rename(fp, fname);
d16b5fd6
WD
339 if (delete_item(fname, fp->mode, NULL, flags) != DR_SUCCESS)
340 ret = DR_NOT_EMPTY;
341 }
cc3e685d 342@@ -414,13 +536,18 @@ static void do_delayed_deletions(char *delbuf)
1fffd582
WD
343 * all the --delete-WHEN options. Note that the fbuf pointer must point to a
344 * MAXPATHLEN buffer with the name of the directory in it (the functions we
345 * call will append names onto the end, but the old dir value will be restored
346- * on exit). */
73adde61 347-static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
1fffd582
WD
348+ * on exit).
349+ *
350+ * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
351+ */
73adde61
WD
352+static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev,
353+ int flags)
1fffd582 354 {
1fffd582
WD
355 static int already_warned = 0;
356 struct file_list *dirlist;
357- char delbuf[MAXPATHLEN];
6fa0767f 358- int dlen, i;
1fffd582
WD
359+ char *p, delbuf[MAXPATHLEN];
360+ unsigned remainder;
6fa0767f 361+ int dlen, i, restore_dot = 0;
1fffd582 362
73adde61 363 if (!fbuf) {
6fa0767f 364 change_local_filter_dir(NULL, 0, 0);
cc3e685d 365@@ -430,21 +557,28 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
1fffd582
WD
366 if (verbose > 2)
367 rprintf(FINFO, "delete_in_dir(%s)\n", fbuf);
368
87d0091c 369+ flags |= DEL_RECURSE;
1fffd582
WD
370+
371 if (allowed_lull)
372 maybe_send_keepalive();
373
041d67b8 374 if (io_error && !ignore_errors) {
1fffd582
WD
375- if (already_warned)
376+ if (!already_warned) {
377+ rprintf(FINFO,
378+ "IO error encountered -- skipping file deletion\n");
379+ already_warned = 1;
380+ }
381+ if (!detect_renamed)
382 return;
383- rprintf(FINFO,
384- "IO error encountered -- skipping file deletion\n");
385- already_warned = 1;
386- return;
387+ flags |= DEL_NO_DELETIONS;
388 }
389
1fffd582 390 dlen = strlen(fbuf);
fc068916 391 change_local_filter_dir(fbuf, dlen, F_DEPTH(file));
1fffd582
WD
392
393+ if (detect_renamed)
394+ unexplored_dirs--;
395+
396 if (one_file_system) {
397 if (file->flags & FLAG_TOP_DIR)
fc068916 398 filesystem_dev = *fs_dev;
cc3e685d 399@@ -454,6 +588,14 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
1fffd582
WD
400
401 dirlist = get_dirlist(fbuf, dlen, 0);
402
403+ p = fbuf + dlen;
6fa0767f
WD
404+ if (dlen == 1 && *fbuf == '.') {
405+ restore_dot = 1;
406+ p = fbuf;
407+ } else if (dlen != 1 || *fbuf != '/')
1fffd582
WD
408+ *p++ = '/';
409+ remainder = MAXPATHLEN - (p - fbuf);
410+
411 /* If an item in dirlist is not found in flist, delete it
412 * from the filesystem. */
9c85142a 413 for (i = dirlist->used; i--; ) {
cc3e685d 414@@ -466,16 +608,25 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
87d0091c 415 f_name(fp, NULL));
1fffd582 416 continue;
87d0091c 417 }
1fffd582
WD
418+ if (detect_renamed && S_ISREG(fp->mode)) {
419+ strlcpy(p, fp->basename, remainder);
a47d1f86 420+ look_for_rename(fp, fbuf);
1fffd582 421+ }
73adde61 422 if (flist_find(cur_flist, fp) < 0) {
1fffd582 423 f_name(fp, delbuf);
1071853f 424- if (delete_during == 2) {
a47d1f86 425+ if (delete_during == 2 && !(flags & DEL_NO_DELETIONS)) {
1071853f
WD
426 if (!remember_delete(fp, delbuf))
427 break;
428 } else
f813befd 429- delete_item(delbuf, fp->mode, NULL, DEL_RECURSE);
1fffd582 430- }
f813befd 431+ delete_item(delbuf, fp->mode, NULL, flags);
1fffd582
WD
432+ } else if (detect_renamed && S_ISDIR(fp->mode))
433+ unexplored_dirs++;
434 }
435
6fa0767f
WD
436+ if (restore_dot)
437+ fbuf[0] = '.';
1fffd582
WD
438+ fbuf[dlen] = '\0';
439+
440 flist_free(dirlist);
441 }
442
d4dd2dd5 443@@ -505,9 +656,9 @@ static void do_delete_pass(void)
1fffd582
WD
444 || !S_ISDIR(st.st_mode))
445 continue;
446
73adde61
WD
447- delete_in_dir(fbuf, file, &st.st_dev);
448+ delete_in_dir(fbuf, file, &st.st_dev, 0);
1fffd582 449 }
73adde61
WD
450- delete_in_dir(NULL, NULL, &dev_zero);
451+ delete_in_dir(NULL, NULL, &dev_zero, 0);
1fffd582
WD
452
453 if (do_progress && !am_server)
454 rprintf(FINFO, " \r");
cc3e685d 455@@ -1139,6 +1290,7 @@ static void list_file_entry(struct file_struct *f)
cdcd2137 456 }
1fffd582
WD
457 }
458
459+static struct bitbag *delayed_bits = NULL;
460 static int phase = 0;
ffc18846 461 static int dflt_perms;
1fffd582 462
cc3e685d 463@@ -1383,8 +1535,12 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
fc068916
WD
464 }
465 }
466 else if (delete_during && f_out != -1 && !phase && dry_run < 2
9668a39c 467- && (file->flags & FLAG_CONTENT_DIR))
73adde61 468- delete_in_dir(fname, file, &real_sx.st.st_dev);
9668a39c 469+ && (file->flags & FLAG_CONTENT_DIR)) {
9a70b743 470+ if (detect_renamed && real_ret != 0)
1fffd582 471+ unexplored_dirs++;
73adde61 472+ delete_in_dir(fname, file, &real_sx.st.st_dev,
1fffd582
WD
473+ delete_during < 0 ? DEL_NO_DELETIONS : 0);
474+ }
ffc18846 475 goto cleanup;
1fffd582
WD
476 }
477
cc3e685d 478@@ -1662,8 +1818,14 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
ffc18846 479 goto cleanup;
ccdb48f6 480 }
81172142 481 #endif
1fffd582
WD
482- if (stat_errno == ENOENT)
483+ if (stat_errno == ENOENT) {
484+ if (detect_renamed && unexplored_dirs > 0
a47d1f86 485+ && F_LENGTH(file)) {
1fffd582
WD
486+ bitbag_set_bit(delayed_bits, ndx);
487+ return;
488+ }
489 goto notify_others;
490+ }
cc3e685d 491 rsyserr(FERROR_XFER, stat_errno, "recv_generator: failed to stat %s",
1fffd582 492 full_fname(fname));
ffc18846 493 goto cleanup;
cc3e685d 494@@ -2000,6 +2162,12 @@ void generate_files(int f_out, const char *local_name)
fc068916
WD
495 if (verbose > 2)
496 rprintf(FINFO, "generator starting pid=%ld\n", (long)getpid());
1fffd582
WD
497
498+ if (detect_renamed) {
7bfcb297 499+ delayed_bits = bitbag_create(cur_flist->used);
1fffd582
WD
500+ if (!delete_before && !delete_during)
501+ delete_during = -1;
502+ }
503+
9c85142a 504 if (delete_before && !solo_file && cur_flist->used > 0)
73adde61 505 do_delete_pass();
1071853f 506 if (delete_during == 2) {
cc3e685d 507@@ -2010,7 +2178,7 @@ void generate_files(int f_out, const char *local_name)
1071853f 508 }
1fffd582
WD
509 do_progress = 0;
510
fc068916
WD
511- if (append_mode > 0 || whole_file < 0)
512+ if (append_mode > 0 || detect_renamed || whole_file < 0)
1fffd582
WD
513 whole_file = 0;
514 if (verbose >= 2) {
515 rprintf(FINFO, "delta-transmission %s\n",
cc3e685d 516@@ -2048,7 +2216,7 @@ void generate_files(int f_out, const char *local_name)
9c85142a
WD
517 dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp));
518 } else
519 dirdev = MAKEDEV(0, 0);
520- delete_in_dir(f_name(fp, fbuf), fp, &dirdev);
521+ delete_in_dir(f_name(fp, fbuf), fp, &dirdev, 0);
522 }
fc068916
WD
523 }
524 }
cc3e685d 525@@ -2091,7 +2259,21 @@ void generate_files(int f_out, const char *local_name)
2dbc45e7 526 } while ((cur_flist = cur_flist->next) != NULL);
fc068916 527
2dbc45e7 528 if (delete_during)
73adde61
WD
529- delete_in_dir(NULL, NULL, &dev_zero);
530+ delete_in_dir(NULL, NULL, &dev_zero, 0);
2dbc45e7
WD
531+ if (detect_renamed) {
532+ if (delete_during < 0)
533+ delete_during = 0;
534+ detect_renamed = 0;
1fffd582 535+
2dbc45e7 536+ for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) {
9bcaf4de 537+ struct file_struct *file = cur_flist->files[i];
2dbc45e7
WD
538+ if (local_name)
539+ strlcpy(fbuf, local_name, sizeof fbuf);
540+ else
541+ f_name(file, fbuf);
542+ recv_generator(fbuf, file, i, itemizing, code, f_out);
543+ }
544+ }
545 phase++;
546 if (verbose > 2)
547 rprintf(FINFO, "generate_files phase=%d\n", phase);
cc3e685d
WD
548diff --git a/options.c b/options.c
549--- a/options.c
550+++ b/options.c
f2863bc0 551@@ -81,6 +81,7 @@ int am_generator = 0;
a94141d9 552 int am_starting_up = 1;
1fffd582
WD
553 int relative_paths = -1;
554 int implied_dirs = 1;
555+int detect_renamed = 0;
556 int numeric_ids = 0;
557 int allow_8bit_chars = 0;
558 int force_delete = 0;
f2863bc0 559@@ -386,6 +387,7 @@ void usage(enum logcode F)
1fffd582
WD
560 rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n");
561 rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n");
562 rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n");
563+ rprintf(F," --detect-renamed try to find renamed files to speed up the transfer\n");
564 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
565 rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
566 rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
f2863bc0 567@@ -564,6 +566,7 @@ static struct poptOption long_options[] = {
1fffd582
WD
568 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
569 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
570 {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
571+ {"detect-renamed", 0, POPT_ARG_NONE, &detect_renamed, 0, 0, 0 },
572 {"fuzzy", 'y', POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 },
573 {"compress", 'z', POPT_ARG_NONE, 0, 'z', 0, 0 },
6cbbe66d 574 {"no-compress", 0, POPT_ARG_VAL, &do_compression, 0, 0, 0 },
f2863bc0 575@@ -1541,7 +1544,7 @@ int parse_arguments(int *argc_p, const char ***argv_p, int frommain)
1fffd582
WD
576 inplace = 1;
577 }
578
579- if (delay_updates && !partial_dir)
580+ if ((delay_updates || detect_renamed) && !partial_dir)
581 partial_dir = tmp_partialdir;
582
583 if (inplace) {
f2863bc0 584@@ -1550,6 +1553,7 @@ int parse_arguments(int *argc_p, const char ***argv_p, int frommain)
1fffd582
WD
585 snprintf(err_buf, sizeof err_buf,
586 "--%s cannot be used with --%s\n",
587 append_mode ? "append" : "inplace",
588+ detect_renamed ? "detect-renamed" :
589 delay_updates ? "delay-updates" : "partial-dir");
590 return 0;
591 }
f2863bc0 592@@ -1896,6 +1900,8 @@ void server_options(char **args, int *argc_p)
51bc0e89
WD
593 args[ac++] = "--super";
594 if (size_only)
595 args[ac++] = "--size-only";
596+ if (detect_renamed)
597+ args[ac++] = "--detect-renamed";
598 } else {
599 if (skip_compress) {
600 if (asprintf(&arg, "--skip-compress=%s", skip_compress) < 0)
cc3e685d
WD
601diff --git a/rsync.yo b/rsync.yo
602--- a/rsync.yo
603+++ b/rsync.yo
604@@ -385,6 +385,7 @@ to the detailed description below for a complete description. verb(
1fffd582
WD
605 --modify-window=NUM compare mod-times with reduced accuracy
606 -T, --temp-dir=DIR create temporary files in directory DIR
607 -y, --fuzzy find similar file for basis if no dest file
608+ --detect-renamed try to find renamed files to speed the xfer
609 --compare-dest=DIR also compare received files relative to DIR
610 --copy-dest=DIR ... and include copies of unchanged files
611 --link-dest=DIR hardlink to files in DIR when unchanged
cc3e685d 612@@ -1451,6 +1452,21 @@ Note that the use of the bf(--delete) option might get rid of any potential
1fffd582
WD
613 fuzzy-match files, so either use bf(--delete-after) or specify some
614 filename exclusions if you need to prevent this.
615
6fa0767f
WD
616+dit(bf(--detect-renamed)) With this option, for each new source file
617+(call it em(src/S)), rsync looks for a file em(dest/D) anywhere in the
618+destination that passes the quick check with em(src/S). If such a em(dest/D)
619+is found, rsync uses it as an alternate basis for transferring em(S). The
620+idea is that if em(src/S) was renamed from em(src/D) (as opposed to em(src/S)
621+passing the quick check with em(dest/D) by coincidence), the delta-transfer
622+algorithm will find that all the data matches between em(src/S) and em(dest/D),
623+and the transfer will be really fast.
624+
1fffd582
WD
625+By default, alternate-basis files are hard-linked into a directory named
626+".~tmp~" in each file's destination directory, but if you've specified
627+the bf(--partial-dir) option, that directory will be used instead. These
628+potential alternate-basis files will be removed as the transfer progresses.
629+This option conflicts with bf(--inplace) and bf(--append).
630+
631 dit(bf(--compare-dest=DIR)) This option instructs rsync to use em(DIR) on
632 the destination machine as an additional hierarchy to compare destination
633 files against doing transfers (if the files are missing in the destination
cc3e685d
WD
634diff --git a/util.c b/util.c
635--- a/util.c
636+++ b/util.c
f2863bc0 637@@ -1022,6 +1022,32 @@ int handle_partial_dir(const char *fname, int create)
1fffd582
WD
638 return 1;
639 }
640
/* We need to supply our own strcmp function for file list comparisons
 * to ensure that signed/unsigned usage is consistent between machines.
 *
 * Both strings are compared byte by byte as unsigned values.  Returns 0 if
 * they are equal; otherwise the difference between the first pair of
 * differing bytes (p1's byte minus p2's byte), where a string that ends
 * first contributes its terminating NUL. */
int u_strcmp(const char *p1, const char *p2)
{
	/* View the strings through const-qualified unsigned pointers so the
	 * callers' const is never cast away. */
	const unsigned char *u1 = (const unsigned char *)p1;
	const unsigned char *u2 = (const unsigned char *)p2;

	while (*u1 != '\0' && *u1 == *u2) {
		u1++;
		u2++;
	}

	return (int)*u1 - (int)*u2;
}
652+
/* We need a memcmp function that compares unsigned-byte values.
 *
 * Returns 0 if the first len bytes of p1 and p2 are identical; otherwise
 * the difference between the first pair of differing bytes (p1's byte
 * minus p2's byte).  A len of 0 always compares equal. */
int u_memcmp(const void *p1, const void *p2, size_t len)
{
	const unsigned char *u1 = p1;
	const unsigned char *u2 = p2;

	/* Both cursors must advance on every step; otherwise only the first
	 * byte of each buffer would ever be examined. */
	for ( ; len > 0; len--, u1++, u2++) {
		if (*u1 != *u2)
			return (int)*u1 - (int)*u2;
	}

	return 0;
}
666+
667 /**
668 * Determine if a symlink points outside the current directory tree.
669 * This is considered "unsafe" because e.g. when mirroring somebody