Use "use warnings" rather than -w on the #! line.
[rsync/rsync-patches.git] / detect-renamed.diff
CommitLineData
1fffd582
WD
1This patch adds the --detect-renamed option which makes rsync notice files
2that either (1) match in size & modify-time (plus the basename, if possible)
3or (2) match in size & checksum (when --checksum was also specified) and use
4each match as an alternate basis file to speed up the transfer.
5
6The algorithm attempts to scan the receiving-side's files in an efficient
7manner. If --delete[-before] is enabled, we'll take advantage of the
8pre-transfer delete pass to prepare any alternate-basis-file matches we
9might find. If --delete-before is not enabled, rsync does the rename scan
10during the regular file-sending scan (scanning each directory right before
11the generator starts updating files from that dir). In this latter mode,
12rsync might delay the updating of a file (if no alternate-basis match was
13yet found) until the full scan of the receiving side is complete, at which
14point any delayed files are processed.
15
16I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that
17takes advantage of rsync's pre-existing partial-dir logic. This uses less
18memory than trying to keep track of the matches internally, and also allows
19any deletions or file-updates to occur normally without interfering with
20these alternate-basis discoveries.
21
03019e41 22To use this patch, run these commands for a successful build:
1fffd582 23
03019e41
WD
24 patch -p1 <patches/detect-renamed.diff
25 ./configure (optional if already run)
1fffd582
WD
26 make
27
28TODO:
29
30 We need to never return a match from fattr_find() that has a basis
31 file. This will ensure that we don't try to give a renamed file to
32 a file that can't use it, while missing out on giving it to a file
33 that could use it.
34
cc3e685d
WD
35diff --git a/compat.c b/compat.c
36--- a/compat.c
37+++ b/compat.c
cdcd2137
WD
38@@ -41,6 +41,7 @@ extern int checksum_seed;
39 extern int basis_dir_cnt;
40 extern int prune_empty_dirs;
41 extern int protocol_version;
9bcaf4de 42+extern int detect_renamed;
cdcd2137
WD
43 extern int protect_args;
44 extern int preserve_uid;
45 extern int preserve_gid;
ae306a29 46@@ -120,6 +121,7 @@ void set_allow_inc_recurse(void)
9aab301c
WD
47 allow_inc_recurse = 0;
48 else if (!am_sender
49 && (delete_before || delete_after
50+ || detect_renamed
51 || delay_updates || prune_empty_dirs))
51bc0e89
WD
52 allow_inc_recurse = 0;
53 else if (am_server && !local_server
cc3e685d
WD
54diff --git a/flist.c b/flist.c
55--- a/flist.c
56+++ b/flist.c
f9df736a 57@@ -63,6 +63,7 @@ extern int non_perishable_cnt;
1fffd582
WD
58 extern int prune_empty_dirs;
59 extern int copy_links;
60 extern int copy_unsafe_links;
61+extern int detect_renamed;
62 extern int protocol_version;
63 extern int sanitize_paths;
cc3e685d 64 extern int munge_symlinks;
ae306a29 65@@ -121,6 +122,8 @@ static int64 tmp_dev, tmp_ino;
7b80cd0e 66 #endif
87a38eea 67 static char tmp_sum[MAX_DIGEST_LEN];
1fffd582
WD
68
69+struct file_list the_fattr_list;
70+
87a38eea 71 static char empty_sum[MAX_DIGEST_LEN];
a47d1f86 72 static int flist_count_offset; /* for --delete --progress */
6cbbe66d 73 static int dir_count = 0;
ae306a29 74@@ -288,6 +291,45 @@ static int is_excluded(const char *fname, int is_dir, int filter_level)
73adde61 75 return 0;
1fffd582
WD
76 }
77
78+static int fattr_compare(struct file_struct **file1, struct file_struct **file2)
79+{
80+ struct file_struct *f1 = *file1;
81+ struct file_struct *f2 = *file2;
a47d1f86 82+ int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2);
1fffd582
WD
83+ int diff;
84+
a47d1f86
WD
85+ if (!f1->basename || !S_ISREG(f1->mode) || !len1) {
86+ if (!f2->basename || !S_ISREG(f2->mode) || !len2)
1fffd582
WD
87+ return 0;
88+ return 1;
89+ }
a47d1f86 90+ if (!f2->basename || !S_ISREG(f2->mode) || !len2)
1fffd582
WD
91+ return -1;
92+
93+ /* Don't use diff for values that are longer than an int. */
a47d1f86
WD
94+ if (len1 != len2)
95+ return len1 < len2 ? -1 : 1;
1fffd582
WD
96+
97+ if (always_checksum) {
70891d26 98+ diff = u_memcmp(F_SUM(f1), F_SUM(f2), checksum_len);
1fffd582
WD
99+ if (diff)
100+ return diff;
101+ } else if (f1->modtime != f2->modtime)
102+ return f1->modtime < f2->modtime ? -1 : 1;
103+
104+ diff = u_strcmp(f1->basename, f2->basename);
105+ if (diff)
106+ return diff;
107+
108+ if (f1->dirname == f2->dirname)
109+ return 0;
110+ if (!f1->dirname)
111+ return -1;
112+ if (!f2->dirname)
113+ return 1;
114+ return u_strcmp(f1->dirname, f2->dirname);
115+}
116+
73adde61 117 static void send_directory(int f, struct file_list *flist,
fc068916 118 char *fbuf, int len, int flags);
1fffd582 119
bf1bd9d4 120@@ -2344,6 +2386,25 @@ struct file_list *recv_file_list(int f)
6fa0767f 121
f2863bc0 122 flist_sort_and_clean(flist, relative_paths);
1fffd582
WD
123
124+ if (detect_renamed) {
7bfcb297
WD
125+ int j = flist->used;
126+ the_fattr_list.used = j;
1fffd582
WD
127+ the_fattr_list.files = new_array(struct file_struct *, j);
128+ if (!the_fattr_list.files)
70891d26 129+ out_of_memory("recv_file_list");
1fffd582
WD
130+ memcpy(the_fattr_list.files, flist->files,
131+ j * sizeof (struct file_struct *));
132+ qsort(the_fattr_list.files, j,
fc068916 133+ sizeof the_fattr_list.files[0], (int (*)())fattr_compare);
1fffd582
WD
134+ the_fattr_list.low = 0;
135+ while (j-- > 0) {
136+ struct file_struct *fp = the_fattr_list.files[j];
a47d1f86 137+ if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp))
1fffd582
WD
138+ break;
139+ }
140+ the_fattr_list.high = j;
141+ }
142+
6fa0767f
WD
143 if (protocol_version < 30) {
144 /* Recv the io_error flag */
145 if (ignore_errors)
cc3e685d
WD
146diff --git a/generator.c b/generator.c
147--- a/generator.c
148+++ b/generator.c
c0c7984e 149@@ -80,6 +80,7 @@ extern char *basis_dir[];
1fffd582
WD
150 extern int compare_dest;
151 extern int copy_dest;
152 extern int link_dest;
153+extern int detect_renamed;
154 extern int whole_file;
155 extern int list_only;
ccdb48f6 156 extern int read_batch;
4c107044 157@@ -98,6 +99,7 @@ extern char *backup_suffix;
1fffd582 158 extern int backup_suffix_len;
fc068916 159 extern struct file_list *cur_flist, *first_flist, *dir_flist;
c0c7984e 160 extern struct filter_list_struct daemon_filter_list;
fc068916 161+extern struct file_list the_fattr_list;
d4dd2dd5
WD
162
163 int ignore_perishable = 0;
164 int non_perishable_cnt = 0;
4c107044 165@@ -105,6 +107,7 @@ int maybe_ATTRS_REPORT = 0;
d16b5fd6 166
fc068916 167 static dev_t dev_zero;
1fffd582
WD
168 static int deletion_count = 0; /* used to implement --max-delete */
169+static int unexplored_dirs = 1;
1071853f
WD
170 static int deldelay_size = 0, deldelay_cnt = 0;
171 static char *deldelay_buf = NULL;
172 static int deldelay_fd = -1;
4c107044 173@@ -115,7 +118,7 @@ static int need_retouch_dir_times;
9c85142a 174 static int need_retouch_dir_perms;
2dbc45e7 175 static const char *solo_file = NULL;
1fffd582 176
d16b5fd6
WD
177-/* For calling delete_item() and delete_dir_contents(). */
178+/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */
f9df736a 179 #define DEL_NO_UID_WRITE (1<<0) /* file/dir has our uid w/o write perm */
a5e6228a 180 #define DEL_RECURSE (1<<1) /* if dir, delete all contents */
d16b5fd6 181 #define DEL_DIR_IS_EMPTY (1<<2) /* internal delete_FUNCTIONS use only */
4c107044 182@@ -124,6 +127,7 @@ static const char *solo_file = NULL;
a5e6228a
WD
183 #define DEL_FOR_SYMLINK (1<<5) /* making room for a replacement symlink */
184 #define DEL_FOR_DEVICE (1<<6) /* making room for a replacement device */
185 #define DEL_FOR_SPECIAL (1<<7) /* making room for a replacement special */
186+#define DEL_NO_DELETIONS (1<<9) /* just check for renames w/o deleting */
1fffd582 187
a5e6228a
WD
188 #define DEL_MAKE_ROOM (DEL_FOR_FILE|DEL_FOR_DIR|DEL_FOR_SYMLINK|DEL_FOR_DEVICE|DEL_FOR_SPECIAL)
189
4c107044 190@@ -148,11 +152,121 @@ static int is_backup_file(char *fn)
1fffd582
WD
191 return k > 0 && strcmp(fn+k, backup_suffix) == 0;
192 }
193
194+/* Search for a regular file that matches either (1) the size & modified
195+ * time (plus the basename, if possible) or (2) the size & checksum. If
196+ * we find an exact match down to the dirname, return -1 because we found
197+ * an up-to-date file in the transfer, not a renamed file. */
a47d1f86 198+static int fattr_find(struct file_struct *f, char *fname)
1fffd582
WD
199+{
200+ int low = the_fattr_list.low, high = the_fattr_list.high;
201+ int mid, ok_match = -1, good_match = -1;
202+ struct file_struct *fmid;
203+ int diff;
204+
205+ while (low <= high) {
206+ mid = (low + high) / 2;
207+ fmid = the_fattr_list.files[mid];
a47d1f86
WD
208+ if (F_LENGTH(fmid) != F_LENGTH(f)) {
209+ if (F_LENGTH(fmid) < F_LENGTH(f))
1fffd582
WD
210+ low = mid + 1;
211+ else
212+ high = mid - 1;
213+ continue;
214+ }
215+ if (always_checksum) {
9bcaf4de 216+ /* We use the FLAG_FILE_SENT flag to indicate when we
a47d1f86 217+ * have computed the checksum for an entry. */
9bcaf4de 218+ if (!(f->flags & FLAG_FILE_SENT)) {
1fffd582
WD
219+ if (fmid->modtime == f->modtime
220+ && f_name_cmp(fmid, f) == 0)
221+ return -1; /* assume we can't help */
d7d6347c 222+ file_checksum(fname, F_SUM(f), F_LENGTH(f));
9bcaf4de 223+ f->flags |= FLAG_FILE_SENT;
1fffd582 224+ }
70891d26 225+ diff = u_memcmp(F_SUM(fmid), F_SUM(f), checksum_len);
1fffd582
WD
226+ if (diff) {
227+ if (diff < 0)
228+ low = mid + 1;
229+ else
230+ high = mid - 1;
231+ continue;
232+ }
233+ } else {
234+ if (fmid->modtime != f->modtime) {
235+ if (fmid->modtime < f->modtime)
236+ low = mid + 1;
237+ else
238+ high = mid - 1;
239+ continue;
240+ }
241+ }
242+ ok_match = mid;
243+ diff = u_strcmp(fmid->basename, f->basename);
244+ if (diff == 0) {
245+ good_match = mid;
246+ if (fmid->dirname == f->dirname)
247+ return -1; /* file is up-to-date */
248+ if (!fmid->dirname) {
249+ low = mid + 1;
250+ continue;
251+ }
252+ if (!f->dirname) {
253+ high = mid - 1;
254+ continue;
255+ }
256+ diff = u_strcmp(fmid->dirname, f->dirname);
257+ if (diff == 0)
258+ return -1; /* file is up-to-date */
259+ }
260+ if (diff < 0)
261+ low = mid + 1;
262+ else
263+ high = mid - 1;
264+ }
265+
266+ return good_match >= 0 ? good_match : ok_match;
267+}
268+
a47d1f86 269+static void look_for_rename(struct file_struct *file, char *fname)
1fffd582
WD
270+{
271+ struct file_struct *fp;
272+ char *partialptr, *fn;
273+ STRUCT_STAT st;
274+ int ndx;
275+
6fa0767f 276+ if (!partial_dir || (ndx = fattr_find(file, fname)) < 0)
1fffd582
WD
277+ return;
278+
279+ fp = the_fattr_list.files[ndx];
280+ fn = f_name(fp, NULL);
281+ /* We don't provide an alternate-basis file if there is a basis file. */
282+ if (link_stat(fn, &st, 0) == 0)
283+ return;
1fffd582 284+
6fa0767f
WD
285+ if (!dry_run) {
286+ if ((partialptr = partial_dir_fname(fn)) == NULL
287+ || !handle_partial_dir(partialptr, PDIR_CREATE))
288+ return;
289+ /* We only use the file if we can hard-link it into our tmp dir. */
290+ if (link(fname, partialptr) != 0) {
291+ if (errno != EEXIST)
292+ handle_partial_dir(partialptr, PDIR_DELETE);
293+ return;
1fffd582 294+ }
1fffd582
WD
295+ }
296+
6fa0767f
WD
297+ /* I think this falls into the -vv category with "%s is uptodate", etc. */
298+ if (verbose > 1)
299+ rprintf(FINFO, "found renamed: %s => %s\n", fname, fn);
1fffd582 300+}
87d0091c
WD
301+
302 /* Delete a file or directory. If DEL_RECURSE is set in the flags, this will
303 * delete recursively.
1fffd582 304 *
f813befd 305 * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's
15894839
WD
306 * a directory! (The buffer is used for recursion, but returned unchanged.)
307+ *
308+ * Also note: --detect-rename may use this routine with DEL_NO_DELETIONS set!
309 */
f9df736a 310 static enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
15894839 311 {
4c107044 312@@ -186,6 +300,8 @@ static enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
15894839
WD
313 goto check_ret;
314 /* OK: try to delete the directory. */
315 }
316+ if (flags & DEL_NO_DELETIONS)
317+ return DR_SUCCESS;
318
a5e6228a 319 if (!(flags & DEL_MAKE_ROOM) && max_delete >= 0 && ++deletion_count > max_delete)
15894839 320 return DR_AT_LIMIT;
4c107044 321@@ -241,6 +357,8 @@ static enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
d16b5fd6
WD
322 * its contents, otherwise just checks for content. Returns DR_SUCCESS or
323 * DR_NOT_EMPTY. Note that fname must point to a MAXPATHLEN buffer! (The
324 * buffer is used for recursion, but returned unchanged.)
1fffd582 325+ *
87d0091c 326+ * Note: --detect-rename may use this routine with DEL_NO_DELETIONS set!
1fffd582 327 */
f9df736a 328 static enum delret delete_dir_contents(char *fname, uint16 flags)
1fffd582 329 {
4c107044 330@@ -260,7 +378,9 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
a47d1f86
WD
331 save_filters = push_local_filters(fname, dlen);
332
333 non_perishable_cnt = 0;
7e27b6c0 334+ file_extra_cnt += SUM_EXTRA_CNT;
a47d1f86 335 dirlist = get_dirlist(fname, dlen, 0);
7e27b6c0 336+ file_extra_cnt -= SUM_EXTRA_CNT;
a47d1f86
WD
337 ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS;
338
9c85142a 339 if (!dirlist->used)
4c107044 340@@ -300,7 +420,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
f9df736a 341 if (S_ISDIR(fp->mode)) {
a5e6228a
WD
342 if (delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS)
343 ret = DR_NOT_EMPTY;
344- }
345+ } else if (detect_renamed && S_ISREG(fp->mode))
a47d1f86 346+ look_for_rename(fp, fname);
a5e6228a 347 if (delete_item(fname, fp->mode, flags) != DR_SUCCESS)
d16b5fd6
WD
348 ret = DR_NOT_EMPTY;
349 }
4c107044 350@@ -465,13 +586,18 @@ static void do_delayed_deletions(char *delbuf)
1fffd582
WD
351 * all the --delete-WHEN options. Note that the fbuf pointer must point to a
352 * MAXPATHLEN buffer with the name of the directory in it (the functions we
353 * call will append names onto the end, but the old dir value will be restored
354- * on exit). */
73adde61 355-static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
1fffd582
WD
356+ * on exit).
357+ *
358+ * Note: --detect-rename may use this routine with DEL_NO_DELETIONS set!
359+ */
73adde61 360+static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev,
f9df736a 361+ int del_flags)
1fffd582 362 {
1fffd582
WD
363 static int already_warned = 0;
364 struct file_list *dirlist;
365- char delbuf[MAXPATHLEN];
6fa0767f 366- int dlen, i;
1fffd582
WD
367+ char *p, delbuf[MAXPATHLEN];
368+ unsigned remainder;
6fa0767f 369+ int dlen, i, restore_dot = 0;
f9df736a 370 int save_uid_ndx = uid_ndx;
1fffd582 371
73adde61 372 if (!fbuf) {
4c107044 373@@ -486,17 +612,22 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
1fffd582
WD
374 maybe_send_keepalive();
375
041d67b8 376 if (io_error && !ignore_errors) {
1fffd582
WD
377- if (already_warned)
378+ if (!already_warned) {
379+ rprintf(FINFO,
380+ "IO error encountered -- skipping file deletion\n");
381+ already_warned = 1;
382+ }
383+ if (!detect_renamed)
384 return;
385- rprintf(FINFO,
386- "IO error encountered -- skipping file deletion\n");
387- already_warned = 1;
388- return;
f9df736a 389+ del_flags |= DEL_NO_DELETIONS;
1fffd582
WD
390 }
391
1fffd582 392 dlen = strlen(fbuf);
fc068916 393 change_local_filter_dir(fbuf, dlen, F_DEPTH(file));
1fffd582
WD
394
395+ if (detect_renamed)
396+ unexplored_dirs--;
397+
398 if (one_file_system) {
399 if (file->flags & FLAG_TOP_DIR)
fc068916 400 filesystem_dev = *fs_dev;
4c107044 401@@ -509,6 +640,14 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
1fffd582
WD
402
403 dirlist = get_dirlist(fbuf, dlen, 0);
404
405+ p = fbuf + dlen;
6fa0767f
WD
406+ if (dlen == 1 && *fbuf == '.') {
407+ restore_dot = 1;
408+ p = fbuf;
409+ } else if (dlen != 1 || *fbuf != '/')
1fffd582
WD
410+ *p++ = '/';
411+ remainder = MAXPATHLEN - (p - fbuf);
412+
413 /* If an item in dirlist is not found in flist, delete it
414 * from the filesystem. */
9c85142a 415 for (i = dirlist->used; i--; ) {
abd3adb8 416@@ -521,6 +660,10 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
87d0091c 417 f_name(fp, NULL));
1fffd582 418 continue;
87d0091c 419 }
1fffd582
WD
420+ if (detect_renamed && S_ISREG(fp->mode)) {
421+ strlcpy(p, fp->basename, remainder);
a47d1f86 422+ look_for_rename(fp, fbuf);
1fffd582 423+ }
abd3adb8
WD
424 /* Here we want to match regardless of file type. Replacement
425 * of a file with one of another type is handled separately by
426 * a delete_item call with a DEL_MAKE_ROOM flag. */
427@@ -529,14 +672,19 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
f9df736a
WD
428 if (!(fp->mode & S_IWUSR) && !am_root && (uid_t)F_OWNER(fp) == our_uid)
429 flags |= DEL_NO_UID_WRITE;
1fffd582 430 f_name(fp, delbuf);
1071853f 431- if (delete_during == 2) {
a5e6228a 432- if (!remember_delete(fp, delbuf, flags))
f9df736a
WD
433+ if (delete_during == 2 && !(del_flags & DEL_NO_DELETIONS)) {
434+ if (!remember_delete(fp, delbuf, del_flags | flags))
1071853f
WD
435 break;
436 } else
a5e6228a 437- delete_item(delbuf, fp->mode, flags);
1fffd582 438- }
f9df736a 439+ delete_item(delbuf, fp->mode, del_flags | flags);
1fffd582
WD
440+ } else if (detect_renamed && S_ISDIR(fp->mode))
441+ unexplored_dirs++;
442 }
443
6fa0767f
WD
444+ if (restore_dot)
445+ fbuf[0] = '.';
1fffd582
WD
446+ fbuf[dlen] = '\0';
447+
448 flist_free(dirlist);
1fffd582 449
f9df736a 450 if (!save_uid_ndx) {
abd3adb8 451@@ -574,9 +722,9 @@ static void do_delete_pass(void)
1fffd582
WD
452 || !S_ISDIR(st.st_mode))
453 continue;
454
73adde61
WD
455- delete_in_dir(fbuf, file, &st.st_dev);
456+ delete_in_dir(fbuf, file, &st.st_dev, 0);
1fffd582 457 }
73adde61
WD
458- delete_in_dir(NULL, NULL, &dev_zero);
459+ delete_in_dir(NULL, NULL, &dev_zero, 0);
1fffd582
WD
460
461 if (do_progress && !am_server)
462 rprintf(FINFO, " \r");
abd3adb8 463@@ -1229,6 +1377,7 @@ static void list_file_entry(struct file_struct *f)
cdcd2137 464 }
1fffd582
WD
465 }
466
467+static struct bitbag *delayed_bits = NULL;
468 static int phase = 0;
ffc18846 469 static int dflt_perms;
1fffd582 470
abd3adb8 471@@ -1505,9 +1654,12 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
fc068916 472 }
c0c7984e 473 else if (delete_during && f_out != -1 && !phase
65ecbe35
WD
474 && !(file->flags & FLAG_MISSING_DIR)) {
475- if (file->flags & FLAG_CONTENT_DIR)
476- delete_in_dir(fname, file, &real_sx.st.st_dev);
477- else
478+ if (file->flags & FLAG_CONTENT_DIR) {
479+ if (detect_renamed && real_ret != 0)
480+ unexplored_dirs++;
481+ delete_in_dir(fname, file, &real_sx.st.st_dev,
482+ delete_during < 0 ? DEL_NO_DELETIONS : 0);
483+ } else
484 change_local_filter_dir(fname, strlen(fname), F_DEPTH(file));
65ecbe35 485 }
cbdf862c 486 goto cleanup;
abd3adb8 487@@ -1785,8 +1937,14 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
ffc18846 488 goto cleanup;
ccdb48f6 489 }
81172142 490 #endif
1fffd582
WD
491- if (stat_errno == ENOENT)
492+ if (stat_errno == ENOENT) {
493+ if (detect_renamed && unexplored_dirs > 0
a47d1f86 494+ && F_LENGTH(file)) {
1fffd582
WD
495+ bitbag_set_bit(delayed_bits, ndx);
496+ return;
497+ }
498 goto notify_others;
499+ }
cc3e685d 500 rsyserr(FERROR_XFER, stat_errno, "recv_generator: failed to stat %s",
1fffd582 501 full_fname(fname));
ffc18846 502 goto cleanup;
abd3adb8 503@@ -2187,6 +2345,12 @@ void generate_files(int f_out, const char *local_name)
fc068916
WD
504 if (verbose > 2)
505 rprintf(FINFO, "generator starting pid=%ld\n", (long)getpid());
1fffd582
WD
506
507+ if (detect_renamed) {
7bfcb297 508+ delayed_bits = bitbag_create(cur_flist->used);
1fffd582
WD
509+ if (!delete_before && !delete_during)
510+ delete_during = -1;
511+ }
512+
9c85142a 513 if (delete_before && !solo_file && cur_flist->used > 0)
73adde61 514 do_delete_pass();
1071853f 515 if (delete_during == 2) {
abd3adb8 516@@ -2197,7 +2361,7 @@ void generate_files(int f_out, const char *local_name)
1071853f 517 }
1fffd582
WD
518 do_progress = 0;
519
fc068916
WD
520- if (append_mode > 0 || whole_file < 0)
521+ if (append_mode > 0 || detect_renamed || whole_file < 0)
1fffd582
WD
522 whole_file = 0;
523 if (verbose >= 2) {
524 rprintf(FINFO, "delta-transmission %s\n",
abd3adb8 525@@ -2239,7 +2403,7 @@ void generate_files(int f_out, const char *local_name)
4c107044
WD
526 dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp));
527 } else
528 dirdev = MAKEDEV(0, 0);
65ecbe35
WD
529- delete_in_dir(fbuf, fp, &dirdev);
530+ delete_in_dir(fbuf, fp, &dirdev, 0);
531 } else
532 change_local_filter_dir(fbuf, strlen(fbuf), F_DEPTH(fp));
fc068916 533 }
abd3adb8 534@@ -2289,7 +2453,21 @@ void generate_files(int f_out, const char *local_name)
963ca808 535 write_ndx(f_out, NDX_DONE);
fc068916 536
2dbc45e7 537 if (delete_during)
73adde61
WD
538- delete_in_dir(NULL, NULL, &dev_zero);
539+ delete_in_dir(NULL, NULL, &dev_zero, 0);
2dbc45e7
WD
540+ if (detect_renamed) {
541+ if (delete_during < 0)
542+ delete_during = 0;
543+ detect_renamed = 0;
1fffd582 544+
2dbc45e7 545+ for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) {
9bcaf4de 546+ struct file_struct *file = cur_flist->files[i];
2dbc45e7
WD
547+ if (local_name)
548+ strlcpy(fbuf, local_name, sizeof fbuf);
549+ else
550+ f_name(file, fbuf);
551+ recv_generator(fbuf, file, i, itemizing, code, f_out);
552+ }
553+ }
554 phase++;
555 if (verbose > 2)
556 rprintf(FINFO, "generate_files phase=%d\n", phase);
cc3e685d
WD
557diff --git a/options.c b/options.c
558--- a/options.c
559+++ b/options.c
c0c7984e 560@@ -82,6 +82,7 @@ int am_generator = 0;
a94141d9 561 int am_starting_up = 1;
1fffd582
WD
562 int relative_paths = -1;
563 int implied_dirs = 1;
564+int detect_renamed = 0;
565 int numeric_ids = 0;
566 int allow_8bit_chars = 0;
567 int force_delete = 0;
abd3adb8 568@@ -392,6 +393,7 @@ void usage(enum logcode F)
1fffd582
WD
569 rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n");
570 rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n");
571 rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n");
572+ rprintf(F," --detect-renamed try to find renamed files to speed up the transfer\n");
573 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
574 rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
575 rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
abd3adb8 576@@ -579,6 +581,7 @@ static struct poptOption long_options[] = {
1fffd582
WD
577 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
578 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
579 {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
580+ {"detect-renamed", 0, POPT_ARG_NONE, &detect_renamed, 0, 0, 0 },
c0c7984e
WD
581 {"fuzzy", 'y', POPT_ARG_VAL, &fuzzy_basis, 1, 0, 0 },
582 {"no-fuzzy", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
583 {"no-y", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
abd3adb8 584@@ -1591,7 +1594,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
1fffd582
WD
585 inplace = 1;
586 }
587
588- if (delay_updates && !partial_dir)
589+ if ((delay_updates || detect_renamed) && !partial_dir)
590 partial_dir = tmp_partialdir;
591
592 if (inplace) {
abd3adb8 593@@ -1600,6 +1603,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
1fffd582
WD
594 snprintf(err_buf, sizeof err_buf,
595 "--%s cannot be used with --%s\n",
596 append_mode ? "append" : "inplace",
597+ detect_renamed ? "detect-renamed" :
598 delay_updates ? "delay-updates" : "partial-dir");
599 return 0;
600 }
abd3adb8 601@@ -1961,6 +1965,8 @@ void server_options(char **args, int *argc_p)
51bc0e89
WD
602 args[ac++] = "--super";
603 if (size_only)
604 args[ac++] = "--size-only";
605+ if (detect_renamed)
606+ args[ac++] = "--detect-renamed";
607 } else {
608 if (skip_compress) {
609 if (asprintf(&arg, "--skip-compress=%s", skip_compress) < 0)
cc3e685d
WD
610diff --git a/rsync.yo b/rsync.yo
611--- a/rsync.yo
612+++ b/rsync.yo
abd3adb8 613@@ -389,6 +389,7 @@ to the detailed description below for a complete description. verb(
1fffd582
WD
614 --modify-window=NUM compare mod-times with reduced accuracy
615 -T, --temp-dir=DIR create temporary files in directory DIR
616 -y, --fuzzy find similar file for basis if no dest file
617+ --detect-renamed try to find renamed files to speed the xfer
618 --compare-dest=DIR also compare received files relative to DIR
619 --copy-dest=DIR ... and include copies of unchanged files
620 --link-dest=DIR hardlink to files in DIR when unchanged
abd3adb8 621@@ -1491,6 +1492,21 @@ Note that the use of the bf(--delete) option might get rid of any potential
1fffd582
WD
622 fuzzy-match files, so either use bf(--delete-after) or specify some
623 filename exclusions if you need to prevent this.
624
6fa0767f
WD
625+dit(bf(--detect-renamed)) With this option, for each new source file
626+(call it em(src/S)), rsync looks for a file em(dest/D) anywhere in the
627+destination that passes the quick check with em(src/S). If such a em(dest/D)
628+is found, rsync uses it as an alternate basis for transferring em(S). The
629+idea is that if em(src/S) was renamed from em(src/D) (as opposed to em(src/S)
630+passing the quick check with em(dest/D) by coincidence), the delta-transfer
631+algorithm will find that all the data matches between em(src/S) and em(dest/D),
632+and the transfer will be really fast.
633+
1fffd582
WD
634+By default, alternate-basis files are hard-linked into a directory named
635+".~tmp~" in each file's destination directory, but if you've specified
636+the bf(--partial-dir) option, that directory will be used instead. These
637+potential alternate-basis files will be removed as the transfer progresses.
638+This option conflicts with bf(--inplace) and bf(--append).
639+
640 dit(bf(--compare-dest=DIR)) This option instructs rsync to use em(DIR) on
641 the destination machine as an additional hierarchy to compare destination
642 files against doing transfers (if the files are missing in the destination
cc3e685d
WD
643diff --git a/util.c b/util.c
644--- a/util.c
645+++ b/util.c
abd3adb8 646@@ -1168,6 +1168,32 @@ int handle_partial_dir(const char *fname, int create)
1fffd582
WD
647 return 1;
648 }
649
650+/* We need to supply our own strcmp function for file list comparisons
651+ * to ensure that signed/unsigned usage is consistent between machines. */
652+int u_strcmp(const char *p1, const char *p2)
653+{
654+ for ( ; *p1; p1++, p2++) {
655+ if (*p1 != *p2)
656+ break;
657+ }
658+
659+ return (int)*(uchar*)p1 - (int)*(uchar*)p2;
660+}
661+
662+/* We need a memcmp function compares unsigned-byte values. */
663+int u_memcmp(const void *p1, const void *p2, size_t len)
664+{
665+ const uchar *u1 = p1;
666+ const uchar *u2 = p2;
667+
668+ while (len--) {
669+ if (*u1 != *u2)
670+ return (int)*u1 - (int)*u2;
671+ }
672+
673+ return 0;
674+}
675+
676 /**
677 * Determine if a symlink points outside the current directory tree.
678 * This is considered "unsafe" because e.g. when mirroring somebody