The patches for 3.0.0pre9.
[rsync/rsync-patches.git] / detect-renamed.diff
CommitLineData
1fffd582
WD
1This patch adds the --detect-renamed option which makes rsync notice files
2that either (1) match in size & modify-time (plus the basename, if possible)
3or (2) match in size & checksum (when --checksum was also specified) and use
4each match as an alternate basis file to speed up the transfer.
5
6The algorithm attempts to scan the receiving-side's files in an efficient
7manner. If --delete[-before] is enabled, we'll take advantage of the
8pre-transfer delete pass to prepare any alternate-basis-file matches we
9might find. If --delete-before is not enabled, rsync does the rename scan
10during the regular file-sending scan (scanning each directory right before
11the generator starts updating files from that dir). In this latter mode,
12rsync might delay the updating of a file (if no alternate-basis match was
13yet found) until the full scan of the receiving side is complete, at which
14point any delayed files are processed.
15
16I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that
17takes advantage of rsync's pre-existing partial-dir logic. This uses less
18memory than trying to keep track of the matches internally, and also allows
19any deletions or file-updates to occur normally without interfering with
20these alternate-basis discoveries.
21
03019e41 22To use this patch, run these commands for a successful build:
1fffd582 23
03019e41
WD
24 patch -p1 <patches/detect-renamed.diff
25 ./configure (optional if already run)
1fffd582
WD
26 make
27
28TODO:
29
30 We need to never return a match from fattr_find() that has a basis
31 file. This will ensure that we don't try to give a renamed file to
32 a file that can't use it, while missing out on giving it to a file
33 that could use it.
34
cc3e685d
WD
35diff --git a/compat.c b/compat.c
36--- a/compat.c
37+++ b/compat.c
cdcd2137
WD
38@@ -41,6 +41,7 @@ extern int checksum_seed;
39 extern int basis_dir_cnt;
40 extern int prune_empty_dirs;
41 extern int protocol_version;
9bcaf4de 42+extern int detect_renamed;
cdcd2137
WD
43 extern int protect_args;
44 extern int preserve_uid;
45 extern int preserve_gid;
d4dd2dd5 46@@ -107,6 +108,7 @@ void set_allow_inc_recurse(void)
9aab301c
WD
47 allow_inc_recurse = 0;
48 else if (!am_sender
49 && (delete_before || delete_after
50+ || detect_renamed
51 || delay_updates || prune_empty_dirs))
51bc0e89
WD
52 allow_inc_recurse = 0;
53 else if (am_server && !local_server
cc3e685d
WD
54diff --git a/flist.c b/flist.c
55--- a/flist.c
56+++ b/flist.c
ccdb48f6 57@@ -61,6 +61,7 @@ extern int non_perishable_cnt;
1fffd582
WD
58 extern int prune_empty_dirs;
59 extern int copy_links;
60 extern int copy_unsafe_links;
61+extern int detect_renamed;
62 extern int protocol_version;
63 extern int sanitize_paths;
cc3e685d
WD
64 extern int munge_symlinks;
65@@ -118,6 +119,8 @@ static int64 tmp_dev, tmp_ino;
7b80cd0e 66 #endif
87a38eea 67 static char tmp_sum[MAX_DIGEST_LEN];
1fffd582
WD
68
69+struct file_list the_fattr_list;
70+
87a38eea 71 static char empty_sum[MAX_DIGEST_LEN];
a47d1f86 72 static int flist_count_offset; /* for --delete --progress */
6cbbe66d 73 static int dir_count = 0;
f2863bc0 74@@ -261,6 +264,45 @@ static int is_excluded(char *fname, int is_dir, int filter_level)
73adde61 75 return 0;
1fffd582
WD
76 }
77
78+static int fattr_compare(struct file_struct **file1, struct file_struct **file2) /* qsort() comparator used to order the_fattr_list */
79+{ /* returns <0, 0 or >0; keys in order: usability, length, checksum or modtime, basename, dirname */
80+ struct file_struct *f1 = *file1;
81+ struct file_struct *f2 = *file2;
a47d1f86 82+ int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2);
1fffd582
WD
83+ int diff;
84+
a47d1f86
WD
85+ if (!f1->basename || !S_ISREG(f1->mode) || !len1) { /* f1 is unusable: no basename, not a regular file, or zero length */
86+ if (!f2->basename || !S_ISREG(f2->mode) || !len2)
1fffd582
WD
87+ return 0; /* both unusable: treat them as equal */
88+ return 1; /* unusable entries sort after usable ones */
89+ }
a47d1f86 90+ if (!f2->basename || !S_ISREG(f2->mode) || !len2)
1fffd582
WD
91+ return -1; /* only f2 is unusable, so f1 sorts first */
92+
93+ /* Don't use diff for values that are longer than an int. */
a47d1f86
WD
94+ if (len1 != len2)
95+ return len1 < len2 ? -1 : 1;
1fffd582
WD
96+
97+ if (always_checksum) { /* next key is the checksum when always_checksum is set, otherwise the modtime */
70891d26 98+ diff = u_memcmp(F_SUM(f1), F_SUM(f2), checksum_len);
1fffd582
WD
99+ if (diff)
100+ return diff;
101+ } else if (f1->modtime != f2->modtime)
102+ return f1->modtime < f2->modtime ? -1 : 1;
103+
104+ diff = u_strcmp(f1->basename, f2->basename);
105+ if (diff)
106+ return diff;
107+
108+ if (f1->dirname == f2->dirname) /* same pointer (or both NULL): no string compare needed */
109+ return 0;
110+ if (!f1->dirname) /* a NULL dirname sorts before any non-NULL dirname */
111+ return -1;
112+ if (!f2->dirname)
113+ return 1;
114+ return u_strcmp(f1->dirname, f2->dirname);
115+}
116+
73adde61 117 static void send_directory(int f, struct file_list *flist,
fc068916 118 char *fbuf, int len, int flags);
1fffd582 119
a5e6228a 120@@ -2178,6 +2220,25 @@ struct file_list *recv_file_list(int f)
6fa0767f 121
f2863bc0 122 flist_sort_and_clean(flist, relative_paths);
1fffd582
WD
123
124+ if (detect_renamed) {
7bfcb297
WD
125+ int j = flist->used;
126+ the_fattr_list.used = j;
1fffd582
WD
127+ the_fattr_list.files = new_array(struct file_struct *, j);
128+ if (!the_fattr_list.files)
70891d26 129+ out_of_memory("recv_file_list");
1fffd582
WD
130+ memcpy(the_fattr_list.files, flist->files,
131+ j * sizeof (struct file_struct *));
132+ qsort(the_fattr_list.files, j,
fc068916 133+ sizeof the_fattr_list.files[0], (int (*)())fattr_compare);
1fffd582
WD
134+ the_fattr_list.low = 0;
135+ while (j-- > 0) {
136+ struct file_struct *fp = the_fattr_list.files[j];
a47d1f86 137+ if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp))
1fffd582
WD
138+ break;
139+ }
140+ the_fattr_list.high = j;
141+ }
142+
6fa0767f
WD
143 if (protocol_version < 30) {
144 /* Recv the io_error flag */
145 if (ignore_errors)
cc3e685d
WD
146diff --git a/generator.c b/generator.c
147--- a/generator.c
148+++ b/generator.c
ccdb48f6 149@@ -79,6 +79,7 @@ extern char *basis_dir[];
1fffd582
WD
150 extern int compare_dest;
151 extern int copy_dest;
152 extern int link_dest;
153+extern int detect_renamed;
154 extern int whole_file;
155 extern int list_only;
ccdb48f6 156 extern int read_batch;
a5e6228a 157@@ -97,6 +98,7 @@ extern char *backup_suffix;
1fffd582 158 extern int backup_suffix_len;
fc068916 159 extern struct file_list *cur_flist, *first_flist, *dir_flist;
1fffd582 160 extern struct filter_list_struct server_filter_list;
fc068916 161+extern struct file_list the_fattr_list;
d4dd2dd5
WD
162
163 int ignore_perishable = 0;
164 int non_perishable_cnt = 0;
a5e6228a 165@@ -104,6 +106,7 @@ int maybe_ATTRS_REPORT = 0;
d16b5fd6 166
fc068916 167 static dev_t dev_zero;
1fffd582
WD
168 static int deletion_count = 0; /* used to implement --max-delete */
169+static int unexplored_dirs = 1;
1071853f
WD
170 static int deldelay_size = 0, deldelay_cnt = 0;
171 static char *deldelay_buf = NULL;
172 static int deldelay_fd = -1;
a5e6228a 173@@ -113,7 +116,7 @@ static int need_retouch_dir_times;
9c85142a 174 static int need_retouch_dir_perms;
2dbc45e7 175 static const char *solo_file = NULL;
1fffd582 176
d16b5fd6
WD
177-/* For calling delete_item() and delete_dir_contents(). */
178+/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */
a5e6228a
WD
179 #define DEL_OWNED_BY_US (1<<0) /* file/dir has our uid */
180 #define DEL_RECURSE (1<<1) /* if dir, delete all contents */
d16b5fd6 181 #define DEL_DIR_IS_EMPTY (1<<2) /* internal delete_FUNCTIONS use only */
a5e6228a
WD
182@@ -122,6 +125,7 @@ static const char *solo_file = NULL;
183 #define DEL_FOR_SYMLINK (1<<5) /* making room for a replacement symlink */
184 #define DEL_FOR_DEVICE (1<<6) /* making room for a replacement device */
185 #define DEL_FOR_SPECIAL (1<<7) /* making room for a replacement special */
186+#define DEL_NO_DELETIONS (1<<9) /* just check for renames w/o deleting */
1fffd582 187
a5e6228a
WD
188 #define DEL_MAKE_ROOM (DEL_FOR_FILE|DEL_FOR_DIR|DEL_FOR_SYMLINK|DEL_FOR_DEVICE|DEL_FOR_SPECIAL)
189
190@@ -142,11 +146,121 @@ static int is_backup_file(char *fn)
1fffd582
WD
191 return k > 0 && strcmp(fn+k, backup_suffix) == 0;
192 }
193
194+/* Search for a regular file that matches either (1) the size & modified
195+ * time (plus the basename, if possible) or (2) the size & checksum. If
196+ * we find an exact match down to the dirname, return -1 because we found
197+ * an up-to-date file in the transfer, not a renamed file. */
a47d1f86 198+static int fattr_find(struct file_struct *f, char *fname)
1fffd582
WD
199+{ /* returns an index into the_fattr_list.files, or -1 when there is no usable match */
200+ int low = the_fattr_list.low, high = the_fattr_list.high; /* bounds of the searchable part of the sorted list */
201+ int mid, ok_match = -1, good_match = -1; /* ok_match: size plus checksum/modtime matched; good_match: basename matched too */
202+ struct file_struct *fmid;
203+ int diff;
204+
205+ while (low <= high) { /* binary search that follows the key order of fattr_compare() */
206+ mid = (low + high) / 2;
207+ fmid = the_fattr_list.files[mid];
a47d1f86
WD
208+ if (F_LENGTH(fmid) != F_LENGTH(f)) { /* primary key: file length */
209+ if (F_LENGTH(fmid) < F_LENGTH(f))
1fffd582
WD
210+ low = mid + 1;
211+ else
212+ high = mid - 1;
213+ continue;
214+ }
215+ if (always_checksum) {
9bcaf4de 216+ /* We use the FLAG_FILE_SENT flag to indicate when we
a47d1f86 217+ * have computed the checksum for an entry. */
9bcaf4de 218+ if (!(f->flags & FLAG_FILE_SENT)) { /* checksum of f has not been computed yet */
1fffd582
WD
219+ if (fmid->modtime == f->modtime
220+ && f_name_cmp(fmid, f) == 0)
221+ return -1; /* assume we can't help */
d7d6347c 222+ file_checksum(fname, F_SUM(f), F_LENGTH(f));
9bcaf4de 223+ f->flags |= FLAG_FILE_SENT;
1fffd582 224+ }
70891d26 225+ diff = u_memcmp(F_SUM(fmid), F_SUM(f), checksum_len);
1fffd582
WD
226+ if (diff) {
227+ if (diff < 0)
228+ low = mid + 1;
229+ else
230+ high = mid - 1;
231+ continue;
232+ }
233+ } else {
234+ if (fmid->modtime != f->modtime) {
235+ if (fmid->modtime < f->modtime)
236+ low = mid + 1;
237+ else
238+ high = mid - 1;
239+ continue;
240+ }
241+ }
242+ ok_match = mid; /* everything except the names matched */
243+ diff = u_strcmp(fmid->basename, f->basename);
244+ if (diff == 0) {
245+ good_match = mid; /* the basename matched as well */
246+ if (fmid->dirname == f->dirname)
247+ return -1; /* file is up-to-date */
248+ if (!fmid->dirname) { /* a NULL dirname sorts first, so keep looking to the right */
249+ low = mid + 1;
250+ continue;
251+ }
252+ if (!f->dirname) { /* f has the NULL dirname, so keep looking to the left */
253+ high = mid - 1;
254+ continue;
255+ }
256+ diff = u_strcmp(fmid->dirname, f->dirname);
257+ if (diff == 0)
258+ return -1; /* file is up-to-date */
259+ }
260+ if (diff < 0)
261+ low = mid + 1;
262+ else
263+ high = mid - 1;
264+ }
265+
266+ return good_match >= 0 ? good_match : ok_match; /* prefer the basename match; both stay -1 when nothing matched */
267+}
268+
a47d1f86 269+static void look_for_rename(struct file_struct *file, char *fname) /* file: an existing receiver-side entry; fname: its path */
1fffd582
WD
270+{ /* if fattr_find() matches file to a transfer entry, hard-link fname into that entry's partial dir */
271+ struct file_struct *fp;
272+ char *partialptr, *fn;
273+ STRUCT_STAT st;
274+ int ndx;
275+
6fa0767f 276+ if (!partial_dir || (ndx = fattr_find(file, fname)) < 0) /* nothing to do without a partial dir or a match */
1fffd582
WD
277+ return;
278+
279+ fp = the_fattr_list.files[ndx]; /* the transfer-list entry that fname appears to have been renamed to */
280+ fn = f_name(fp, NULL);
281+ /* We don't provide an alternate-basis file if there is a basis file. */
282+ if (link_stat(fn, &st, 0) == 0)
283+ return;
1fffd582 284+
6fa0767f
WD
285+ if (!dry_run) { /* a dry run skips the linking and only reports the match below */
286+ if ((partialptr = partial_dir_fname(fn)) == NULL
287+ || !handle_partial_dir(partialptr, PDIR_CREATE))
288+ return;
289+ /* We only use the file if we can hard-link it into our tmp dir. */
290+ if (link(fname, partialptr) != 0) {
291+ if (errno != EEXIST) /* an already existing link is left alone; presumably PDIR_DELETE undoes the dir creation -- confirm */
292+ handle_partial_dir(partialptr, PDIR_DELETE);
293+ return;
1fffd582 294+ }
1fffd582
WD
295+ }
296+
6fa0767f
WD
297+ /* I think this falls into the -vv category with "%s is uptodate", etc. */
298+ if (verbose > 1)
299+ rprintf(FINFO, "found renamed: %s => %s\n", fname, fn);
1fffd582 300+}
87d0091c
WD
301+
302 /* Delete a file or directory. If DEL_RECURSE is set in the flags, this will
303 * delete recursively.
1fffd582 304 *
f813befd 305 * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's
15894839
WD
306 * a directory! (The buffer is used for recursion, but returned unchanged.)
307+ *
308+ * Also note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
309 */
a5e6228a 310 static enum delret delete_item(char *fbuf, int mode, int flags)
15894839 311 {
a5e6228a 312@@ -171,6 +285,8 @@ static enum delret delete_item(char *fbuf, int mode, int flags)
15894839
WD
313 goto check_ret;
314 /* OK: try to delete the directory. */
315 }
316+ if (flags & DEL_NO_DELETIONS)
317+ return DR_SUCCESS;
318
a5e6228a 319 if (!(flags & DEL_MAKE_ROOM) && max_delete >= 0 && ++deletion_count > max_delete)
15894839 320 return DR_AT_LIMIT;
a5e6228a 321@@ -226,6 +342,8 @@ static enum delret delete_item(char *fbuf, int mode, int flags)
d16b5fd6
WD
322 * its contents, otherwise just checks for content. Returns DR_SUCCESS or
323 * DR_NOT_EMPTY. Note that fname must point to a MAXPATHLEN buffer! (The
324 * buffer is used for recursion, but returned unchanged.)
1fffd582 325+ *
87d0091c 326+ * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
1fffd582 327 */
87d0091c 328 static enum delret delete_dir_contents(char *fname, int flags)
1fffd582 329 {
a5e6228a 330@@ -245,7 +363,9 @@ static enum delret delete_dir_contents(char *fname, int flags)
a47d1f86
WD
331 save_filters = push_local_filters(fname, dlen);
332
333 non_perishable_cnt = 0;
7e27b6c0 334+ file_extra_cnt += SUM_EXTRA_CNT;
a47d1f86 335 dirlist = get_dirlist(fname, dlen, 0);
7e27b6c0 336+ file_extra_cnt -= SUM_EXTRA_CNT;
a47d1f86
WD
337 ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS;
338
9c85142a 339 if (!dirlist->used)
a5e6228a
WD
340@@ -288,7 +408,8 @@ static enum delret delete_dir_contents(char *fname, int flags)
341 do_chmod(fname, fp->mode |= S_IWUSR);
342 if (delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS)
343 ret = DR_NOT_EMPTY;
344- }
345+ } else if (detect_renamed && S_ISREG(fp->mode))
a47d1f86 346+ look_for_rename(fp, fname);
a5e6228a 347 if (delete_item(fname, fp->mode, flags) != DR_SUCCESS)
d16b5fd6
WD
348 ret = DR_NOT_EMPTY;
349 }
a5e6228a 350@@ -449,13 +570,18 @@ static void do_delayed_deletions(char *delbuf)
1fffd582
WD
351 * all the --delete-WHEN options. Note that the fbuf pointer must point to a
352 * MAXPATHLEN buffer with the name of the directory in it (the functions we
353 * call will append names onto the end, but the old dir value will be restored
354- * on exit). */
73adde61 355-static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
1fffd582
WD
356+ * on exit).
357+ *
358+ * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
359+ */
73adde61
WD
360+static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev,
361+ int flags)
1fffd582 362 {
1fffd582
WD
363 static int already_warned = 0;
364 struct file_list *dirlist;
365- char delbuf[MAXPATHLEN];
6fa0767f 366- int dlen, i;
1fffd582
WD
367+ char *p, delbuf[MAXPATHLEN];
368+ unsigned remainder;
6fa0767f 369+ int dlen, i, restore_dot = 0;
1fffd582 370
73adde61 371 if (!fbuf) {
6fa0767f 372 change_local_filter_dir(NULL, 0, 0);
a5e6228a 373@@ -465,21 +591,28 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
1fffd582
WD
374 if (verbose > 2)
375 rprintf(FINFO, "delete_in_dir(%s)\n", fbuf);
376
87d0091c 377+ flags |= DEL_RECURSE;
1fffd582
WD
378+
379 if (allowed_lull)
380 maybe_send_keepalive();
381
041d67b8 382 if (io_error && !ignore_errors) {
1fffd582
WD
383- if (already_warned)
384+ if (!already_warned) {
385+ rprintf(FINFO,
386+ "IO error encountered -- skipping file deletion\n");
387+ already_warned = 1;
388+ }
389+ if (!detect_renamed)
390 return;
391- rprintf(FINFO,
392- "IO error encountered -- skipping file deletion\n");
393- already_warned = 1;
394- return;
395+ flags |= DEL_NO_DELETIONS;
396 }
397
1fffd582 398 dlen = strlen(fbuf);
fc068916 399 change_local_filter_dir(fbuf, dlen, F_DEPTH(file));
1fffd582
WD
400
401+ if (detect_renamed)
402+ unexplored_dirs--;
403+
404 if (one_file_system) {
405 if (file->flags & FLAG_TOP_DIR)
fc068916 406 filesystem_dev = *fs_dev;
a5e6228a 407@@ -489,6 +622,14 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
1fffd582
WD
408
409 dirlist = get_dirlist(fbuf, dlen, 0);
410
411+ p = fbuf + dlen;
6fa0767f
WD
412+ if (dlen == 1 && *fbuf == '.') {
413+ restore_dot = 1;
414+ p = fbuf;
415+ } else if (dlen != 1 || *fbuf != '/')
1fffd582
WD
416+ *p++ = '/';
417+ remainder = MAXPATHLEN - (p - fbuf);
418+
419 /* If an item in dirlist is not found in flist, delete it
420 * from the filesystem. */
9c85142a 421 for (i = dirlist->used; i--; ) {
a5e6228a 422@@ -501,18 +642,26 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
87d0091c 423 f_name(fp, NULL));
1fffd582 424 continue;
87d0091c 425 }
1fffd582
WD
426+ if (detect_renamed && S_ISREG(fp->mode)) {
427+ strlcpy(p, fp->basename, remainder);
a47d1f86 428+ look_for_rename(fp, fbuf);
1fffd582 429+ }
73adde61 430 if (flist_find(cur_flist, fp) < 0) {
a5e6228a
WD
431- int flags = DEL_RECURSE
432- | (!uid_ndx || (uid_t)F_OWNER(fp) == our_uid ? DEL_OWNED_BY_US : 0);
433+ int own_flag = (!uid_ndx || (uid_t)F_OWNER(fp) == our_uid ? DEL_OWNED_BY_US : 0);
1fffd582 434 f_name(fp, delbuf);
1071853f 435- if (delete_during == 2) {
a5e6228a 436- if (!remember_delete(fp, delbuf, flags))
a47d1f86 437+ if (delete_during == 2 && !(flags & DEL_NO_DELETIONS)) {
a5e6228a 438+ if (!remember_delete(fp, delbuf, own_flag | flags))
1071853f
WD
439 break;
440 } else
a5e6228a 441- delete_item(delbuf, fp->mode, flags);
1fffd582 442- }
a5e6228a 443+ delete_item(delbuf, fp->mode, own_flag | flags);
1fffd582
WD
444+ } else if (detect_renamed && S_ISDIR(fp->mode))
445+ unexplored_dirs++;
446 }
447
6fa0767f
WD
448+ if (restore_dot)
449+ fbuf[0] = '.';
1fffd582
WD
450+ fbuf[dlen] = '\0';
451+
452 flist_free(dirlist);
453 }
454
a5e6228a 455@@ -542,9 +691,9 @@ static void do_delete_pass(void)
1fffd582
WD
456 || !S_ISDIR(st.st_mode))
457 continue;
458
73adde61
WD
459- delete_in_dir(fbuf, file, &st.st_dev);
460+ delete_in_dir(fbuf, file, &st.st_dev, 0);
1fffd582 461 }
73adde61
WD
462- delete_in_dir(NULL, NULL, &dev_zero);
463+ delete_in_dir(NULL, NULL, &dev_zero, 0);
1fffd582
WD
464
465 if (do_progress && !am_server)
466 rprintf(FINFO, " \r");
a5e6228a 467@@ -1170,6 +1319,7 @@ static void list_file_entry(struct file_struct *f)
cdcd2137 468 }
1fffd582
WD
469 }
470
471+static struct bitbag *delayed_bits = NULL;
472 static int phase = 0;
ffc18846 473 static int dflt_perms;
1fffd582 474
a5e6228a 475@@ -1415,8 +1565,12 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
fc068916
WD
476 }
477 }
478 else if (delete_during && f_out != -1 && !phase && dry_run < 2
9668a39c 479- && (file->flags & FLAG_CONTENT_DIR))
73adde61 480- delete_in_dir(fname, file, &real_sx.st.st_dev);
9668a39c 481+ && (file->flags & FLAG_CONTENT_DIR)) {
9a70b743 482+ if (detect_renamed && real_ret != 0)
1fffd582 483+ unexplored_dirs++;
73adde61 484+ delete_in_dir(fname, file, &real_sx.st.st_dev,
1fffd582
WD
485+ delete_during < 0 ? DEL_NO_DELETIONS : 0);
486+ }
ffc18846 487 goto cleanup;
1fffd582
WD
488 }
489
a5e6228a 490@@ -1694,8 +1848,14 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
ffc18846 491 goto cleanup;
ccdb48f6 492 }
81172142 493 #endif
1fffd582
WD
494- if (stat_errno == ENOENT)
495+ if (stat_errno == ENOENT) {
496+ if (detect_renamed && unexplored_dirs > 0
a47d1f86 497+ && F_LENGTH(file)) {
1fffd582
WD
498+ bitbag_set_bit(delayed_bits, ndx);
499+ return;
500+ }
501 goto notify_others;
502+ }
cc3e685d 503 rsyserr(FERROR_XFER, stat_errno, "recv_generator: failed to stat %s",
1fffd582 504 full_fname(fname));
ffc18846 505 goto cleanup;
a5e6228a 506@@ -2038,6 +2198,12 @@ void generate_files(int f_out, const char *local_name)
fc068916
WD
507 if (verbose > 2)
508 rprintf(FINFO, "generator starting pid=%ld\n", (long)getpid());
1fffd582
WD
509
510+ if (detect_renamed) {
7bfcb297 511+ delayed_bits = bitbag_create(cur_flist->used);
1fffd582
WD
512+ if (!delete_before && !delete_during)
513+ delete_during = -1;
514+ }
515+
9c85142a 516 if (delete_before && !solo_file && cur_flist->used > 0)
73adde61 517 do_delete_pass();
1071853f 518 if (delete_during == 2) {
a5e6228a 519@@ -2048,7 +2214,7 @@ void generate_files(int f_out, const char *local_name)
1071853f 520 }
1fffd582
WD
521 do_progress = 0;
522
fc068916
WD
523- if (append_mode > 0 || whole_file < 0)
524+ if (append_mode > 0 || detect_renamed || whole_file < 0)
1fffd582
WD
525 whole_file = 0;
526 if (verbose >= 2) {
527 rprintf(FINFO, "delta-transmission %s\n",
a5e6228a 528@@ -2086,7 +2252,7 @@ void generate_files(int f_out, const char *local_name)
9c85142a
WD
529 dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp));
530 } else
531 dirdev = MAKEDEV(0, 0);
532- delete_in_dir(f_name(fp, fbuf), fp, &dirdev);
533+ delete_in_dir(f_name(fp, fbuf), fp, &dirdev, 0);
534 }
fc068916
WD
535 }
536 }
a5e6228a 537@@ -2129,7 +2295,21 @@ void generate_files(int f_out, const char *local_name)
2dbc45e7 538 } while ((cur_flist = cur_flist->next) != NULL);
fc068916 539
2dbc45e7 540 if (delete_during)
73adde61
WD
541- delete_in_dir(NULL, NULL, &dev_zero);
542+ delete_in_dir(NULL, NULL, &dev_zero, 0);
2dbc45e7
WD
543+ if (detect_renamed) {
544+ if (delete_during < 0)
545+ delete_during = 0;
546+ detect_renamed = 0;
1fffd582 547+
2dbc45e7 548+ for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) {
9bcaf4de 549+ struct file_struct *file = cur_flist->files[i];
2dbc45e7
WD
550+ if (local_name)
551+ strlcpy(fbuf, local_name, sizeof fbuf);
552+ else
553+ f_name(file, fbuf);
554+ recv_generator(fbuf, file, i, itemizing, code, f_out);
555+ }
556+ }
557 phase++;
558 if (verbose > 2)
559 rprintf(FINFO, "generate_files phase=%d\n", phase);
cc3e685d
WD
560diff --git a/options.c b/options.c
561--- a/options.c
562+++ b/options.c
f2863bc0 563@@ -81,6 +81,7 @@ int am_generator = 0;
a94141d9 564 int am_starting_up = 1;
1fffd582
WD
565 int relative_paths = -1;
566 int implied_dirs = 1;
567+int detect_renamed = 0;
568 int numeric_ids = 0;
569 int allow_8bit_chars = 0;
570 int force_delete = 0;
f2863bc0 571@@ -386,6 +387,7 @@ void usage(enum logcode F)
1fffd582
WD
572 rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n");
573 rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n");
574 rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n");
575+ rprintf(F," --detect-renamed try to find renamed files to speed up the transfer\n");
576 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
577 rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
578 rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
f2863bc0 579@@ -564,6 +566,7 @@ static struct poptOption long_options[] = {
1fffd582
WD
580 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
581 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
582 {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
583+ {"detect-renamed", 0, POPT_ARG_NONE, &detect_renamed, 0, 0, 0 },
584 {"fuzzy", 'y', POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 },
585 {"compress", 'z', POPT_ARG_NONE, 0, 'z', 0, 0 },
6cbbe66d 586 {"no-compress", 0, POPT_ARG_VAL, &do_compression, 0, 0, 0 },
a5e6228a 587@@ -1542,7 +1545,7 @@ int parse_arguments(int *argc_p, const char ***argv_p, int frommain)
1fffd582
WD
588 inplace = 1;
589 }
590
591- if (delay_updates && !partial_dir)
592+ if ((delay_updates || detect_renamed) && !partial_dir)
593 partial_dir = tmp_partialdir;
594
595 if (inplace) {
a5e6228a 596@@ -1551,6 +1554,7 @@ int parse_arguments(int *argc_p, const char ***argv_p, int frommain)
1fffd582
WD
597 snprintf(err_buf, sizeof err_buf,
598 "--%s cannot be used with --%s\n",
599 append_mode ? "append" : "inplace",
600+ detect_renamed ? "detect-renamed" :
601 delay_updates ? "delay-updates" : "partial-dir");
602 return 0;
603 }
a5e6228a 604@@ -1897,6 +1901,8 @@ void server_options(char **args, int *argc_p)
51bc0e89
WD
605 args[ac++] = "--super";
606 if (size_only)
607 args[ac++] = "--size-only";
608+ if (detect_renamed)
609+ args[ac++] = "--detect-renamed";
610 } else {
611 if (skip_compress) {
612 if (asprintf(&arg, "--skip-compress=%s", skip_compress) < 0)
cc3e685d
WD
613diff --git a/rsync.yo b/rsync.yo
614--- a/rsync.yo
615+++ b/rsync.yo
616@@ -385,6 +385,7 @@ to the detailed description below for a complete description. verb(
1fffd582
WD
617 --modify-window=NUM compare mod-times with reduced accuracy
618 -T, --temp-dir=DIR create temporary files in directory DIR
619 -y, --fuzzy find similar file for basis if no dest file
620+ --detect-renamed try to find renamed files to speed the xfer
621 --compare-dest=DIR also compare received files relative to DIR
622 --copy-dest=DIR ... and include copies of unchanged files
623 --link-dest=DIR hardlink to files in DIR when unchanged
a5e6228a 624@@ -1467,6 +1468,21 @@ Note that the use of the bf(--delete) option might get rid of any potential
1fffd582
WD
625 fuzzy-match files, so either use bf(--delete-after) or specify some
626 filename exclusions if you need to prevent this.
627
6fa0767f
WD
628+dit(bf(--detect-renamed)) With this option, for each new source file
629+(call it em(src/S)), rsync looks for a file em(dest/D) anywhere in the
630+destination that passes the quick check with em(src/S). If such a em(dest/D)
631+is found, rsync uses it as an alternate basis for transferring em(S). The
632+idea is that if em(src/S) was renamed from em(src/D) (as opposed to em(src/S)
633+passing the quick check with em(dest/D) by coincidence), the delta-transfer
634+algorithm will find that all the data matches between em(src/S) and em(dest/D),
635+and the transfer will be really fast.
636+
1fffd582
WD
637+By default, alternate-basis files are hard-linked into a directory named
638+".~tmp~" in each file's destination directory, but if you've specified
639+the bf(--partial-dir) option, that directory will be used instead. These
640+potential alternate-basis files will be removed as the transfer progresses.
641+This option conflicts with bf(--inplace) and bf(--append).
642+
643 dit(bf(--compare-dest=DIR)) This option instructs rsync to use em(DIR) on
644 the destination machine as an additional hierarchy to compare destination
645 files against doing transfers (if the files are missing in the destination
cc3e685d
WD
646diff --git a/util.c b/util.c
647--- a/util.c
648+++ b/util.c
4c15e800 649@@ -1019,6 +1019,32 @@ int handle_partial_dir(const char *fname, int create)
1fffd582
WD
650 return 1;
651 }
652
/* We need to supply our own strcmp function for file list comparisons
 * to ensure that signed/unsigned usage is consistent between machines.
 *
 * Both strings are compared as sequences of unsigned bytes.  The return
 * value is the difference between the first pair of bytes that differ
 * (negative if p1 sorts first, positive if p2 sorts first), or 0 if the
 * strings are identical. */
int u_strcmp(const char *p1, const char *p2)
{
	/* Walk the strings through const-qualified unsigned cursors so that
	 * no cast has to discard the const qualifier of the arguments. */
	const unsigned char *u1 = (const unsigned char *)p1;
	const unsigned char *u2 = (const unsigned char *)p2;

	/* Stop at the end of p1 or at the first mismatch; a shorter p2 is
	 * caught by the mismatch test because its NUL differs from *u1. */
	while (*u1 && *u1 == *u2) {
		u1++;
		u2++;
	}

	return (int)*u1 - (int)*u2;
}
664+
/* We need a memcmp function that compares unsigned-byte values, so that
 * the sign of the result does not depend on whether plain char is signed
 * on the host.
 *
 * Returns the difference between the first pair of bytes that differ
 * within the first len bytes of p1 and p2, or 0 if they are identical
 * (which includes the case of len == 0). */
int u_memcmp(const void *p1, const void *p2, size_t len)
{
	const unsigned char *u1 = p1;
	const unsigned char *u2 = p2;

	/* Both cursors must advance on every iteration; without the
	 * increments only the first byte of each buffer is ever examined
	 * and buffers that share a first byte always compare as equal. */
	for ( ; len--; u1++, u2++) {
		if (*u1 != *u2)
			return (int)*u1 - (int)*u2;
	}

	return 0;
}
678+
679 /**
680 * Determine if a symlink points outside the current directory tree.
681 * This is considered "unsafe" because e.g. when mirroring somebody