The patches for 3.0.5pre1.
[rsync/rsync-patches.git] / detect-renamed.diff
CommitLineData
1fffd582
WD
1This patch adds the --detect-renamed option which makes rsync notice files
2that either (1) match in size & modify-time (plus the basename, if possible)
3or (2) match in size & checksum (when --checksum was also specified) and use
4each match as an alternate basis file to speed up the transfer.
5
6The algorithm attempts to scan the receiving-side's files in an efficient
7manner. If --delete[-before] is enabled, we'll take advantage of the
8pre-transfer delete pass to prepare any alternate-basis-file matches we
9might find. If --delete-before is not enabled, rsync does the rename scan
10during the regular file-sending scan (scanning each directory right before
11the generator starts updating files from that dir). In this latter mode,
12rsync might delay the updating of a file (if no alternate-basis match was
13yet found) until the full scan of the receiving side is complete, at which
14point any delayed files are processed.
15
16I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that
17takes advantage of rsync's pre-existing partial-dir logic. This uses less
18memory than trying to keep track of the matches internally, and also allows
19any deletions or file-updates to occur normally without interfering with
20these alternate-basis discoveries.
21
03019e41 22To use this patch, run these commands for a successful build:
1fffd582 23
03019e41
WD
24 patch -p1 <patches/detect-renamed.diff
25 ./configure (optional if already run)
1fffd582
WD
26 make
27
28TODO:
29
30 We need to never return a match from fattr_find() that has a basis
31 file. This will ensure that we don't try to give a renamed file to
32 a file that can't use it, while missing out on giving it to a file
33 that could use it.
34
cc3e685d
WD
35diff --git a/compat.c b/compat.c
36--- a/compat.c
37+++ b/compat.c
cdcd2137
WD
38@@ -41,6 +41,7 @@ extern int checksum_seed;
39 extern int basis_dir_cnt;
40 extern int prune_empty_dirs;
41 extern int protocol_version;
9bcaf4de 42+extern int detect_renamed;
cdcd2137
WD
43 extern int protect_args;
44 extern int preserve_uid;
45 extern int preserve_gid;
ae306a29 46@@ -120,6 +121,7 @@ void set_allow_inc_recurse(void)
9aab301c
WD
47 allow_inc_recurse = 0;
48 else if (!am_sender
49 && (delete_before || delete_after
50+ || detect_renamed
51 || delay_updates || prune_empty_dirs))
51bc0e89
WD
52 allow_inc_recurse = 0;
53 else if (am_server && !local_server
cc3e685d
WD
54diff --git a/flist.c b/flist.c
55--- a/flist.c
56+++ b/flist.c
f9df736a 57@@ -63,6 +63,7 @@ extern int non_perishable_cnt;
1fffd582
WD
58 extern int prune_empty_dirs;
59 extern int copy_links;
60 extern int copy_unsafe_links;
61+extern int detect_renamed;
62 extern int protocol_version;
63 extern int sanitize_paths;
cc3e685d 64 extern int munge_symlinks;
ae306a29 65@@ -121,6 +122,8 @@ static int64 tmp_dev, tmp_ino;
7b80cd0e 66 #endif
87a38eea 67 static char tmp_sum[MAX_DIGEST_LEN];
1fffd582
WD
68
69+struct file_list the_fattr_list;
70+
87a38eea 71 static char empty_sum[MAX_DIGEST_LEN];
a47d1f86 72 static int flist_count_offset; /* for --delete --progress */
6cbbe66d 73 static int dir_count = 0;
ae306a29 74@@ -288,6 +291,45 @@ static int is_excluded(const char *fname, int is_dir, int filter_level)
73adde61 75 return 0;
1fffd582
WD
76 }
77
78+static int fattr_compare(struct file_struct **file1, struct file_struct **file2)
79+{
80+ struct file_struct *f1 = *file1;
81+ struct file_struct *f2 = *file2;
a47d1f86 82+ int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2);
1fffd582
WD
83+ int diff;
84+
a47d1f86
WD
85+ if (!f1->basename || !S_ISREG(f1->mode) || !len1) {
86+ if (!f2->basename || !S_ISREG(f2->mode) || !len2)
1fffd582
WD
87+ return 0;
88+ return 1;
89+ }
a47d1f86 90+ if (!f2->basename || !S_ISREG(f2->mode) || !len2)
1fffd582
WD
91+ return -1;
92+
93+ /* Don't use diff for values that are longer than an int. */
a47d1f86
WD
94+ if (len1 != len2)
95+ return len1 < len2 ? -1 : 1;
1fffd582
WD
96+
97+ if (always_checksum) {
70891d26 98+ diff = u_memcmp(F_SUM(f1), F_SUM(f2), checksum_len);
1fffd582
WD
99+ if (diff)
100+ return diff;
101+ } else if (f1->modtime != f2->modtime)
102+ return f1->modtime < f2->modtime ? -1 : 1;
103+
104+ diff = u_strcmp(f1->basename, f2->basename);
105+ if (diff)
106+ return diff;
107+
108+ if (f1->dirname == f2->dirname)
109+ return 0;
110+ if (!f1->dirname)
111+ return -1;
112+ if (!f2->dirname)
113+ return 1;
114+ return u_strcmp(f1->dirname, f2->dirname);
115+}
116+
73adde61 117 static void send_directory(int f, struct file_list *flist,
fc068916 118 char *fbuf, int len, int flags);
1fffd582 119
ae306a29 120@@ -2357,6 +2399,25 @@ struct file_list *recv_file_list(int f)
6fa0767f 121
f2863bc0 122 flist_sort_and_clean(flist, relative_paths);
1fffd582
WD
123
124+ if (detect_renamed) {
7bfcb297
WD
125+ int j = flist->used;
126+ the_fattr_list.used = j;
1fffd582
WD
127+ the_fattr_list.files = new_array(struct file_struct *, j);
128+ if (!the_fattr_list.files)
70891d26 129+ out_of_memory("recv_file_list");
1fffd582
WD
130+ memcpy(the_fattr_list.files, flist->files,
131+ j * sizeof (struct file_struct *));
132+ qsort(the_fattr_list.files, j,
fc068916 133+ sizeof the_fattr_list.files[0], (int (*)())fattr_compare);
1fffd582
WD
134+ the_fattr_list.low = 0;
135+ while (j-- > 0) {
136+ struct file_struct *fp = the_fattr_list.files[j];
a47d1f86 137+ if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp))
1fffd582
WD
138+ break;
139+ }
140+ the_fattr_list.high = j;
141+ }
142+
6fa0767f
WD
143 if (protocol_version < 30) {
144 /* Recv the io_error flag */
145 if (ignore_errors)
cc3e685d
WD
146diff --git a/generator.c b/generator.c
147--- a/generator.c
148+++ b/generator.c
c0c7984e 149@@ -80,6 +80,7 @@ extern char *basis_dir[];
1fffd582
WD
150 extern int compare_dest;
151 extern int copy_dest;
152 extern int link_dest;
153+extern int detect_renamed;
154 extern int whole_file;
155 extern int list_only;
ccdb48f6 156 extern int read_batch;
4c107044 157@@ -98,6 +99,7 @@ extern char *backup_suffix;
1fffd582 158 extern int backup_suffix_len;
fc068916 159 extern struct file_list *cur_flist, *first_flist, *dir_flist;
c0c7984e 160 extern struct filter_list_struct daemon_filter_list;
fc068916 161+extern struct file_list the_fattr_list;
d4dd2dd5
WD
162
163 int ignore_perishable = 0;
164 int non_perishable_cnt = 0;
4c107044 165@@ -105,6 +107,7 @@ int maybe_ATTRS_REPORT = 0;
d16b5fd6 166
fc068916 167 static dev_t dev_zero;
1fffd582
WD
168 static int deletion_count = 0; /* used to implement --max-delete */
169+static int unexplored_dirs = 1;
1071853f
WD
170 static int deldelay_size = 0, deldelay_cnt = 0;
171 static char *deldelay_buf = NULL;
172 static int deldelay_fd = -1;
4c107044 173@@ -115,7 +118,7 @@ static int need_retouch_dir_times;
9c85142a 174 static int need_retouch_dir_perms;
2dbc45e7 175 static const char *solo_file = NULL;
1fffd582 176
d16b5fd6
WD
177-/* For calling delete_item() and delete_dir_contents(). */
178+/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */
f9df736a 179 #define DEL_NO_UID_WRITE (1<<0) /* file/dir has our uid w/o write perm */
a5e6228a 180 #define DEL_RECURSE (1<<1) /* if dir, delete all contents */
d16b5fd6 181 #define DEL_DIR_IS_EMPTY (1<<2) /* internal delete_FUNCTIONS use only */
4c107044 182@@ -124,6 +127,7 @@ static const char *solo_file = NULL;
a5e6228a
WD
183 #define DEL_FOR_SYMLINK (1<<5) /* making room for a replacement symlink */
184 #define DEL_FOR_DEVICE (1<<6) /* making room for a replacement device */
185 #define DEL_FOR_SPECIAL (1<<7) /* making room for a replacement special */
186+#define DEL_NO_DELETIONS (1<<9) /* just check for renames w/o deleting */
1fffd582 187
a5e6228a
WD
188 #define DEL_MAKE_ROOM (DEL_FOR_FILE|DEL_FOR_DIR|DEL_FOR_SYMLINK|DEL_FOR_DEVICE|DEL_FOR_SPECIAL)
189
4c107044 190@@ -148,11 +152,121 @@ static int is_backup_file(char *fn)
1fffd582
WD
191 return k > 0 && strcmp(fn+k, backup_suffix) == 0;
192 }
193
194+/* Search for a regular file that matches either (1) the size & modified
195+ * time (plus the basename, if possible) or (2) the size & checksum. If
196+ * we find an exact match down to the dirname, return -1 because we found
197+ * an up-to-date file in the transfer, not a renamed file. */
a47d1f86 198+static int fattr_find(struct file_struct *f, char *fname)
1fffd582
WD
199+{
200+ int low = the_fattr_list.low, high = the_fattr_list.high;
201+ int mid, ok_match = -1, good_match = -1;
202+ struct file_struct *fmid;
203+ int diff;
204+
205+ while (low <= high) {
206+ mid = (low + high) / 2;
207+ fmid = the_fattr_list.files[mid];
a47d1f86
WD
208+ if (F_LENGTH(fmid) != F_LENGTH(f)) {
209+ if (F_LENGTH(fmid) < F_LENGTH(f))
1fffd582
WD
210+ low = mid + 1;
211+ else
212+ high = mid - 1;
213+ continue;
214+ }
215+ if (always_checksum) {
9bcaf4de 216+ /* We use the FLAG_FILE_SENT flag to indicate when we
a47d1f86 217+ * have computed the checksum for an entry. */
9bcaf4de 218+ if (!(f->flags & FLAG_FILE_SENT)) {
1fffd582
WD
219+ if (fmid->modtime == f->modtime
220+ && f_name_cmp(fmid, f) == 0)
221+ return -1; /* assume we can't help */
d7d6347c 222+ file_checksum(fname, F_SUM(f), F_LENGTH(f));
9bcaf4de 223+ f->flags |= FLAG_FILE_SENT;
1fffd582 224+ }
70891d26 225+ diff = u_memcmp(F_SUM(fmid), F_SUM(f), checksum_len);
1fffd582
WD
226+ if (diff) {
227+ if (diff < 0)
228+ low = mid + 1;
229+ else
230+ high = mid - 1;
231+ continue;
232+ }
233+ } else {
234+ if (fmid->modtime != f->modtime) {
235+ if (fmid->modtime < f->modtime)
236+ low = mid + 1;
237+ else
238+ high = mid - 1;
239+ continue;
240+ }
241+ }
242+ ok_match = mid;
243+ diff = u_strcmp(fmid->basename, f->basename);
244+ if (diff == 0) {
245+ good_match = mid;
246+ if (fmid->dirname == f->dirname)
247+ return -1; /* file is up-to-date */
248+ if (!fmid->dirname) {
249+ low = mid + 1;
250+ continue;
251+ }
252+ if (!f->dirname) {
253+ high = mid - 1;
254+ continue;
255+ }
256+ diff = u_strcmp(fmid->dirname, f->dirname);
257+ if (diff == 0)
258+ return -1; /* file is up-to-date */
259+ }
260+ if (diff < 0)
261+ low = mid + 1;
262+ else
263+ high = mid - 1;
264+ }
265+
266+ return good_match >= 0 ? good_match : ok_match;
267+}
268+
a47d1f86 269+static void look_for_rename(struct file_struct *file, char *fname)
1fffd582
WD
270+{
271+ struct file_struct *fp;
272+ char *partialptr, *fn;
273+ STRUCT_STAT st;
274+ int ndx;
275+
6fa0767f 276+ if (!partial_dir || (ndx = fattr_find(file, fname)) < 0)
1fffd582
WD
277+ return;
278+
279+ fp = the_fattr_list.files[ndx];
280+ fn = f_name(fp, NULL);
281+ /* We don't provide an alternate-basis file if there is a basis file. */
282+ if (link_stat(fn, &st, 0) == 0)
283+ return;
1fffd582 284+
6fa0767f
WD
285+ if (!dry_run) {
286+ if ((partialptr = partial_dir_fname(fn)) == NULL
287+ || !handle_partial_dir(partialptr, PDIR_CREATE))
288+ return;
289+ /* We only use the file if we can hard-link it into our tmp dir. */
290+ if (link(fname, partialptr) != 0) {
291+ if (errno != EEXIST)
292+ handle_partial_dir(partialptr, PDIR_DELETE);
293+ return;
1fffd582 294+ }
1fffd582
WD
295+ }
296+
6fa0767f
WD
297+ /* I think this falls into the -vv category with "%s is uptodate", etc. */
298+ if (verbose > 1)
299+ rprintf(FINFO, "found renamed: %s => %s\n", fname, fn);
1fffd582 300+}
87d0091c
WD
301+
302 /* Delete a file or directory. If DEL_RECURSE is set in the flags, this will
303 * delete recursively.
1fffd582 304 *
f813befd 305 * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's
15894839
WD
306 * a directory! (The buffer is used for recursion, but returned unchanged.)
307+ *
308+ * Also note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
309 */
f9df736a 310 static enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
15894839 311 {
4c107044 312@@ -186,6 +300,8 @@ static enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
15894839
WD
313 goto check_ret;
314 /* OK: try to delete the directory. */
315 }
316+ if (flags & DEL_NO_DELETIONS)
317+ return DR_SUCCESS;
318
a5e6228a 319 if (!(flags & DEL_MAKE_ROOM) && max_delete >= 0 && ++deletion_count > max_delete)
15894839 320 return DR_AT_LIMIT;
4c107044 321@@ -241,6 +357,8 @@ static enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
d16b5fd6
WD
322 * its contents, otherwise just checks for content. Returns DR_SUCCESS or
323 * DR_NOT_EMPTY. Note that fname must point to a MAXPATHLEN buffer! (The
324 * buffer is used for recursion, but returned unchanged.)
1fffd582 325+ *
87d0091c 326+ * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
1fffd582 327 */
f9df736a 328 static enum delret delete_dir_contents(char *fname, uint16 flags)
1fffd582 329 {
4c107044 330@@ -260,7 +378,9 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
a47d1f86
WD
331 save_filters = push_local_filters(fname, dlen);
332
333 non_perishable_cnt = 0;
7e27b6c0 334+ file_extra_cnt += SUM_EXTRA_CNT;
a47d1f86 335 dirlist = get_dirlist(fname, dlen, 0);
7e27b6c0 336+ file_extra_cnt -= SUM_EXTRA_CNT;
a47d1f86
WD
337 ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS;
338
9c85142a 339 if (!dirlist->used)
4c107044 340@@ -300,7 +420,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
f9df736a 341 if (S_ISDIR(fp->mode)) {
a5e6228a
WD
342 if (delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS)
343 ret = DR_NOT_EMPTY;
344- }
345+ } else if (detect_renamed && S_ISREG(fp->mode))
a47d1f86 346+ look_for_rename(fp, fname);
a5e6228a 347 if (delete_item(fname, fp->mode, flags) != DR_SUCCESS)
d16b5fd6
WD
348 ret = DR_NOT_EMPTY;
349 }
4c107044 350@@ -465,13 +586,18 @@ static void do_delayed_deletions(char *delbuf)
1fffd582
WD
351 * all the --delete-WHEN options. Note that the fbuf pointer must point to a
352 * MAXPATHLEN buffer with the name of the directory in it (the functions we
353 * call will append names onto the end, but the old dir value will be restored
354- * on exit). */
73adde61 355-static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
1fffd582
WD
356+ * on exit).
357+ *
358+ * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
359+ */
73adde61 360+static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev,
f9df736a 361+ int del_flags)
1fffd582 362 {
1fffd582
WD
363 static int already_warned = 0;
364 struct file_list *dirlist;
365- char delbuf[MAXPATHLEN];
6fa0767f 366- int dlen, i;
1fffd582
WD
367+ char *p, delbuf[MAXPATHLEN];
368+ unsigned remainder;
6fa0767f 369+ int dlen, i, restore_dot = 0;
f9df736a 370 int save_uid_ndx = uid_ndx;
1fffd582 371
73adde61 372 if (!fbuf) {
4c107044 373@@ -486,17 +612,22 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
1fffd582
WD
374 maybe_send_keepalive();
375
041d67b8 376 if (io_error && !ignore_errors) {
1fffd582
WD
377- if (already_warned)
378+ if (!already_warned) {
379+ rprintf(FINFO,
380+ "IO error encountered -- skipping file deletion\n");
381+ already_warned = 1;
382+ }
383+ if (!detect_renamed)
384 return;
385- rprintf(FINFO,
386- "IO error encountered -- skipping file deletion\n");
387- already_warned = 1;
388- return;
f9df736a 389+ del_flags |= DEL_NO_DELETIONS;
1fffd582
WD
390 }
391
1fffd582 392 dlen = strlen(fbuf);
fc068916 393 change_local_filter_dir(fbuf, dlen, F_DEPTH(file));
1fffd582
WD
394
395+ if (detect_renamed)
396+ unexplored_dirs--;
397+
398 if (one_file_system) {
399 if (file->flags & FLAG_TOP_DIR)
fc068916 400 filesystem_dev = *fs_dev;
4c107044 401@@ -509,6 +640,14 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
1fffd582
WD
402
403 dirlist = get_dirlist(fbuf, dlen, 0);
404
405+ p = fbuf + dlen;
6fa0767f
WD
406+ if (dlen == 1 && *fbuf == '.') {
407+ restore_dot = 1;
408+ p = fbuf;
409+ } else if (dlen != 1 || *fbuf != '/')
1fffd582
WD
410+ *p++ = '/';
411+ remainder = MAXPATHLEN - (p - fbuf);
412+
413 /* If an item in dirlist is not found in flist, delete it
414 * from the filesystem. */
9c85142a 415 for (i = dirlist->used; i--; ) {
4c107044 416@@ -521,19 +660,28 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
87d0091c 417 f_name(fp, NULL));
1fffd582 418 continue;
87d0091c 419 }
1fffd582
WD
420+ if (detect_renamed && S_ISREG(fp->mode)) {
421+ strlcpy(p, fp->basename, remainder);
a47d1f86 422+ look_for_rename(fp, fbuf);
1fffd582 423+ }
73adde61 424 if (flist_find(cur_flist, fp) < 0) {
f9df736a
WD
425 int flags = DEL_RECURSE;
426 if (!(fp->mode & S_IWUSR) && !am_root && (uid_t)F_OWNER(fp) == our_uid)
427 flags |= DEL_NO_UID_WRITE;
1fffd582 428 f_name(fp, delbuf);
1071853f 429- if (delete_during == 2) {
a5e6228a 430- if (!remember_delete(fp, delbuf, flags))
f9df736a
WD
431+ if (delete_during == 2 && !(del_flags & DEL_NO_DELETIONS)) {
432+ if (!remember_delete(fp, delbuf, del_flags | flags))
1071853f
WD
433 break;
434 } else
a5e6228a 435- delete_item(delbuf, fp->mode, flags);
1fffd582 436- }
f9df736a 437+ delete_item(delbuf, fp->mode, del_flags | flags);
1fffd582
WD
438+ } else if (detect_renamed && S_ISDIR(fp->mode))
439+ unexplored_dirs++;
440 }
441
6fa0767f
WD
442+ if (restore_dot)
443+ fbuf[0] = '.';
1fffd582
WD
444+ fbuf[dlen] = '\0';
445+
446 flist_free(dirlist);
1fffd582 447
f9df736a 448 if (!save_uid_ndx) {
4c107044 449@@ -571,9 +719,9 @@ static void do_delete_pass(void)
1fffd582
WD
450 || !S_ISDIR(st.st_mode))
451 continue;
452
73adde61
WD
453- delete_in_dir(fbuf, file, &st.st_dev);
454+ delete_in_dir(fbuf, file, &st.st_dev, 0);
1fffd582 455 }
73adde61
WD
456- delete_in_dir(NULL, NULL, &dev_zero);
457+ delete_in_dir(NULL, NULL, &dev_zero, 0);
1fffd582
WD
458
459 if (do_progress && !am_server)
460 rprintf(FINFO, " \r");
4c107044 461@@ -1226,6 +1374,7 @@ static void list_file_entry(struct file_struct *f)
cdcd2137 462 }
1fffd582
WD
463 }
464
465+static struct bitbag *delayed_bits = NULL;
466 static int phase = 0;
ffc18846 467 static int dflt_perms;
1fffd582 468
4c107044 469@@ -1502,9 +1651,12 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
fc068916 470 }
c0c7984e 471 else if (delete_during && f_out != -1 && !phase
65ecbe35
WD
472 && !(file->flags & FLAG_MISSING_DIR)) {
473- if (file->flags & FLAG_CONTENT_DIR)
474- delete_in_dir(fname, file, &real_sx.st.st_dev);
475- else
476+ if (file->flags & FLAG_CONTENT_DIR) {
477+ if (detect_renamed && real_ret != 0)
478+ unexplored_dirs++;
479+ delete_in_dir(fname, file, &real_sx.st.st_dev,
480+ delete_during < 0 ? DEL_NO_DELETIONS : 0);
481+ } else
482 change_local_filter_dir(fname, strlen(fname), F_DEPTH(file));
65ecbe35 483 }
cbdf862c 484 goto cleanup;
4c107044 485@@ -1782,8 +1934,14 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
ffc18846 486 goto cleanup;
ccdb48f6 487 }
81172142 488 #endif
1fffd582
WD
489- if (stat_errno == ENOENT)
490+ if (stat_errno == ENOENT) {
491+ if (detect_renamed && unexplored_dirs > 0
a47d1f86 492+ && F_LENGTH(file)) {
1fffd582
WD
493+ bitbag_set_bit(delayed_bits, ndx);
494+ return;
495+ }
496 goto notify_others;
497+ }
cc3e685d 498 rsyserr(FERROR_XFER, stat_errno, "recv_generator: failed to stat %s",
1fffd582 499 full_fname(fname));
ffc18846 500 goto cleanup;
4c107044 501@@ -2184,6 +2342,12 @@ void generate_files(int f_out, const char *local_name)
fc068916
WD
502 if (verbose > 2)
503 rprintf(FINFO, "generator starting pid=%ld\n", (long)getpid());
1fffd582
WD
504
505+ if (detect_renamed) {
7bfcb297 506+ delayed_bits = bitbag_create(cur_flist->used);
1fffd582
WD
507+ if (!delete_before && !delete_during)
508+ delete_during = -1;
509+ }
510+
9c85142a 511 if (delete_before && !solo_file && cur_flist->used > 0)
73adde61 512 do_delete_pass();
1071853f 513 if (delete_during == 2) {
4c107044 514@@ -2194,7 +2358,7 @@ void generate_files(int f_out, const char *local_name)
1071853f 515 }
1fffd582
WD
516 do_progress = 0;
517
fc068916
WD
518- if (append_mode > 0 || whole_file < 0)
519+ if (append_mode > 0 || detect_renamed || whole_file < 0)
1fffd582
WD
520 whole_file = 0;
521 if (verbose >= 2) {
522 rprintf(FINFO, "delta-transmission %s\n",
4c107044
WD
523@@ -2236,7 +2400,7 @@ void generate_files(int f_out, const char *local_name)
524 dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp));
525 } else
526 dirdev = MAKEDEV(0, 0);
65ecbe35
WD
527- delete_in_dir(fbuf, fp, &dirdev);
528+ delete_in_dir(fbuf, fp, &dirdev, 0);
529 } else
530 change_local_filter_dir(fbuf, strlen(fbuf), F_DEPTH(fp));
fc068916 531 }
963ca808
WD
532@@ -2286,7 +2450,21 @@ void generate_files(int f_out, const char *local_name)
533 write_ndx(f_out, NDX_DONE);
fc068916 534
2dbc45e7 535 if (delete_during)
73adde61
WD
536- delete_in_dir(NULL, NULL, &dev_zero);
537+ delete_in_dir(NULL, NULL, &dev_zero, 0);
2dbc45e7
WD
538+ if (detect_renamed) {
539+ if (delete_during < 0)
540+ delete_during = 0;
541+ detect_renamed = 0;
1fffd582 542+
2dbc45e7 543+ for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) {
9bcaf4de 544+ struct file_struct *file = cur_flist->files[i];
2dbc45e7
WD
545+ if (local_name)
546+ strlcpy(fbuf, local_name, sizeof fbuf);
547+ else
548+ f_name(file, fbuf);
549+ recv_generator(fbuf, file, i, itemizing, code, f_out);
550+ }
551+ }
552 phase++;
553 if (verbose > 2)
554 rprintf(FINFO, "generate_files phase=%d\n", phase);
cc3e685d
WD
555diff --git a/options.c b/options.c
556--- a/options.c
557+++ b/options.c
c0c7984e 558@@ -82,6 +82,7 @@ int am_generator = 0;
a94141d9 559 int am_starting_up = 1;
1fffd582
WD
560 int relative_paths = -1;
561 int implied_dirs = 1;
562+int detect_renamed = 0;
563 int numeric_ids = 0;
564 int allow_8bit_chars = 0;
565 int force_delete = 0;
c0c7984e 566@@ -391,6 +392,7 @@ void usage(enum logcode F)
1fffd582
WD
567 rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n");
568 rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n");
569 rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n");
570+ rprintf(F," --detect-renamed try to find renamed files to speed up the transfer\n");
571 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
572 rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
573 rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
c0c7984e 574@@ -578,6 +580,7 @@ static struct poptOption long_options[] = {
1fffd582
WD
575 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
576 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
577 {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
578+ {"detect-renamed", 0, POPT_ARG_NONE, &detect_renamed, 0, 0, 0 },
c0c7984e
WD
579 {"fuzzy", 'y', POPT_ARG_VAL, &fuzzy_basis, 1, 0, 0 },
580 {"no-fuzzy", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
581 {"no-y", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
582@@ -1590,7 +1593,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
1fffd582
WD
583 inplace = 1;
584 }
585
586- if (delay_updates && !partial_dir)
587+ if ((delay_updates || detect_renamed) && !partial_dir)
588 partial_dir = tmp_partialdir;
589
590 if (inplace) {
c0c7984e 591@@ -1599,6 +1602,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
1fffd582
WD
592 snprintf(err_buf, sizeof err_buf,
593 "--%s cannot be used with --%s\n",
594 append_mode ? "append" : "inplace",
595+ detect_renamed ? "detect-renamed" :
596 delay_updates ? "delay-updates" : "partial-dir");
597 return 0;
598 }
963ca808 599@@ -1960,6 +1964,8 @@ void server_options(char **args, int *argc_p)
51bc0e89
WD
600 args[ac++] = "--super";
601 if (size_only)
602 args[ac++] = "--size-only";
603+ if (detect_renamed)
604+ args[ac++] = "--detect-renamed";
605 } else {
606 if (skip_compress) {
607 if (asprintf(&arg, "--skip-compress=%s", skip_compress) < 0)
cc3e685d
WD
608diff --git a/rsync.yo b/rsync.yo
609--- a/rsync.yo
610+++ b/rsync.yo
611@@ -385,6 +385,7 @@ to the detailed description below for a complete description. verb(
1fffd582
WD
612 --modify-window=NUM compare mod-times with reduced accuracy
613 -T, --temp-dir=DIR create temporary files in directory DIR
614 -y, --fuzzy find similar file for basis if no dest file
615+ --detect-renamed try to find renamed files to speed the xfer
616 --compare-dest=DIR also compare received files relative to DIR
617 --copy-dest=DIR ... and include copies of unchanged files
618 --link-dest=DIR hardlink to files in DIR when unchanged
ae306a29 619@@ -1487,6 +1488,21 @@ Note that the use of the bf(--delete) option might get rid of any potential
1fffd582
WD
620 fuzzy-match files, so either use bf(--delete-after) or specify some
621 filename exclusions if you need to prevent this.
622
6fa0767f
WD
623+dit(bf(--detect-renamed)) With this option, for each new source file
624+(call it em(src/S)), rsync looks for a file em(dest/D) anywhere in the
625+destination that passes the quick check with em(src/S). If such a em(dest/D)
626+is found, rsync uses it as an alternate basis for transferring em(S). The
627+idea is that if em(src/S) was renamed from em(src/D) (as opposed to em(src/S)
628+passing the quick check with em(dest/D) by coincidence), the delta-transfer
629+algorithm will find that all the data matches between em(src/S) and em(dest/D),
630+and the transfer will be really fast.
631+
1fffd582
WD
632+By default, alternate-basis files are hard-linked into a directory named
633+".~tmp~" in each file's destination directory, but if you've specified
634+the bf(--partial-dir) option, that directory will be used instead. These
635+potential alternate-basis files will be removed as the transfer progresses.
636+This option conflicts with bf(--inplace) and bf(--append).
637+
638 dit(bf(--compare-dest=DIR)) This option instructs rsync to use em(DIR) on
639 the destination machine as an additional hierarchy to compare destination
640 files against doing transfers (if the files are missing in the destination
cc3e685d
WD
641diff --git a/util.c b/util.c
642--- a/util.c
643+++ b/util.c
963ca808 644@@ -1157,6 +1157,32 @@ int handle_partial_dir(const char *fname, int create)
1fffd582
WD
645 return 1;
646 }
647
/* A strcmp() work-alike for file list comparisons that always treats the
 * characters as unsigned bytes, so the signed/unsigned handling (and thus
 * the resulting order) is consistent between machines.  Returns the
 * difference between the first pair of differing bytes, or 0 if the two
 * strings are identical. */
int u_strcmp(const char *p1, const char *p2)
{
	const unsigned char *a = (const unsigned char *)p1;
	const unsigned char *b = (const unsigned char *)p2;

	/* Stop at the end of the first string or at the first mismatch. */
	while (*a && *a == *b) {
		a++;
		b++;
	}

	return (int)*a - (int)*b;
}
659+
/* We need a memcmp function that compares unsigned-byte values.  Returns
 * the difference between the first pair of differing bytes (each taken as
 * an unsigned value), or 0 if the first "len" bytes of p1 and p2 are
 * identical (which includes the len == 0 case). */
int u_memcmp(const void *p1, const void *p2, size_t len)
{
	const unsigned char *u1 = p1;
	const unsigned char *u2 = p2;

	/* Both pointers must advance on every iteration; without that only
	 * the first byte pair would ever be examined. */
	for ( ; len--; u1++, u2++) {
		if (*u1 != *u2)
			return (int)*u1 - (int)*u2;
	}

	return 0;
}
673+
674 /**
675 * Determine if a symlink points outside the current directory tree.
676 * This is considered "unsafe" because e.g. when mirroring somebody