The patches for 3.0.0pre9.
[rsync/rsync-patches.git] / detect-renamed.diff
1 This patch adds the --detect-renamed option which makes rsync notice files
2 that either (1) match in size & modify-time (plus the basename, if possible)
3 or (2) match in size & checksum (when --checksum was also specified) and use
4 each match as an alternate basis file to speed up the transfer.
5
6 The algorithm attempts to scan the receiving-side's files in an efficient
7 manner.  If --delete[-before] is enabled, we'll take advantage of the
8 pre-transfer delete pass to prepare any alternate-basis-file matches we
9 might find.  If --delete-before is not enabled, rsync does the rename scan
10 during the regular file-sending scan (scanning each directory right before
11 the generator starts updating files from that dir).  In this latter mode,
12 rsync might delay the updating of a file (if no alternate-basis match was
13 yet found) until the full scan of the receiving side is complete, at which
14 point any delayed files are processed.
15
16 I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that
17 takes advantage of rsync's pre-existing partial-dir logic.  This uses less
18 memory than trying to keep track of the matches internally, and also allows
19 any deletions or file-updates to occur normally without interfering with
20 these alternate-basis discoveries.
21
22 To use this patch, run these commands for a successful build:
23
24     patch -p1 <patches/detect-renamed.diff
25     ./configure                                 (optional if already run)
26     make
27
28 TODO:
29
30   We need to never return a match from fattr_find() that has a basis
31   file.  This will ensure that we don't try to give a renamed file to
32   a file that can't use it, while missing out on giving it to a file
33   that could use it.
34
35 diff --git a/compat.c b/compat.c
36 --- a/compat.c
37 +++ b/compat.c
38 @@ -41,6 +41,7 @@ extern int checksum_seed;
39  extern int basis_dir_cnt;
40  extern int prune_empty_dirs;
41  extern int protocol_version;
42 +extern int detect_renamed;
43  extern int protect_args;
44  extern int preserve_uid;
45  extern int preserve_gid;
46 @@ -107,6 +108,7 @@ void set_allow_inc_recurse(void)
47                 allow_inc_recurse = 0;
48         else if (!am_sender
49          && (delete_before || delete_after
50 +         || detect_renamed
51           || delay_updates || prune_empty_dirs))
52                 allow_inc_recurse = 0;
53         else if (am_server && !local_server
54 diff --git a/flist.c b/flist.c
55 --- a/flist.c
56 +++ b/flist.c
57 @@ -61,6 +61,7 @@ extern int non_perishable_cnt;
58  extern int prune_empty_dirs;
59  extern int copy_links;
60  extern int copy_unsafe_links;
61 +extern int detect_renamed;
62  extern int protocol_version;
63  extern int sanitize_paths;
64  extern int munge_symlinks;
65 @@ -118,6 +119,8 @@ static int64 tmp_dev, tmp_ino;
66  #endif
67  static char tmp_sum[MAX_DIGEST_LEN];
68  
69 +struct file_list the_fattr_list;
70 +
71  static char empty_sum[MAX_DIGEST_LEN];
72  static int flist_count_offset; /* for --delete --progress */
73  static int dir_count = 0;
74 @@ -261,6 +264,45 @@ static int is_excluded(char *fname, int is_dir, int filter_level)
75         return 0;
76  }
77  
78 +static int fattr_compare(struct file_struct **file1, struct file_struct **file2)
79 +{
80 +       struct file_struct *f1 = *file1;
81 +       struct file_struct *f2 = *file2;
82 +       int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2);
83 +       int diff;
84 +       /* Entries with no basename, a non-regular mode, or zero length sort last. */
85 +       if (!f1->basename || !S_ISREG(f1->mode) || !len1) {
86 +               if (!f2->basename || !S_ISREG(f2->mode) || !len2)
87 +                       return 0;
88 +               return 1;
89 +       }
90 +       if (!f2->basename || !S_ISREG(f2->mode) || !len2)
91 +               return -1;
92 +
93 +       /* Don't use diff for values that are longer than an int. */
94 +       if (len1 != len2)
95 +               return len1 < len2 ? -1 : 1;
96 +       /* Same length: order by checksum if always_checksum, else by modtime. */
97 +       if (always_checksum) {
98 +               diff = u_memcmp(F_SUM(f1), F_SUM(f2), checksum_len);
99 +               if (diff)
100 +                       return diff;
101 +       } else if (f1->modtime != f2->modtime)
102 +               return f1->modtime < f2->modtime ? -1 : 1;
103 +       /* Then by basename, and finally by dirname (a NULL dirname sorts first). */
104 +       diff = u_strcmp(f1->basename, f2->basename);
105 +       if (diff)
106 +               return diff;
107 +
108 +       if (f1->dirname == f2->dirname)
109 +               return 0;
110 +       if (!f1->dirname)
111 +               return -1;
112 +       if (!f2->dirname)
113 +               return 1;
114 +       return u_strcmp(f1->dirname, f2->dirname);
115 +}
116 +
117  static void send_directory(int f, struct file_list *flist,
118                            char *fbuf, int len, int flags);
119  
120 @@ -2178,6 +2220,25 @@ struct file_list *recv_file_list(int f)
121  
122         flist_sort_and_clean(flist, relative_paths);
123  
124 +       if (detect_renamed) {
125 +               int j = flist->used;
126 +               the_fattr_list.used = j;
127 +               the_fattr_list.files = new_array(struct file_struct *, j);
128 +               if (!the_fattr_list.files)
129 +                       out_of_memory("recv_file_list");
130 +               memcpy(the_fattr_list.files, flist->files,
131 +                      j * sizeof (struct file_struct *));
132 +               qsort(the_fattr_list.files, j,
133 +                     sizeof the_fattr_list.files[0], (int (*)())fattr_compare);
134 +               the_fattr_list.low = 0;
135 +               while (j-- > 0) {
136 +                       struct file_struct *fp = the_fattr_list.files[j];
137 +                       if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp))
138 +                               break;
139 +               }
140 +               the_fattr_list.high = j;
141 +       }
142 +
143         if (protocol_version < 30) {
144                 /* Recv the io_error flag */
145                 if (ignore_errors)
146 diff --git a/generator.c b/generator.c
147 --- a/generator.c
148 +++ b/generator.c
149 @@ -79,6 +79,7 @@ extern char *basis_dir[];
150  extern int compare_dest;
151  extern int copy_dest;
152  extern int link_dest;
153 +extern int detect_renamed;
154  extern int whole_file;
155  extern int list_only;
156  extern int read_batch;
157 @@ -97,6 +98,7 @@ extern char *backup_suffix;
158  extern int backup_suffix_len;
159  extern struct file_list *cur_flist, *first_flist, *dir_flist;
160  extern struct filter_list_struct server_filter_list;
161 +extern struct file_list the_fattr_list;
162  
163  int ignore_perishable = 0;
164  int non_perishable_cnt = 0;
165 @@ -104,6 +106,7 @@ int maybe_ATTRS_REPORT = 0;
166  
167  static dev_t dev_zero;
168  static int deletion_count = 0; /* used to implement --max-delete */
169 +static int unexplored_dirs = 1;
170  static int deldelay_size = 0, deldelay_cnt = 0;
171  static char *deldelay_buf = NULL;
172  static int deldelay_fd = -1;
173 @@ -113,7 +116,7 @@ static int need_retouch_dir_times;
174  static int need_retouch_dir_perms;
175  static const char *solo_file = NULL;
176  
177 -/* For calling delete_item() and delete_dir_contents(). */
178 +/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */
179  #define DEL_OWNED_BY_US        (1<<0) /* file/dir has our uid */
180  #define DEL_RECURSE            (1<<1) /* if dir, delete all contents */
181  #define DEL_DIR_IS_EMPTY       (1<<2) /* internal delete_FUNCTIONS use only */
182 @@ -122,6 +125,7 @@ static const char *solo_file = NULL;
183  #define DEL_FOR_SYMLINK        (1<<5) /* making room for a replacement symlink */
184  #define DEL_FOR_DEVICE         (1<<6) /* making room for a replacement device */
185  #define DEL_FOR_SPECIAL        (1<<7) /* making room for a replacement special */
186 +#define DEL_NO_DELETIONS       (1<<9) /* just check for renames w/o deleting */
187  
188  #define DEL_MAKE_ROOM (DEL_FOR_FILE|DEL_FOR_DIR|DEL_FOR_SYMLINK|DEL_FOR_DEVICE|DEL_FOR_SPECIAL)
189  
190 @@ -142,11 +146,121 @@ static int is_backup_file(char *fn)
191         return k > 0 && strcmp(fn+k, backup_suffix) == 0;
192  }
193  
194 +/* Search the_fattr_list for a regular file that matches either (1) the size
195 + * & modified time (plus the basename, if possible) or (2) the size & checksum.
196 + * Returns the index of the match, or -1 if nothing matched or if we found an
197 + * exact match down to the dirname (an up-to-date file, not a renamed one). */
198 +static int fattr_find(struct file_struct *f, char *fname)
199 +{
200 +       int low = the_fattr_list.low, high = the_fattr_list.high;
201 +       int mid, ok_match = -1, good_match = -1;
202 +       struct file_struct *fmid;
203 +       int diff;
204 +       /* Binary search; the key order mirrors the one used by fattr_compare(). */
205 +       while (low <= high) {
206 +               mid = (low + high) / 2;
207 +               fmid = the_fattr_list.files[mid];
208 +               if (F_LENGTH(fmid) != F_LENGTH(f)) {
209 +                       if (F_LENGTH(fmid) < F_LENGTH(f))
210 +                               low = mid + 1;
211 +                       else
212 +                               high = mid - 1;
213 +                       continue;
214 +               }
215 +               if (always_checksum) {
216 +                       /* We use the FLAG_FILE_SENT flag to indicate when we
217 +                        * have computed the checksum for an entry. */
218 +                       if (!(f->flags & FLAG_FILE_SENT)) {
219 +                               if (fmid->modtime == f->modtime
220 +                                && f_name_cmp(fmid, f) == 0)
221 +                                       return -1; /* assume we can't help */
222 +                               file_checksum(fname, F_SUM(f), F_LENGTH(f));
223 +                               f->flags |= FLAG_FILE_SENT;
224 +                       }
225 +                       diff = u_memcmp(F_SUM(fmid), F_SUM(f), checksum_len);
226 +                       if (diff) {
227 +                               if (diff < 0)
228 +                                       low = mid + 1;
229 +                               else
230 +                                       high = mid - 1;
231 +                               continue;
232 +                       }
233 +               } else {
234 +                       if (fmid->modtime != f->modtime) {
235 +                               if (fmid->modtime < f->modtime)
236 +                                       low = mid + 1;
237 +                               else
238 +                                       high = mid - 1;
239 +                               continue;
240 +                       }
241 +               }
242 +               ok_match = mid; /* size and checksum/modtime agree */
243 +               diff = u_strcmp(fmid->basename, f->basename);
244 +               if (diff == 0) {
245 +                       good_match = mid; /* the basename agrees as well */
246 +                       if (fmid->dirname == f->dirname)
247 +                               return -1; /* file is up-to-date */
248 +                       if (!fmid->dirname) {
249 +                               low = mid + 1;
250 +                               continue;
251 +                       }
252 +                       if (!f->dirname) {
253 +                               high = mid - 1;
254 +                               continue;
255 +                       }
256 +                       diff = u_strcmp(fmid->dirname, f->dirname);
257 +                       if (diff == 0)
258 +                               return -1; /* file is up-to-date */
259 +               }
260 +               if (diff < 0)
261 +                       low = mid + 1;
262 +               else
263 +                       high = mid - 1;
264 +       }
265 +       /* Prefer a match that also shares the basename over a weaker one. */
266 +       return good_match >= 0 ? good_match : ok_match;
267 +}
268 +
269 +static void look_for_rename(struct file_struct *file, char *fname)
270 +{
271 +       struct file_struct *fp;
272 +       char *partialptr, *fn;
273 +       STRUCT_STAT st;
274 +       int ndx;
275 +       /* Nothing to do without a partial-dir or a usable fattr_find() match. */
276 +       if (!partial_dir || (ndx = fattr_find(file, fname)) < 0)
277 +               return;
278 +
279 +       fp = the_fattr_list.files[ndx];
280 +       fn = f_name(fp, NULL);
281 +       /* We don't provide an alternate-basis file if there is a basis file. */
282 +       if (link_stat(fn, &st, 0) == 0)
283 +               return;
284 +       /* A dry run skips the hard-link but still reports the match below. */
285 +       if (!dry_run) {
286 +               if ((partialptr = partial_dir_fname(fn)) == NULL
287 +                || !handle_partial_dir(partialptr, PDIR_CREATE))
288 +                       return;
289 +               /* We only use the file if we can hard-link it into our tmp dir. */
290 +               if (link(fname, partialptr) != 0) {
291 +                       if (errno != EEXIST)
292 +                               handle_partial_dir(partialptr, PDIR_DELETE);
293 +                       return;
294 +               }
295 +       }
296 +
297 +       /* I think this falls into the -vv category with "%s is uptodate", etc. */
298 +       if (verbose > 1)
299 +               rprintf(FINFO, "found renamed: %s => %s\n", fname, fn);
300 +}
301 +
302  /* Delete a file or directory.  If DEL_RECURSE is set in the flags, this will
303   * delete recursively.
304   *
305   * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's
306   * a directory! (The buffer is used for recursion, but returned unchanged.)
307 + *
308 + * Also note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
309   */
310  static enum delret delete_item(char *fbuf, int mode, int flags)
311  {
312 @@ -171,6 +285,8 @@ static enum delret delete_item(char *fbuf, int mode, int flags)
313                         goto check_ret;
314                 /* OK: try to delete the directory. */
315         }
316 +       if (flags & DEL_NO_DELETIONS)
317 +               return DR_SUCCESS;
318  
319         if (!(flags & DEL_MAKE_ROOM) && max_delete >= 0 && ++deletion_count > max_delete)
320                 return DR_AT_LIMIT;
321 @@ -226,6 +342,8 @@ static enum delret delete_item(char *fbuf, int mode, int flags)
322   * its contents, otherwise just checks for content.  Returns DR_SUCCESS or
323   * DR_NOT_EMPTY.  Note that fname must point to a MAXPATHLEN buffer!  (The
324   * buffer is used for recursion, but returned unchanged.)
325 + *
326 + * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
327   */
328  static enum delret delete_dir_contents(char *fname, int flags)
329  {
330 @@ -245,7 +363,9 @@ static enum delret delete_dir_contents(char *fname, int flags)
331         save_filters = push_local_filters(fname, dlen);
332  
333         non_perishable_cnt = 0;
334 +       file_extra_cnt += SUM_EXTRA_CNT;
335         dirlist = get_dirlist(fname, dlen, 0);
336 +       file_extra_cnt -= SUM_EXTRA_CNT;
337         ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS;
338  
339         if (!dirlist->used)
340 @@ -288,7 +408,8 @@ static enum delret delete_dir_contents(char *fname, int flags)
341                                 do_chmod(fname, fp->mode |= S_IWUSR);
342                         if (delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS)
343                                 ret = DR_NOT_EMPTY;
344 -               }
345 +               } else if (detect_renamed && S_ISREG(fp->mode))
346 +                       look_for_rename(fp, fname);
347                 if (delete_item(fname, fp->mode, flags) != DR_SUCCESS)
348                         ret = DR_NOT_EMPTY;
349         }
350 @@ -449,13 +570,18 @@ static void do_delayed_deletions(char *delbuf)
351   * all the --delete-WHEN options.  Note that the fbuf pointer must point to a
352   * MAXPATHLEN buffer with the name of the directory in it (the functions we
353   * call will append names onto the end, but the old dir value will be restored
354 - * on exit). */
355 -static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
356 + * on exit).
357 + *
358 + * Note:  --detect-renamed may use this routine with DEL_NO_DELETIONS set!
359 + */
360 +static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev,
361 +                         int flags)
362  {
363         static int already_warned = 0;
364         struct file_list *dirlist;
365 -       char delbuf[MAXPATHLEN];
366 -       int dlen, i;
367 +       char *p, delbuf[MAXPATHLEN];
368 +       unsigned remainder;
369 +       int dlen, i, restore_dot = 0;
370  
371         if (!fbuf) {
372                 change_local_filter_dir(NULL, 0, 0);
373 @@ -465,21 +591,28 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
374         if (verbose > 2)
375                 rprintf(FINFO, "delete_in_dir(%s)\n", fbuf);
376  
377 +       flags |= DEL_RECURSE;
378 +
379         if (allowed_lull)
380                 maybe_send_keepalive();
381  
382         if (io_error && !ignore_errors) {
383 -               if (already_warned)
384 +               if (!already_warned) {
385 +                       rprintf(FINFO,
386 +                           "IO error encountered -- skipping file deletion\n");
387 +                       already_warned = 1;
388 +               }
389 +               if (!detect_renamed)
390                         return;
391 -               rprintf(FINFO,
392 -                       "IO error encountered -- skipping file deletion\n");
393 -               already_warned = 1;
394 -               return;
395 +               flags |= DEL_NO_DELETIONS;
396         }
397  
398         dlen = strlen(fbuf);
399         change_local_filter_dir(fbuf, dlen, F_DEPTH(file));
400  
401 +       if (detect_renamed)
402 +               unexplored_dirs--;
403 +
404         if (one_file_system) {
405                 if (file->flags & FLAG_TOP_DIR)
406                         filesystem_dev = *fs_dev;
407 @@ -489,6 +622,14 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
408  
409         dirlist = get_dirlist(fbuf, dlen, 0);
410  
411 +       p = fbuf + dlen;
412 +       if (dlen == 1 && *fbuf == '.') {
413 +               restore_dot = 1;
414 +               p = fbuf;
415 +       } else if (dlen != 1 || *fbuf != '/')
416 +               *p++ = '/';
417 +       remainder = MAXPATHLEN - (p - fbuf);
418 +
419         /* If an item in dirlist is not found in flist, delete it
420          * from the filesystem. */
421         for (i = dirlist->used; i--; ) {
422 @@ -501,18 +642,26 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
423                                         f_name(fp, NULL));
424                         continue;
425                 }
426 +               if (detect_renamed && S_ISREG(fp->mode)) {
427 +                       strlcpy(p, fp->basename, remainder);
428 +                       look_for_rename(fp, fbuf);
429 +               }
430                 if (flist_find(cur_flist, fp) < 0) {
431 -                       int flags = DEL_RECURSE
432 -                                 | (!uid_ndx || (uid_t)F_OWNER(fp) == our_uid ? DEL_OWNED_BY_US : 0);
433 +                       int own_flag = (!uid_ndx || (uid_t)F_OWNER(fp) == our_uid ? DEL_OWNED_BY_US : 0);
434                         f_name(fp, delbuf);
435 -                       if (delete_during == 2) {
436 -                               if (!remember_delete(fp, delbuf, flags))
437 +                       if (delete_during == 2 && !(flags & DEL_NO_DELETIONS)) {
438 +                               if (!remember_delete(fp, delbuf, own_flag | flags))
439                                         break;
440                         } else
441 -                               delete_item(delbuf, fp->mode, flags);
442 -               }
443 +                               delete_item(delbuf, fp->mode, own_flag | flags);
444 +               } else if (detect_renamed && S_ISDIR(fp->mode))
445 +                       unexplored_dirs++;
446         }
447  
448 +       if (restore_dot)
449 +               fbuf[0] = '.';
450 +       fbuf[dlen] = '\0';
451 +
452         flist_free(dirlist);
453  }
454  
455 @@ -542,9 +691,9 @@ static void do_delete_pass(void)
456                  || !S_ISDIR(st.st_mode))
457                         continue;
458  
459 -               delete_in_dir(fbuf, file, &st.st_dev);
460 +               delete_in_dir(fbuf, file, &st.st_dev, 0);
461         }
462 -       delete_in_dir(NULL, NULL, &dev_zero);
463 +       delete_in_dir(NULL, NULL, &dev_zero, 0);
464  
465         if (do_progress && !am_server)
466                 rprintf(FINFO, "                    \r");
467 @@ -1170,6 +1319,7 @@ static void list_file_entry(struct file_struct *f)
468         }
469  }
470  
471 +static struct bitbag *delayed_bits = NULL;
472  static int phase = 0;
473  static int dflt_perms;
474  
475 @@ -1415,8 +1565,12 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
476                         }
477                 }
478                 else if (delete_during && f_out != -1 && !phase && dry_run < 2
479 -                   && (file->flags & FLAG_CONTENT_DIR))
480 -                       delete_in_dir(fname, file, &real_sx.st.st_dev);
481 +                   && (file->flags & FLAG_CONTENT_DIR)) {
482 +                       if (detect_renamed && real_ret != 0)
483 +                               unexplored_dirs++;
484 +                       delete_in_dir(fname, file, &real_sx.st.st_dev,
485 +                                     delete_during < 0 ? DEL_NO_DELETIONS : 0);
486 +               }
487                 goto cleanup;
488         }
489  
490 @@ -1694,8 +1848,14 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
491                         goto cleanup;
492                 }
493  #endif
494 -               if (stat_errno == ENOENT)
495 +               if (stat_errno == ENOENT) {
496 +                       if (detect_renamed && unexplored_dirs > 0
497 +                        && F_LENGTH(file)) {
498 +                               bitbag_set_bit(delayed_bits, ndx);
499 +                               return;
500 +                       }
501                         goto notify_others;
502 +               }
503                 rsyserr(FERROR_XFER, stat_errno, "recv_generator: failed to stat %s",
504                         full_fname(fname));
505                 goto cleanup;
506 @@ -2038,6 +2198,12 @@ void generate_files(int f_out, const char *local_name)
507         if (verbose > 2)
508                 rprintf(FINFO, "generator starting pid=%ld\n", (long)getpid());
509  
510 +       if (detect_renamed) {
511 +               delayed_bits = bitbag_create(cur_flist->used);
512 +               if (!delete_before && !delete_during)
513 +                       delete_during = -1;
514 +       }
515 +
516         if (delete_before && !solo_file && cur_flist->used > 0)
517                 do_delete_pass();
518         if (delete_during == 2) {
519 @@ -2048,7 +2214,7 @@ void generate_files(int f_out, const char *local_name)
520         }
521         do_progress = 0;
522  
523 -       if (append_mode > 0 || whole_file < 0)
524 +       if (append_mode > 0 || detect_renamed || whole_file < 0)
525                 whole_file = 0;
526         if (verbose >= 2) {
527                 rprintf(FINFO, "delta-transmission %s\n",
528 @@ -2086,7 +2252,7 @@ void generate_files(int f_out, const char *local_name)
529                                                 dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp));
530                                         } else
531                                                 dirdev = MAKEDEV(0, 0);
532 -                                       delete_in_dir(f_name(fp, fbuf), fp, &dirdev);
533 +                                       delete_in_dir(f_name(fp, fbuf), fp, &dirdev, 0);
534                                 }
535                         }
536                 }
537 @@ -2129,7 +2295,21 @@ void generate_files(int f_out, const char *local_name)
538         } while ((cur_flist = cur_flist->next) != NULL);
539  
540         if (delete_during)
541 -               delete_in_dir(NULL, NULL, &dev_zero);
542 +               delete_in_dir(NULL, NULL, &dev_zero, 0);
543 +       if (detect_renamed) {
544 +               if (delete_during < 0)
545 +                       delete_during = 0;
546 +               detect_renamed = 0;
547 +
548 +               for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) {
549 +                       struct file_struct *file = cur_flist->files[i];
550 +                       if (local_name)
551 +                               strlcpy(fbuf, local_name, sizeof fbuf);
552 +                       else
553 +                               f_name(file, fbuf);
554 +                       recv_generator(fbuf, file, i, itemizing, code, f_out);
555 +               }
556 +       }
557         phase++;
558         if (verbose > 2)
559                 rprintf(FINFO, "generate_files phase=%d\n", phase);
560 diff --git a/options.c b/options.c
561 --- a/options.c
562 +++ b/options.c
563 @@ -81,6 +81,7 @@ int am_generator = 0;
564  int am_starting_up = 1;
565  int relative_paths = -1;
566  int implied_dirs = 1;
567 +int detect_renamed = 0;
568  int numeric_ids = 0;
569  int allow_8bit_chars = 0;
570  int force_delete = 0;
571 @@ -386,6 +387,7 @@ void usage(enum logcode F)
572    rprintf(F,"     --modify-window=NUM     compare mod-times with reduced accuracy\n");
573    rprintf(F," -T, --temp-dir=DIR          create temporary files in directory DIR\n");
574    rprintf(F," -y, --fuzzy                 find similar file for basis if no dest file\n");
575 +  rprintf(F,"     --detect-renamed        try to find renamed files to speed up the transfer\n");
576    rprintf(F,"     --compare-dest=DIR      also compare destination files relative to DIR\n");
577    rprintf(F,"     --copy-dest=DIR         ... and include copies of unchanged files\n");
578    rprintf(F,"     --link-dest=DIR         hardlink to files in DIR when unchanged\n");
579 @@ -564,6 +566,7 @@ static struct poptOption long_options[] = {
580    {"compare-dest",     0,  POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
581    {"copy-dest",        0,  POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
582    {"link-dest",        0,  POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
583 +  {"detect-renamed",   0,  POPT_ARG_NONE,   &detect_renamed, 0, 0, 0 },
584    {"fuzzy",           'y', POPT_ARG_NONE,   &fuzzy_basis, 0, 0, 0 },
585    {"compress",        'z', POPT_ARG_NONE,   0, 'z', 0, 0 },
586    {"no-compress",      0,  POPT_ARG_VAL,    &do_compression, 0, 0, 0 },
587 @@ -1542,7 +1545,7 @@ int parse_arguments(int *argc_p, const char ***argv_p, int frommain)
588                 inplace = 1;
589         }
590  
591 -       if (delay_updates && !partial_dir)
592 +       if ((delay_updates || detect_renamed) && !partial_dir)
593                 partial_dir = tmp_partialdir;
594  
595         if (inplace) {
596 @@ -1551,6 +1554,7 @@ int parse_arguments(int *argc_p, const char ***argv_p, int frommain)
597                         snprintf(err_buf, sizeof err_buf,
598                                  "--%s cannot be used with --%s\n",
599                                  append_mode ? "append" : "inplace",
600 +                                detect_renamed ? "detect-renamed" :
601                                  delay_updates ? "delay-updates" : "partial-dir");
602                         return 0;
603                 }
604 @@ -1897,6 +1901,8 @@ void server_options(char **args, int *argc_p)
605                         args[ac++] = "--super";
606                 if (size_only)
607                         args[ac++] = "--size-only";
608 +               if (detect_renamed)
609 +                       args[ac++] = "--detect-renamed";
610         } else {
611                 if (skip_compress) {
612                         if (asprintf(&arg, "--skip-compress=%s", skip_compress) < 0)
613 diff --git a/rsync.yo b/rsync.yo
614 --- a/rsync.yo
615 +++ b/rsync.yo
616 @@ -385,6 +385,7 @@ to the detailed description below for a complete description.  verb(
617       --modify-window=NUM     compare mod-times with reduced accuracy
618   -T, --temp-dir=DIR          create temporary files in directory DIR
619   -y, --fuzzy                 find similar file for basis if no dest file
620 +     --detect-renamed        try to find renamed files to speed the xfer
621       --compare-dest=DIR      also compare received files relative to DIR
622       --copy-dest=DIR         ... and include copies of unchanged files
623       --link-dest=DIR         hardlink to files in DIR when unchanged
624 @@ -1467,6 +1468,21 @@ Note that the use of the bf(--delete) option might get rid of any potential
625  fuzzy-match files, so either use bf(--delete-after) or specify some
626  filename exclusions if you need to prevent this.
627  
628 +dit(bf(--detect-renamed)) With this option, for each new source file
629 +(call it em(src/S)), rsync looks for a file em(dest/D) anywhere in the
630 +destination that passes the quick check with em(src/S).  If such a em(dest/D)
631 +is found, rsync uses it as an alternate basis for transferring em(S).  The
632 +idea is that if em(src/S) was renamed from em(src/D) (as opposed to em(src/S)
633 +passing the quick check with em(dest/D) by coincidence), the delta-transfer
634 +algorithm will find that all the data matches between em(src/S) and em(dest/D),
635 +and the transfer will be really fast.
636 +
637 +By default, alternate-basis files are hard-linked into a directory named
638 +".~tmp~" in each file's destination directory, but if you've specified
639 +the bf(--partial-dir) option, that directory will be used instead.  These
640 +potential alternate-basis files will be removed as the transfer progresses.
641 +This option conflicts with bf(--inplace) and bf(--append).
642 +
643  dit(bf(--compare-dest=DIR)) This option instructs rsync to use em(DIR) on
644  the destination machine as an additional hierarchy to compare destination
645  files against doing transfers (if the files are missing in the destination
646 diff --git a/util.c b/util.c
647 --- a/util.c
648 +++ b/util.c
649 @@ -1019,6 +1019,32 @@ int handle_partial_dir(const char *fname, int create)
650         return 1;
651  }
652  
653 +/* We need to supply our own strcmp function for file list comparisons
654 + * to ensure that signed/unsigned usage is consistent between machines. */
655 +int u_strcmp(const char *p1, const char *p2)
656 +{
657 +        for ( ; *p1; p1++, p2++) {
658 +               if (*p1 != *p2)
659 +                       break;
660 +       }
661 +       /* p1 now rests on its terminating NUL or on the first differing byte. */
662 +       return (int)*(uchar*)p1 - (int)*(uchar*)p2; /* compare as unsigned */
663 +}
664 +
665 +/* We need a memcmp function that compares unsigned-byte values. */
666 +int u_memcmp(const void *p1, const void *p2, size_t len)
667 +{
668 +       const uchar *u1 = p1;
669 +       const uchar *u2 = p2;
670 +       /* Advance both pointers on every pass so that each byte gets compared. */
671 +       for ( ; len; len--, u1++, u2++) {
672 +               if (*u1 != *u2)
673 +                       return (int)*u1 - (int)*u2;
674 +       }
675 +
676 +       return 0;
677 +}
678 +
679  /**
680   * Determine if a symlink points outside the current directory tree.
681   * This is considered "unsafe" because e.g. when mirroring somebody