Updated patches to work with the current trunk.
[rsync/rsync-patches.git] / detect-renamed.diff
1 This patch adds the --detect-renamed option which makes rsync notice files
2 that either (1) match in size & modify-time (plus the basename, if possible)
3 or (2) match in size & checksum (when --checksum was also specified) and use
4 each match as an alternate basis file to speed up the transfer.
5
6 The algorithm attempts to scan the receiving-side's files in an efficient
7 manner.  If --delete[-before] is enabled, we'll take advantage of the
8 pre-transfer delete pass to prepare any alternate-basis-file matches we
9 might find.  If --delete-before is not enabled, rsync does the rename scan
10 during the regular file-sending scan (scanning each directory right before
11 the generator starts updating files from that dir).  In this latter mode,
12 rsync might delay the updating of a file (if no alternate-basis match was
13 yet found) until the full scan of the receiving side is complete, at which
14 point any delayed files are processed.
15
16 I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that
17 takes advantage of rsync's pre-existing partial-dir logic.  This uses less
18 memory than trying to keep track of the matches internally, and also allows
19 any deletions or file-updates to occur normally without interfering with
20 these alternate-basis discoveries.
21
22 To use this patch, run these commands for a successful build:
23
24     patch -p1 <patches/detect-renamed.diff
25     ./configure                                 (optional if already run)
26     make
27
28 TODO:
29
30   We need to never return a match from fattr_find() that has a basis
31   file.  This will ensure that we don't try to give a renamed file to
32   a file that can't use it, while missing out on giving it to a file
33   that could use it.
34
35 diff --git a/compat.c b/compat.c
36 index 6e00072..5fc9e37 100644
37 --- a/compat.c
38 +++ b/compat.c
39 @@ -40,6 +40,7 @@ extern int checksum_seed;
40  extern int basis_dir_cnt;
41  extern int prune_empty_dirs;
42  extern int protocol_version;
43 +extern int detect_renamed;
44  extern int protect_args;
45  extern int preserve_uid;
46  extern int preserve_gid;
47 @@ -119,6 +120,7 @@ void set_allow_inc_recurse(void)
48                 allow_inc_recurse = 0;
49         else if (!am_sender
50          && (delete_before || delete_after
51 +         || detect_renamed
52           || delay_updates || prune_empty_dirs))
53                 allow_inc_recurse = 0;
54         else if (am_server && !local_server
55 diff --git a/delete.c b/delete.c
56 index 33fdd0e..6130a4c 100644
57 --- a/delete.c
58 +++ b/delete.c
59 @@ -25,6 +25,7 @@
60  extern int am_root;
61  extern int make_backups;
62  extern int max_delete;
63 +extern int detect_renamed;
64  extern char *backup_dir;
65  extern char *backup_suffix;
66  extern int backup_suffix_len;
67 @@ -45,6 +46,8 @@ static inline int is_backup_file(char *fn)
68   * its contents, otherwise just checks for content.  Returns DR_SUCCESS or
69   * DR_NOT_EMPTY.  Note that fname must point to a MAXPATHLEN buffer!  (The
70   * buffer is used for recursion, but returned unchanged.)
71 + *
72 + * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
73   */
74  static enum delret delete_dir_contents(char *fname, uint16 flags)
75  {
76 @@ -64,7 +67,9 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
77         save_filters = push_local_filters(fname, dlen);
78  
79         non_perishable_cnt = 0;
80 +       file_extra_cnt += SUM_EXTRA_CNT;
81         dirlist = get_dirlist(fname, dlen, 0);
82 +       file_extra_cnt -= SUM_EXTRA_CNT;
83         ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS;
84  
85         if (!dirlist->used)
86 @@ -104,7 +109,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
87                 if (S_ISDIR(fp->mode)) {
88                         if (delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS)
89                                 ret = DR_NOT_EMPTY;
90 -               }
91 +               } else if (detect_renamed && S_ISREG(fp->mode))
92 +                       look_for_rename(fp, fname);
93                 if (delete_item(fname, fp->mode, flags) != DR_SUCCESS)
94                         ret = DR_NOT_EMPTY;
95         }
96 @@ -127,6 +133,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags)
97   *
98   * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's
99   * a directory! (The buffer is used for recursion, but returned unchanged.)
100 + *
101 + * Also note: --detect-renamed may use this routine with DEL_NO_DELETIONS set!
102   */
103  enum delret delete_item(char *fbuf, uint16 mode, uint16 flags)
104  {
105 diff --git a/flist.c b/flist.c
106 index 09b4fc5..929aa85 100644
107 --- a/flist.c
108 +++ b/flist.c
109 @@ -63,6 +63,7 @@ extern int non_perishable_cnt;
110  extern int prune_empty_dirs;
111  extern int copy_links;
112  extern int copy_unsafe_links;
113 +extern int detect_renamed;
114  extern int protocol_version;
115  extern int sanitize_paths;
116  extern int munge_symlinks;
117 @@ -124,6 +125,8 @@ static int64 tmp_dev, tmp_ino;
118  #endif
119  static char tmp_sum[MAX_DIGEST_LEN];
120  
121 +struct file_list the_fattr_list;
122 +
123  static char empty_sum[MAX_DIGEST_LEN];
124  static int flist_count_offset; /* for --delete --progress */
125  
126 @@ -291,6 +294,45 @@ static int is_excluded(const char *fname, int is_dir, int filter_level)
127         return 0;
128  }
129  
130 +static int fattr_compare(struct file_struct **file1, struct file_struct **file2)
131 +{
132 +       struct file_struct *f1 = *file1;
133 +       struct file_struct *f2 = *file2;
134 +       int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2);
135 +       int diff;
136 +       /* qsort() comparator for the_fattr_list: unusable entries sort to the end. */
137 +       if (!f1->basename || !S_ISREG(f1->mode) || !len1) {
138 +               if (!f2->basename || !S_ISREG(f2->mode) || !len2)
139 +                       return 0;
140 +               return 1;
141 +       }
142 +       if (!f2->basename || !S_ISREG(f2->mode) || !len2)
143 +               return -1;
144 +       /* Primary key: the file length. */
145 +       /* Don't use diff for values that are longer than an int. */
146 +       if (len1 != len2)
147 +               return len1 < len2 ? -1 : 1;
148 +       /* Secondary key: the checksum if always_checksum is set, else the mod-time. */
149 +       if (always_checksum) {
150 +               diff = u_memcmp(F_SUM(f1), F_SUM(f2), checksum_len);
151 +               if (diff)
152 +                       return diff;
153 +       } else if (f1->modtime != f2->modtime)
154 +               return f1->modtime < f2->modtime ? -1 : 1;
155 +       /* Third key: the basename. */
156 +       diff = u_strcmp(f1->basename, f2->basename);
157 +       if (diff)
158 +               return diff;
159 +       /* Final key: the dirname, with a NULL dirname sorting first. */
160 +       if (f1->dirname == f2->dirname)
161 +               return 0;
162 +       if (!f1->dirname)
163 +               return -1;
164 +       if (!f2->dirname)
165 +               return 1;
166 +       return u_strcmp(f1->dirname, f2->dirname);
167 +}
168 +
169  static void send_directory(int f, struct file_list *flist,
170                            char *fbuf, int len, int flags);
171  
172 @@ -2464,6 +2506,25 @@ struct file_list *recv_file_list(int f)
173  
174         flist_sort_and_clean(flist, relative_paths);
175  
176 +       if (detect_renamed) {
177 +               int j = flist->used;
178 +               the_fattr_list.used = j;
179 +               the_fattr_list.files = new_array(struct file_struct *, j);
180 +               if (!the_fattr_list.files)
181 +                       out_of_memory("recv_file_list");
182 +               memcpy(the_fattr_list.files, flist->files,
183 +                      j * sizeof (struct file_struct *));
184 +               qsort(the_fattr_list.files, j,
185 +                     sizeof the_fattr_list.files[0], (int (*)())fattr_compare);
186 +               the_fattr_list.low = 0;
187 +               while (j-- > 0) {
188 +                       struct file_struct *fp = the_fattr_list.files[j];
189 +                       if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp))
190 +                               break;
191 +               }
192 +               the_fattr_list.high = j;
193 +       }
194 +
195         if (protocol_version < 30) {
196                 /* Recv the io_error flag */
197                 int err = read_int(f);
198 diff --git a/generator.c b/generator.c
199 index 12007a1..35ba203 100644
200 --- a/generator.c
201 +++ b/generator.c
202 @@ -81,6 +81,7 @@ extern char *partial_dir;
203  extern int compare_dest;
204  extern int copy_dest;
205  extern int link_dest;
206 +extern int detect_renamed;
207  extern int whole_file;
208  extern int list_only;
209  extern int read_batch;
210 @@ -98,10 +99,12 @@ extern uid_t our_uid;
211  extern char *basis_dir[MAX_BASIS_DIRS+1];
212  extern struct file_list *cur_flist, *first_flist, *dir_flist;
213  extern struct filter_list_struct filter_list, daemon_filter_list;
214 +extern struct file_list the_fattr_list;
215  
216  int maybe_ATTRS_REPORT = 0;
217  
218  static dev_t dev_zero;
219 +static int unexplored_dirs = 1;
220  static int deldelay_size = 0, deldelay_cnt = 0;
221  static char *deldelay_buf = NULL;
222  static int deldelay_fd = -1;
223 @@ -181,6 +184,8 @@ static int remember_delete(struct file_struct *file, const char *fname, int flag
224                 if (!flush_delete_delay())
225                         return 0;
226         }
227 +       if (flags & DEL_NO_DELETIONS)
228 +               return DR_SUCCESS;
229  
230         return 1;
231  }
232 @@ -272,13 +277,18 @@ static void do_delayed_deletions(char *delbuf)
233   * all the --delete-WHEN options.  Note that the fbuf pointer must point to a
234   * MAXPATHLEN buffer with the name of the directory in it (the functions we
235   * call will append names onto the end, but the old dir value will be restored
236 - * on exit). */
237 -static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
238 + * on exit).
239 + *
240 + * Note:  --detect-renamed may use this routine with DEL_NO_DELETIONS set!
241 + */
242 +static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev,
243 +                         int del_flags)
244  {
245         static int already_warned = 0;
246         struct file_list *dirlist;
247 -       char delbuf[MAXPATHLEN];
248 -       int dlen, i;
249 +       char *p, delbuf[MAXPATHLEN];
250 +       unsigned remainder;
251 +       int dlen, i, restore_dot = 0;
252         int save_uid_ndx = uid_ndx;
253  
254         if (!fbuf) {
255 @@ -293,17 +303,22 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
256                 maybe_send_keepalive();
257  
258         if (io_error && !ignore_errors) {
259 -               if (already_warned)
260 +               if (!already_warned) {
261 +                       rprintf(FINFO,
262 +                           "IO error encountered -- skipping file deletion\n");
263 +                       already_warned = 1;
264 +               }
265 +               if (!detect_renamed)
266                         return;
267 -               rprintf(FINFO,
268 -                       "IO error encountered -- skipping file deletion\n");
269 -               already_warned = 1;
270 -               return;
271 +               del_flags |= DEL_NO_DELETIONS;
272         }
273  
274         dlen = strlen(fbuf);
275         change_local_filter_dir(fbuf, dlen, F_DEPTH(file));
276  
277 +       if (detect_renamed)
278 +               unexplored_dirs--;
279 +
280         if (one_file_system) {
281                 if (file->flags & FLAG_TOP_DIR)
282                         filesystem_dev = *fs_dev;
283 @@ -316,6 +331,14 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
284  
285         dirlist = get_dirlist(fbuf, dlen, 0);
286  
287 +       p = fbuf + dlen;
288 +       if (dlen == 1 && *fbuf == '.') {
289 +               restore_dot = 1;
290 +               p = fbuf;
291 +       } else if (dlen != 1 || *fbuf != '/')
292 +               *p++ = '/';
293 +       remainder = MAXPATHLEN - (p - fbuf);
294 +
295         /* If an item in dirlist is not found in flist, delete it
296          * from the filesystem. */
297         for (i = dirlist->used; i--; ) {
298 @@ -328,6 +351,10 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
299                                         f_name(fp, NULL));
300                         continue;
301                 }
302 +               if (detect_renamed && S_ISREG(fp->mode)) {
303 +                       strlcpy(p, fp->basename, remainder);
304 +                       look_for_rename(fp, fbuf);
305 +               }
306                 /* Here we want to match regardless of file type.  Replacement
307                  * of a file with one of another type is handled separately by
308                  * a delete_item call with a DEL_MAKE_ROOM flag. */
309 @@ -336,14 +363,19 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
310                         if (!(fp->mode & S_IWUSR) && !am_root && (uid_t)F_OWNER(fp) == our_uid)
311                                 flags |= DEL_NO_UID_WRITE;
312                         f_name(fp, delbuf);
313 -                       if (delete_during == 2) {
314 -                               if (!remember_delete(fp, delbuf, flags))
315 +                       if (delete_during == 2 && !(del_flags & DEL_NO_DELETIONS)) {
316 +                               if (!remember_delete(fp, delbuf, del_flags | flags))
317                                         break;
318                         } else
319 -                               delete_item(delbuf, fp->mode, flags);
320 -               }
321 +                               delete_item(delbuf, fp->mode, del_flags | flags);
322 +               } else if (detect_renamed && S_ISDIR(fp->mode))
323 +                       unexplored_dirs++;
324         }
325  
326 +       if (restore_dot)
327 +               fbuf[0] = '.';
328 +       fbuf[dlen] = '\0';
329 +
330         flist_free(dirlist);
331  
332         if (!save_uid_ndx) {
333 @@ -381,14 +413,122 @@ static void do_delete_pass(void)
334                  || !S_ISDIR(st.st_mode))
335                         continue;
336  
337 -               delete_in_dir(fbuf, file, &st.st_dev);
338 +               delete_in_dir(fbuf, file, &st.st_dev, 0);
339         }
340 -       delete_in_dir(NULL, NULL, &dev_zero);
341 +       delete_in_dir(NULL, NULL, &dev_zero, 0);
342  
343         if (INFO_GTE(FLIST, 2) && !am_server)
344                 rprintf(FINFO, "                    \r");
345  }
346  
347 +/* Binary-search the_fattr_list for an entry matching f in (1) size & mod-time
348 + * or (2) size & checksum (computed from fname the first time it is needed).
349 + * Returns the entry's index, preferring one whose basename also matches, or
350 + * -1 if nothing matches or f is an up-to-date file in the transfer. */
351 +static int fattr_find(struct file_struct *f, char *fname)
352 +{
353 +       int low = the_fattr_list.low, high = the_fattr_list.high;
354 +       int mid, ok_match = -1, good_match = -1;
355 +       struct file_struct *fmid;
356 +       int diff;
357 +       /* Narrow [low, high] using the same key order as fattr_compare(). */
358 +       while (low <= high) {
359 +               mid = (low + high) / 2;
360 +               fmid = the_fattr_list.files[mid];
361 +               if (F_LENGTH(fmid) != F_LENGTH(f)) {
362 +                       if (F_LENGTH(fmid) < F_LENGTH(f))
363 +                               low = mid + 1;
364 +                       else
365 +                               high = mid - 1;
366 +                       continue;
367 +               }
368 +               if (always_checksum) {
369 +                       /* We use the FLAG_FILE_SENT flag to indicate when we
370 +                        * have computed the checksum for an entry. */
371 +                       if (!(f->flags & FLAG_FILE_SENT)) {
372 +                               if (fmid->modtime == f->modtime
373 +                                && f_name_cmp(fmid, f) == 0)
374 +                                       return -1; /* assume we can't help */
375 +                               file_checksum(fname, F_SUM(f), F_LENGTH(f));
376 +                               f->flags |= FLAG_FILE_SENT;
377 +                       }
378 +                       diff = u_memcmp(F_SUM(fmid), F_SUM(f), checksum_len);
379 +                       if (diff) {
380 +                               if (diff < 0)
381 +                                       low = mid + 1;
382 +                               else
383 +                                       high = mid - 1;
384 +                               continue;
385 +                       }
386 +               } else {
387 +                       if (fmid->modtime != f->modtime) {
388 +                               if (fmid->modtime < f->modtime)
389 +                                       low = mid + 1;
390 +                               else
391 +                                       high = mid - 1;
392 +                               continue;
393 +                       }
394 +               }
395 +               ok_match = mid; /* size and checksum/mod-time agree */
396 +               diff = u_strcmp(fmid->basename, f->basename);
397 +               if (diff == 0) {
398 +                       good_match = mid; /* the basename agrees as well */
399 +                       if (fmid->dirname == f->dirname)
400 +                               return -1; /* file is up-to-date */
401 +                       if (!fmid->dirname) {
402 +                               low = mid + 1;
403 +                               continue;
404 +                       }
405 +                       if (!f->dirname) {
406 +                               high = mid - 1;
407 +                               continue;
408 +                       }
409 +                       diff = u_strcmp(fmid->dirname, f->dirname);
410 +                       if (diff == 0)
411 +                               return -1; /* file is up-to-date */
412 +               }
413 +               if (diff < 0)
414 +                       low = mid + 1;
415 +               else
416 +                       high = mid - 1;
417 +       }
418 +       /* No exact entry was found: use the best partial match (or -1 if none). */
419 +       return good_match >= 0 ? good_match : ok_match;
420 +}
421 +
422 +void look_for_rename(struct file_struct *file, char *fname)
423 +{
424 +       struct file_struct *fp;
425 +       char *partialptr, *fn;
426 +       STRUCT_STAT st;
427 +       int ndx;
428 +       /* Try to hard-link fname into the partial-dir spot of the entry it matches. */
429 +       if (!partial_dir || (ndx = fattr_find(file, fname)) < 0)
430 +               return;
431 +       /* fn is the name of the matching entry taken from the_fattr_list. */
432 +       fp = the_fattr_list.files[ndx];
433 +       fn = f_name(fp, NULL);
434 +       /* We don't provide an alternate-basis file if there is a basis file. */
435 +       if (link_stat(fn, &st, 0) == 0)
436 +               return;
437 +       /* When dry_run is set, nothing is linked; the match is only reported below. */
438 +       if (!dry_run) {
439 +               if ((partialptr = partial_dir_fname(fn)) == NULL
440 +                || !handle_partial_dir(partialptr, PDIR_CREATE))
441 +                       return;
442 +               /* We only use the file if we can hard-link it into our tmp dir. */
443 +               if (link(fname, partialptr) != 0) {
444 +                       if (errno != EEXIST)
445 +                               handle_partial_dir(partialptr, PDIR_DELETE);
446 +                       return;
447 +               }
448 +       }
449 +
450 +       /* I think this falls into the -vv category with "%s is uptodate", etc. */
451 +       if (INFO_GTE(MISC, 2))
452 +               rprintf(FINFO, "found renamed: %s => %s\n", fname, fn);
453 +}
454 +
455  int unchanged_attrs(const char *fname, struct file_struct *file, stat_x *sxp)
456  {
457  #if !defined HAVE_LUTIMES || !defined HAVE_UTIMES
458 @@ -1042,6 +1182,7 @@ static void list_file_entry(struct file_struct *f)
459         }
460  }
461  
462 +static struct bitbag *delayed_bits = NULL;
463  static int phase = 0;
464  static int dflt_perms;
465  
466 @@ -1321,9 +1462,12 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
467                 }
468                 else if (delete_during && f_out != -1 && !phase
469                     && !(file->flags & FLAG_MISSING_DIR)) {
470 -                       if (file->flags & FLAG_CONTENT_DIR)
471 -                               delete_in_dir(fname, file, &real_sx.st.st_dev);
472 -                       else
473 +                       if (file->flags & FLAG_CONTENT_DIR) {
474 +                               if (detect_renamed && real_ret != 0)
475 +                                       unexplored_dirs++;
476 +                               delete_in_dir(fname, file, &real_sx.st.st_dev,
477 +                                             delete_during < 0 ? DEL_NO_DELETIONS : 0);
478 +                       } else
479                                 change_local_filter_dir(fname, strlen(fname), F_DEPTH(file));
480                 }
481                 goto cleanup;
482 @@ -1601,8 +1745,14 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx,
483                         goto cleanup;
484                 }
485  #endif
486 -               if (stat_errno == ENOENT)
487 +               if (stat_errno == ENOENT) {
488 +                       if (detect_renamed && unexplored_dirs > 0
489 +                        && F_LENGTH(file)) {
490 +                               bitbag_set_bit(delayed_bits, ndx);
491 +                               return;
492 +                       }
493                         goto notify_others;
494 +               }
495                 rsyserr(FERROR_XFER, stat_errno, "recv_generator: failed to stat %s",
496                         full_fname(fname));
497                 goto cleanup;
498 @@ -2003,6 +2153,12 @@ void generate_files(int f_out, const char *local_name)
499         if (DEBUG_GTE(GENR, 1))
500                 rprintf(FINFO, "generator starting pid=%ld\n", (long)getpid());
501  
502 +       if (detect_renamed) {
503 +               delayed_bits = bitbag_create(cur_flist->used);
504 +               if (!delete_before && !delete_during)
505 +                       delete_during = -1;
506 +       }
507 +
508         if (delete_before && !solo_file && cur_flist->used > 0)
509                 do_delete_pass();
510         if (delete_during == 2) {
511 @@ -2013,7 +2169,7 @@ void generate_files(int f_out, const char *local_name)
512         }
513         info_levels[INFO_FLIST] = info_levels[INFO_PROGRESS] = 0;
514  
515 -       if (append_mode > 0 || whole_file < 0)
516 +       if (append_mode > 0 || detect_renamed || whole_file < 0)
517                 whole_file = 0;
518         if (DEBUG_GTE(FLIST, 1)) {
519                 rprintf(FINFO, "delta-transmission %s\n",
520 @@ -2055,7 +2211,7 @@ void generate_files(int f_out, const char *local_name)
521                                                 dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp));
522                                         } else
523                                                 dirdev = MAKEDEV(0, 0);
524 -                                       delete_in_dir(fbuf, fp, &dirdev);
525 +                                       delete_in_dir(fbuf, fp, &dirdev, 0);
526                                 } else
527                                         change_local_filter_dir(fbuf, strlen(fbuf), F_DEPTH(fp));
528                         }
529 @@ -2102,7 +2258,21 @@ void generate_files(int f_out, const char *local_name)
530         } while ((cur_flist = cur_flist->next) != NULL);
531  
532         if (delete_during)
533 -               delete_in_dir(NULL, NULL, &dev_zero);
534 +               delete_in_dir(NULL, NULL, &dev_zero, 0);
535 +       if (detect_renamed) {
536 +               if (delete_during < 0)
537 +                       delete_during = 0;
538 +               detect_renamed = 0;
539 +
540 +               for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) {
541 +                       struct file_struct *file = cur_flist->files[i];
542 +                       if (local_name)
543 +                               strlcpy(fbuf, local_name, sizeof fbuf);
544 +                       else
545 +                               f_name(file, fbuf);
546 +                       recv_generator(fbuf, file, i, itemizing, code, f_out);
547 +               }
548 +       }
549         phase++;
550         if (DEBUG_GTE(GENR, 1))
551                 rprintf(FINFO, "generate_files phase=%d\n", phase);
552 diff --git a/options.c b/options.c
553 index e7c6c61..7e454b3 100644
554 --- a/options.c
555 +++ b/options.c
556 @@ -80,6 +80,7 @@ int am_server = 0;
557  int am_sender = 0;
558  int am_starting_up = 1;
559  int relative_paths = -1;
560 +int detect_renamed = 0;
561  int implied_dirs = 1;
562  int missing_args = 0; /* 0 = FERROR_XFER, 1 = ignore, 2 = delete */
563  int numeric_ids = 0;
564 @@ -742,6 +743,7 @@ void usage(enum logcode F)
565    rprintf(F,"     --modify-window=NUM     compare mod-times with reduced accuracy\n");
566    rprintf(F," -T, --temp-dir=DIR          create temporary files in directory DIR\n");
567    rprintf(F," -y, --fuzzy                 find similar file for basis if no dest file\n");
568 +  rprintf(F,"     --detect-renamed        try to find renamed files to speed up the transfer\n");
569    rprintf(F,"     --compare-dest=DIR      also compare destination files relative to DIR\n");
570    rprintf(F,"     --copy-dest=DIR         ... and include copies of unchanged files\n");
571    rprintf(F,"     --link-dest=DIR         hardlink to files in DIR when unchanged\n");
572 @@ -937,6 +939,7 @@ static struct poptOption long_options[] = {
573    {"compare-dest",     0,  POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
574    {"copy-dest",        0,  POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
575    {"link-dest",        0,  POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
576 +  {"detect-renamed",   0,  POPT_ARG_NONE,   &detect_renamed, 0, 0, 0 },
577    {"fuzzy",           'y', POPT_ARG_VAL,    &fuzzy_basis, 1, 0, 0 },
578    {"no-fuzzy",         0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
579    {"no-y",             0,  POPT_ARG_VAL,    &fuzzy_basis, 0, 0, 0 },
580 @@ -2105,7 +2108,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
581                 inplace = 1;
582         }
583  
584 -       if (delay_updates && !partial_dir)
585 +       if ((delay_updates || detect_renamed) && !partial_dir)
586                 partial_dir = tmp_partialdir;
587  
588         if (inplace) {
589 @@ -2114,6 +2117,7 @@ int parse_arguments(int *argc_p, const char ***argv_p)
590                         snprintf(err_buf, sizeof err_buf,
591                                  "--%s cannot be used with --%s\n",
592                                  append_mode ? "append" : "inplace",
593 +                                detect_renamed ? "detect-renamed" :
594                                  delay_updates ? "delay-updates" : "partial-dir");
595                         return 0;
596                 }
597 @@ -2476,6 +2480,8 @@ void server_options(char **args, int *argc_p)
598                         args[ac++] = "--super";
599                 if (size_only)
600                         args[ac++] = "--size-only";
601 +               if (detect_renamed)
602 +                       args[ac++] = "--detect-renamed";
603                 if (do_stats)
604                         args[ac++] = "--stats";
605         } else {
606 diff --git a/rsync.h b/rsync.h
607 index be7cf8a..278e282 100644
608 --- a/rsync.h
609 +++ b/rsync.h
610 @@ -242,7 +242,7 @@ enum msgcode {
611  #define NDX_DEL_STATS -2
612  #define NDX_FLIST_OFFSET -101
613  
614 -/* For calling delete_item() and delete_dir_contents(). */
615 +/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */
616  #define DEL_NO_UID_WRITE       (1<<0) /* file/dir has our uid w/o write perm */
617  #define DEL_RECURSE            (1<<1) /* if dir, delete all contents */
618  #define DEL_DIR_IS_EMPTY       (1<<2) /* internal delete_FUNCTIONS use only */
619 @@ -252,6 +252,7 @@ enum msgcode {
620  #define DEL_FOR_DEVICE         (1<<6) /* making room for a replacement device */
621  #define DEL_FOR_SPECIAL        (1<<7) /* making room for a replacement special */
622  #define DEL_FOR_BACKUP         (1<<8) /* the delete is for a backup operation */
623 +#define DEL_NO_DELETIONS       (1<<9) /* just check for renames w/o deleting */
624  
625  #define DEL_MAKE_ROOM (DEL_FOR_FILE|DEL_FOR_DIR|DEL_FOR_SYMLINK|DEL_FOR_DEVICE|DEL_FOR_SPECIAL)
626  
627 diff --git a/rsync.yo b/rsync.yo
628 index 941f7a5..4df39b0 100644
629 --- a/rsync.yo
630 +++ b/rsync.yo
631 @@ -397,6 +397,7 @@ to the detailed description below for a complete description.  verb(
632       --modify-window=NUM     compare mod-times with reduced accuracy
633   -T, --temp-dir=DIR          create temporary files in directory DIR
634   -y, --fuzzy                 find similar file for basis if no dest file
635 +     --detect-renamed        try to find renamed files to speed the xfer
636       --compare-dest=DIR      also compare received files relative to DIR
637       --copy-dest=DIR         ... and include copies of unchanged files
638       --link-dest=DIR         hardlink to files in DIR when unchanged
639 @@ -1605,6 +1606,21 @@ Note that the use of the bf(--delete) option might get rid of any potential
640  fuzzy-match files, so either use bf(--delete-after) or specify some
641  filename exclusions if you need to prevent this.
642  
643 +dit(bf(--detect-renamed)) With this option, for each new source file
644 +(call it em(src/S)), rsync looks for a file em(dest/D) anywhere in the
645 +destination that passes the quick check with em(src/S).  If such a em(dest/D)
646 +is found, rsync uses it as an alternate basis for transferring em(S).  The
647 +idea is that if em(src/S) was renamed from em(src/D) (as opposed to em(src/S)
648 +passing the quick check with em(dest/D) by coincidence), the delta-transfer
649 +algorithm will find that all the data matches between em(src/S) and em(dest/D),
650 +and the transfer will be really fast.
651 +
652 +By default, alternate-basis files are hard-linked into a directory named
653 +".~tmp~" in each file's destination directory, but if you've specified
654 +the bf(--partial-dir) option, that directory will be used instead.  These
655 +potential alternate-basis files will be removed as the transfer progresses.
656 +This option conflicts with bf(--inplace) and bf(--append).
657 +
658  dit(bf(--compare-dest=DIR)) This option instructs rsync to use em(DIR) on
659  the destination machine as an additional hierarchy to compare destination
660  files against doing transfers (if the files are missing in the destination
661 diff --git a/util.c b/util.c
662 index 0cafed6..7e5c71f 100644
663 --- a/util.c
664 +++ b/util.c
665 @@ -1064,6 +1064,32 @@ char *normalize_path(char *path, BOOL force_newbuf, unsigned int *len_ptr)
666         return path;
667  }
668  
669 +/* A strcmp() work-alike for file-list comparisons that always compares the
670 + * bytes as unsigned values, so the result is consistent between machines. */
671 +int u_strcmp(const char *p1, const char *p2)
672 +{
673 +        for ( ; *p1; p1++, p2++) {
674 +               if (*p1 != *p2)
675 +                       break;
676 +       }
677 +       /* We stopped at the first differing byte or at the end of p1. */
678 +       return (int)*(uchar*)p1 - (int)*(uchar*)p2;
679 +}
680 +
681 +/* A memcmp() work-alike that compares unsigned-byte values: <0, 0, or >0. */
682 +int u_memcmp(const void *p1, const void *p2, size_t len)
683 +{
684 +       const uchar *u1 = p1;
685 +       const uchar *u2 = p2;
686 +       /* Both pointers must advance on each pass so that every byte is compared. */
687 +       for ( ; len--; u1++, u2++) {
688 +               if (*u1 != *u2)
689 +                       return (int)*u1 - (int)*u2;
690 +       }
691 +       /* All len bytes were identical (or len was 0). */
692 +       return 0;
693 +}
694 +
695  /**
696   * Return a quoted string with the full pathname of the indicated filename.
697   * The string " (in MODNAME)" may also be appended.  The returned pointer