| 1 | This patch adds the --detect-renamed option which makes rsync notice files |
| 2 | that either (1) match in size & modify-time (plus the basename, if possible) |
| 3 | or (2) match in size & checksum (when --checksum was also specified) and use |
| 4 | each match as an alternate basis file to speed up the transfer. |
| 5 | |
| 6 | The algorithm attempts to scan the receiving-side's files in an efficient |
| 7 | manner. If --delete[-before] is enabled, we'll take advantage of the |
| 8 | pre-transfer delete pass to prepare any alternate-basis-file matches we |
| 9 | might find. If --delete-before is not enabled, rsync does the rename scan |
| 10 | during the regular file-sending scan (scanning each directory right before |
| 11 | the generator starts updating files from that dir). In this latter mode, |
| 12 | rsync might delay the updating of a file (if no alternate-basis match was |
| 13 | yet found) until the full scan of the receiving side is complete, at which |
| 14 | point any delayed files are processed. |
| 15 | |
| 16 | I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that |
| 17 | takes advantage of rsync's pre-existing partial-dir logic. This uses less |
| 18 | memory than trying to keep track of the matches internally, and also allows |
| 19 | any deletions or file-updates to occur normally without interfering with |
| 20 | these alternate-basis discoveries. |
| 21 | |
| 22 | To use this patch, run these commands for a successful build: |
| 23 | |
| 24 | patch -p1 <patches/detect-renamed.diff |
| 25 | ./configure (optional if already run) |
| 26 | make |
| 27 | |
| 28 | TODO: |
| 29 | |
| 30 | fattr_find() must never return a match that already has a basis |
| 31 | file. Otherwise we might offer a renamed file to an entry that |
| 32 | can't use it, while failing to offer it to another matching entry |
| 33 | that could use it. |
| 34 | |
| 35 | --- old/compat.c |
| 36 | +++ new/compat.c |
| 37 | @@ -48,6 +48,7 @@ extern int preserve_hard_links; |
| 38 | extern int need_messages_from_generator; |
| 39 | extern int delete_mode, delete_before, delete_during, delete_after; |
| 40 | extern int delete_excluded; |
| 41 | +extern int detect_renamed; |
| 42 | extern int make_backups; |
| 43 | extern char *shell_cmd; /* contains VER.SUB string if client is a pre-release */ |
| 44 | extern char *backup_dir, *backup_suffix; |
| 45 | @@ -205,7 +206,7 @@ void setup_protocol(int f_out,int f_in) |
| 46 | } else if (protocol_version >= 30) { |
| 47 | if (recurse && allow_inc_recurse && !preserve_hard_links |
| 48 | && !delete_before && !delete_after && !delay_updates |
| 49 | - && !prune_empty_dirs) |
| 50 | + && !prune_empty_dirs && !detect_renamed) |
| 51 | inc_recurse = 1; |
| 52 | need_messages_from_generator = 1; |
| 53 | } |
| 54 | --- old/flist.c |
| 55 | +++ new/flist.c |
| 56 | @@ -59,6 +59,7 @@ extern int non_perishable_cnt; |
| 57 | extern int prune_empty_dirs; |
| 58 | extern int copy_links; |
| 59 | extern int copy_unsafe_links; |
| 60 | +extern int detect_renamed; |
| 61 | extern int protocol_version; |
| 62 | extern int sanitize_paths; |
| 63 | extern struct stats stats; |
| 64 | @@ -95,6 +96,8 @@ static int64 tmp_dev, tmp_ino; |
| 65 | #endif |
| 66 | static char tmp_sum[MAX_DIGEST_LEN]; |
| 67 | |
| 68 | +struct file_list the_fattr_list; |
| 69 | + |
| 70 | static char empty_sum[MAX_DIGEST_LEN]; |
| 71 | static int flist_count_offset; /* for --delete --progress */ |
| 72 | |
| 73 | @@ -261,6 +264,45 @@ static int is_excluded(char *fname, int |
| 74 | return 0; |
| 75 | } |
| 76 | |
| 77 | +/* qsort() comparator used to build the_fattr_list. Entries are ordered by |
| 78 | + * size, then by checksum (if always_checksum is set) or else by mod-time, |
| 79 | + * then by basename, and finally by dirname (a NULL dirname sorts first). |
| 80 | + * Any entry that is not a non-empty regular file with a basename sorts after |
| 81 | + * all the usable entries, and two such entries compare as equal. */ |
| 82 | +static int fattr_compare(struct file_struct **file1, struct file_struct **file2) |
| 83 | +{ |
| 84 | +	struct file_struct *a = *file1; |
| 85 | +	struct file_struct *b = *file2; |
| 86 | +	int64 a_len = F_LENGTH(a), b_len = F_LENGTH(b); |
| 87 | +	int a_skip = !a->basename || !S_ISREG(a->mode) || !a_len; |
| 88 | +	int b_skip = !b->basename || !S_ISREG(b->mode) || !b_len; |
| 89 | +	int diff; |
| 90 | + |
| 91 | +	if (a_skip || b_skip) |
| 92 | +		return a_skip - b_skip; |
| 93 | + |
| 94 | +	/* Sizes may be wider than an int, so compare rather than subtract. */ |
| 95 | +	if (a_len != b_len) |
| 96 | +		return a_len < b_len ? -1 : 1; |
| 97 | + |
| 98 | +	/* Secondary key: the checksum when we have one, otherwise the mod-time. */ |
| 99 | +	if (!always_checksum) { |
| 100 | +		if (a->modtime != b->modtime) |
| 101 | +			return a->modtime < b->modtime ? -1 : 1; |
| 102 | +	} else if ((diff = u_memcmp(F_SUM(a), F_SUM(b), checksum_len)) != 0) |
| 103 | +		return diff; |
| 104 | + |
| 105 | +	if ((diff = u_strcmp(a->basename, b->basename)) != 0) |
| 106 | +		return diff; |
| 107 | + |
| 108 | +	/* Same basename: fall back to the dirname, with NULL sorting first. */ |
| 109 | +	if (a->dirname == b->dirname) |
| 110 | +		return 0; |
| 111 | +	if (!a->dirname || !b->dirname) |
| 112 | +		return a->dirname ? 1 : -1; |
| 113 | +	return u_strcmp(a->dirname, b->dirname); |
| 114 | +} |
| 115 | + |
| 116 | static void send_directory(int f, struct file_list *flist, |
| 117 | char *fbuf, int len, int flags); |
| 118 | |
| 119 | @@ -1855,6 +1897,25 @@ struct file_list *send_file_list(int f, |
| 120 | if (verbose > 2) |
| 121 | rprintf(FINFO, "send_file_list done\n"); |
| 122 | |
| 123 | + if (detect_renamed) { |
| 124 | + int j = flist->count; |
| 125 | + the_fattr_list.count = j; |
| 126 | + the_fattr_list.files = new_array(struct file_struct *, j); |
| 127 | + if (!the_fattr_list.files) |
| 128 | + out_of_memory("recv_file_list"); |
| 129 | + memcpy(the_fattr_list.files, flist->files, |
| 130 | + j * sizeof (struct file_struct *)); |
| 131 | + qsort(the_fattr_list.files, j, |
| 132 | + sizeof the_fattr_list.files[0], (int (*)())fattr_compare); |
| 133 | + the_fattr_list.low = 0; |
| 134 | + while (j-- > 0) { |
| 135 | + struct file_struct *fp = the_fattr_list.files[j]; |
| 136 | + if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp)) |
| 137 | + break; |
| 138 | + } |
| 139 | + the_fattr_list.high = j; |
| 140 | + } |
| 141 | + |
| 142 | if (inc_recurse) { |
| 143 | #ifdef ICONV_OPTION |
| 144 | if (!need_unsorted_flist) |
| 145 | --- old/generator.c |
| 146 | +++ new/generator.c |
| 147 | @@ -80,6 +80,7 @@ extern char *basis_dir[]; |
| 148 | extern int compare_dest; |
| 149 | extern int copy_dest; |
| 150 | extern int link_dest; |
| 151 | +extern int detect_renamed; |
| 152 | extern int whole_file; |
| 153 | extern int list_only; |
| 154 | extern int new_root_dir; |
| 155 | @@ -97,6 +98,7 @@ extern char *backup_suffix; |
| 156 | extern int backup_suffix_len; |
| 157 | extern struct file_list *cur_flist, *first_flist, *dir_flist; |
| 158 | extern struct filter_list_struct server_filter_list; |
| 159 | +extern struct file_list the_fattr_list; |
| 160 | #ifdef ICONV_OPTION |
| 161 | extern int ic_ndx; |
| 162 | #endif |
| 163 | @@ -107,6 +109,7 @@ int maybe_ATTRS_REPORT = 0; |
| 164 | |
| 165 | static dev_t dev_zero; |
| 166 | static int deletion_count = 0; /* used to implement --max-delete */ |
| 167 | +static int unexplored_dirs = 1; |
| 168 | static int deldelay_size = 0, deldelay_cnt = 0; |
| 169 | static char *deldelay_buf = NULL; |
| 170 | static int deldelay_fd = -1; |
| 171 | @@ -115,7 +118,8 @@ static int dir_tweaking; |
| 172 | static int need_retouch_dir_times; |
| 173 | static const char *solo_file = NULL; |
| 174 | |
| 175 | -/* For calling delete_item() and delete_dir_contents(). */ |
| 176 | +/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */ |
| 177 | +#define DEL_NO_DELETIONS (1<<0) |
| 178 | #define DEL_RECURSE (1<<1) /* recurse */ |
| 179 | #define DEL_DIR_IS_EMPTY (1<<2) /* internal delete_FUNCTIONS use only */ |
| 180 | |
| 181 | @@ -137,11 +141,120 @@ static int is_backup_file(char *fn) |
| 182 | return k > 0 && strcmp(fn+k, backup_suffix) == 0; |
| 183 | } |
| 184 | |
| 185 | +/* Binary-search the_fattr_list for a regular file that matches f in either (1) |
| 186 | + * size & modified time (plus the basename, if possible) or (2) size & checksum. |
| 187 | + * Returns an index into the_fattr_list.files (preferring a basename match), or |
| 188 | + * -1 if nothing matched or f is an up-to-date file in the transfer, not a rename. */ |
| 189 | +static int fattr_find(struct file_struct *f, char *fname) |
| 190 | +{ |
| 191 | + int low = the_fattr_list.low, high = the_fattr_list.high; |
| 192 | + int mid, ok_match = -1, good_match = -1; |
| 193 | + struct file_struct *fmid; |
| 194 | + int diff; |
| 195 | + |
| 196 | + while (low <= high) { |
| 197 | + mid = (low + high) / 2; |
| 198 | + fmid = the_fattr_list.files[mid]; |
| 199 | + if (F_LENGTH(fmid) != F_LENGTH(f)) { /* primary sort key: size */ |
| 200 | + if (F_LENGTH(fmid) < F_LENGTH(f)) |
| 201 | + low = mid + 1; |
| 202 | + else |
| 203 | + high = mid - 1; |
| 204 | + continue; |
| 205 | + } |
| 206 | + if (always_checksum) { |
| 207 | + /* We use the FLAG_FILE_SENT flag to indicate when we |
| 208 | + * have computed the checksum for an entry. */ |
| 209 | + if (!(f->flags & FLAG_FILE_SENT)) { |
| 210 | + if (fmid->modtime == f->modtime |
| 211 | + && f_name_cmp(fmid, f) == 0) |
| 212 | + return -1; /* assume we can't help */ |
| 213 | + file_checksum(fname, (char*)F_SUM(f), F_LENGTH(f)); /* the only use of fname */ |
| 214 | + f->flags |= FLAG_FILE_SENT; |
| 215 | + } |
| 216 | + diff = u_memcmp(F_SUM(fmid), F_SUM(f), checksum_len); |
| 217 | + if (diff) { |
| 218 | + if (diff < 0) |
| 219 | + low = mid + 1; |
| 220 | + else |
| 221 | + high = mid - 1; |
| 222 | + continue; |
| 223 | + } |
| 224 | + } else { |
| 225 | + if (fmid->modtime != f->modtime) { |
| 226 | + if (fmid->modtime < f->modtime) |
| 227 | + low = mid + 1; |
| 228 | + else |
| 229 | + high = mid - 1; |
| 230 | + continue; |
| 231 | + } |
| 232 | + } |
| 233 | + ok_match = mid; /* size and checksum/mod-time agree */ |
| 234 | + diff = u_strcmp(fmid->basename, f->basename); |
| 235 | + if (diff == 0) { |
| 236 | + good_match = mid; /* the basename agrees as well */ |
| 237 | + if (fmid->dirname == f->dirname) |
| 238 | + return -1; /* file is up-to-date */ |
| 239 | + if (!fmid->dirname) { /* a NULL dirname sorts first */ |
| 240 | + low = mid + 1; |
| 241 | + continue; |
| 242 | + } |
| 243 | + if (!f->dirname) { |
| 244 | + high = mid - 1; |
| 245 | + continue; |
| 246 | + } |
| 247 | + diff = u_strcmp(fmid->dirname, f->dirname); |
| 248 | + if (diff == 0) |
| 249 | + return -1; /* file is up-to-date */ |
| 250 | + } |
| 251 | + if (diff < 0) |
| 252 | + low = mid + 1; |
| 253 | + else |
| 254 | + high = mid - 1; |
| 255 | + } |
| 256 | + |
| 257 | + return good_match >= 0 ? good_match : ok_match; /* prefer the basename match */ |
| 258 | +} |
| 259 | + |
| 260 | +/* See if fname (the path of the receiving-side file described by file) matches |
| 261 | + * an entry in the_fattr_list. If it does and the matched name does not exist |
| 262 | + * yet, hard-link fname into the match's partial dir as an alternate basis. */ |
| 263 | +static void look_for_rename(struct file_struct *file, char *fname) |
| 264 | +{ |
| 265 | +	struct file_struct *match; |
| 266 | +	char *match_name, *link_name; |
| 267 | +	STRUCT_STAT st; |
| 268 | +	int ndx = fattr_find(file, fname); |
| 269 | + |
| 270 | +	if (ndx < 0) |
| 271 | +		return; |
| 272 | + |
| 273 | +	match = the_fattr_list.files[ndx]; |
| 274 | +	match_name = f_name(match, NULL); |
| 275 | +	/* We don't provide an alternate-basis file if there is a basis file. */ |
| 276 | +	if (link_stat(match_name, &st, 0) == 0) |
| 277 | +		return; |
| 278 | +	link_name = partial_dir_fname(match_name); |
| 279 | +	if (!link_name || !handle_partial_dir(link_name, PDIR_CREATE)) |
| 280 | +		return; |
| 281 | + |
| 282 | +	/* We only use the file if we can hard-link it into our tmp dir. */ |
| 283 | +	if (link(fname, link_name) != 0) { |
| 284 | +		if (errno != EEXIST) |
| 285 | +			handle_partial_dir(link_name, PDIR_DELETE); |
| 286 | +		return; |
| 287 | +	} |
| 288 | +	if (verbose > 2) |
| 289 | +		rprintf(FINFO, "found renamed: %s => %s\n", fname, link_name); |
| 290 | +} |
| 291 | + |
| 292 | /* Delete a file or directory. If DEL_RECURSE is set in the flags, this will |
| 293 | * delete recursively. |
| 294 | * |
| 295 | * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's |
| 296 | * a directory! (The buffer is used for recursion, but returned unchanged.) |
| 297 | + * |
| 298 | + * Also note: --detect-renamed may use this routine with DEL_NO_DELETIONS set! |
| 299 | */ |
| 300 | static enum delret delete_item(char *fbuf, int mode, char *replace, int flags) |
| 301 | { |
| 302 | @@ -163,6 +276,8 @@ static enum delret delete_item(char *fbu |
| 303 | goto check_ret; |
| 304 | /* OK: try to delete the directory. */ |
| 305 | } |
| 306 | + if (flags & DEL_NO_DELETIONS) |
| 307 | + return DR_SUCCESS; |
| 308 | |
| 309 | if (!replace && max_delete >= 0 && ++deletion_count > max_delete) |
| 310 | return DR_AT_LIMIT; |
| 311 | @@ -209,6 +324,8 @@ static enum delret delete_item(char *fbu |
| 312 | * its contents, otherwise just checks for content. Returns DR_SUCCESS or |
| 313 | * DR_NOT_EMPTY. Note that fname must point to a MAXPATHLEN buffer! (The |
| 314 | * buffer is used for recursion, but returned unchanged.) |
| 315 | + * |
| 316 | + * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set! |
| 317 | */ |
| 318 | static enum delret delete_dir_contents(char *fname, int flags) |
| 319 | { |
| 320 | @@ -228,7 +345,9 @@ static enum delret delete_dir_contents(c |
| 321 | save_filters = push_local_filters(fname, dlen); |
| 322 | |
| 323 | non_perishable_cnt = 0; |
| 324 | + file_extra_cnt += SUM_EXTRA_CNT; |
| 325 | dirlist = get_dirlist(fname, dlen, 0); |
| 326 | + file_extra_cnt -= SUM_EXTRA_CNT; |
| 327 | ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS; |
| 328 | |
| 329 | if (!dirlist->count) |
| 330 | @@ -265,6 +384,8 @@ static enum delret delete_dir_contents(c |
| 331 | if (S_ISDIR(fp->mode) |
| 332 | && delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS) |
| 333 | ret = DR_NOT_EMPTY; |
| 334 | + if (detect_renamed && S_ISREG(fp->mode)) |
| 335 | + look_for_rename(fp, fname); |
| 336 | if (delete_item(fname, fp->mode, NULL, flags) != DR_SUCCESS) |
| 337 | ret = DR_NOT_EMPTY; |
| 338 | } |
| 339 | @@ -417,12 +538,17 @@ static void do_delayed_deletions(char *d |
| 340 | * all the --delete-WHEN options. Note that the fbuf pointer must point to a |
| 341 | * MAXPATHLEN buffer with the name of the directory in it (the functions we |
| 342 | * call will append names onto the end, but the old dir value will be restored |
| 343 | - * on exit). */ |
| 344 | -static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) |
| 345 | + * on exit). |
| 346 | + * |
| 347 | + * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set! |
| 348 | + */ |
| 349 | +static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev, |
| 350 | + int flags) |
| 351 | { |
| 352 | static int already_warned = 0; |
| 353 | struct file_list *dirlist; |
| 354 | - char delbuf[MAXPATHLEN]; |
| 355 | + char *p, delbuf[MAXPATHLEN]; |
| 356 | + unsigned remainder; |
| 357 | int dlen, i; |
| 358 | |
| 359 | if (!fbuf) { |
| 360 | @@ -433,21 +559,28 @@ static void delete_in_dir(char *fbuf, st |
| 361 | if (verbose > 2) |
| 362 | rprintf(FINFO, "delete_in_dir(%s)\n", fbuf); |
| 363 | |
| 364 | + flags |= DEL_RECURSE; |
| 365 | + |
| 366 | if (allowed_lull) |
| 367 | maybe_send_keepalive(); |
| 368 | |
| 369 | if (io_error && !ignore_errors) { |
| 370 | - if (already_warned) |
| 371 | + if (!already_warned) { |
| 372 | + rprintf(FINFO, |
| 373 | + "IO error encountered -- skipping file deletion\n"); |
| 374 | + already_warned = 1; |
| 375 | + } |
| 376 | + if (!detect_renamed) |
| 377 | return; |
| 378 | - rprintf(FINFO, |
| 379 | - "IO error encountered -- skipping file deletion\n"); |
| 380 | - already_warned = 1; |
| 381 | - return; |
| 382 | + flags |= DEL_NO_DELETIONS; |
| 383 | } |
| 384 | |
| 385 | dlen = strlen(fbuf); |
| 386 | change_local_filter_dir(fbuf, dlen, F_DEPTH(file)); |
| 387 | |
| 388 | + if (detect_renamed) |
| 389 | + unexplored_dirs--; |
| 390 | + |
| 391 | if (one_file_system) { |
| 392 | if (file->flags & FLAG_TOP_DIR) |
| 393 | filesystem_dev = *fs_dev; |
| 394 | @@ -457,6 +590,11 @@ static void delete_in_dir(char *fbuf, st |
| 395 | |
| 396 | dirlist = get_dirlist(fbuf, dlen, 0); |
| 397 | |
| 398 | + p = fbuf + dlen; |
| 399 | + if (dlen != 1 || *fbuf != '/') |
| 400 | + *p++ = '/'; |
| 401 | + remainder = MAXPATHLEN - (p - fbuf); |
| 402 | + |
| 403 | /* If an item in dirlist is not found in flist, delete it |
| 404 | * from the filesystem. */ |
| 405 | for (i = dirlist->count; i--; ) { |
| 406 | @@ -469,16 +607,23 @@ static void delete_in_dir(char *fbuf, st |
| 407 | f_name(fp, NULL)); |
| 408 | continue; |
| 409 | } |
| 410 | + if (detect_renamed && S_ISREG(fp->mode)) { |
| 411 | + strlcpy(p, fp->basename, remainder); |
| 412 | + look_for_rename(fp, fbuf); |
| 413 | + } |
| 414 | if (flist_find(cur_flist, fp) < 0) { |
| 415 | f_name(fp, delbuf); |
| 416 | - if (delete_during == 2) { |
| 417 | + if (delete_during == 2 && !(flags & DEL_NO_DELETIONS)) { |
| 418 | if (!remember_delete(fp, delbuf)) |
| 419 | break; |
| 420 | } else |
| 421 | - delete_item(delbuf, fp->mode, NULL, DEL_RECURSE); |
| 422 | - } |
| 423 | + delete_item(delbuf, fp->mode, NULL, flags); |
| 424 | + } else if (detect_renamed && S_ISDIR(fp->mode)) |
| 425 | + unexplored_dirs++; |
| 426 | } |
| 427 | |
| 428 | + fbuf[dlen] = '\0'; |
| 429 | + |
| 430 | flist_free(dirlist); |
| 431 | } |
| 432 | |
| 433 | @@ -508,9 +653,9 @@ static void do_delete_pass(void) |
| 434 | || !S_ISDIR(st.st_mode)) |
| 435 | continue; |
| 436 | |
| 437 | - delete_in_dir(fbuf, file, &st.st_dev); |
| 438 | + delete_in_dir(fbuf, file, &st.st_dev, 0); |
| 439 | } |
| 440 | - delete_in_dir(NULL, NULL, &dev_zero); |
| 441 | + delete_in_dir(NULL, NULL, &dev_zero, 0); |
| 442 | |
| 443 | if (do_progress && !am_server) |
| 444 | rprintf(FINFO, " \r"); |
| 445 | @@ -1073,6 +1218,7 @@ static int try_dests_non(struct file_str |
| 446 | return j; |
| 447 | } |
| 448 | |
| 449 | +static struct bitbag *delayed_bits = NULL; |
| 450 | static int phase = 0; |
| 451 | static int dflt_perms; |
| 452 | |
| 453 | @@ -1272,8 +1418,12 @@ static void recv_generator(char *fname, |
| 454 | } |
| 455 | } |
| 456 | else if (delete_during && f_out != -1 && !phase && dry_run < 2 |
| 457 | - && (file->flags & FLAG_XFER_DIR)) |
| 458 | - delete_in_dir(fname, file, &real_sx.st.st_dev); |
| 459 | + && (file->flags & FLAG_XFER_DIR)) { |
| 460 | + if (detect_renamed && real_ret != 0) |
| 461 | + unexplored_dirs++; |
| 462 | + delete_in_dir(fname, file, &real_sx.st.st_dev, |
| 463 | + delete_during < 0 ? DEL_NO_DELETIONS : 0); |
| 464 | + } |
| 465 | goto cleanup; |
| 466 | } |
| 467 | |
| 468 | @@ -1545,8 +1695,14 @@ static void recv_generator(char *fname, |
| 469 | if (preserve_hard_links && F_HLINK_NOT_LAST(file)) |
| 470 | goto cleanup; |
| 471 | #endif |
| 472 | - if (stat_errno == ENOENT) |
| 473 | + if (stat_errno == ENOENT) { |
| 474 | + if (detect_renamed && unexplored_dirs > 0 |
| 475 | + && F_LENGTH(file)) { |
| 476 | + bitbag_set_bit(delayed_bits, ndx); |
| 477 | + return; |
| 478 | + } |
| 479 | goto notify_others; |
| 480 | + } |
| 481 | rsyserr(FERROR, stat_errno, "recv_generator: failed to stat %s", |
| 482 | full_fname(fname)); |
| 483 | goto cleanup; |
| 484 | @@ -1861,6 +2017,12 @@ void generate_files(int f_out, const cha |
| 485 | if (verbose > 2) |
| 486 | rprintf(FINFO, "generator starting pid=%ld\n", (long)getpid()); |
| 487 | |
| 488 | + if (detect_renamed) { |
| 489 | + delayed_bits = bitbag_create(cur_flist->count); |
| 490 | + if (!delete_before && !delete_during) |
| 491 | + delete_during = -1; |
| 492 | + } |
| 493 | + |
| 494 | if (delete_before && !solo_file && cur_flist->count > 0) |
| 495 | do_delete_pass(); |
| 496 | if (delete_during == 2) { |
| 497 | @@ -1871,7 +2033,7 @@ void generate_files(int f_out, const cha |
| 498 | } |
| 499 | do_progress = 0; |
| 500 | |
| 501 | - if (append_mode > 0 || whole_file < 0) |
| 502 | + if (append_mode > 0 || detect_renamed || whole_file < 0) |
| 503 | whole_file = 0; |
| 504 | if (verbose >= 2) { |
| 505 | rprintf(FINFO, "delta-transmission %s\n", |
| 506 | @@ -1898,7 +2060,7 @@ void generate_files(int f_out, const cha |
| 507 | dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp)); |
| 508 | } else |
| 509 | dirdev = MAKEDEV(0, 0); |
| 510 | - delete_in_dir(f_name(fp, fbuf), fp, &dirdev); |
| 511 | + delete_in_dir(f_name(fp, fbuf), fp, &dirdev, 0); |
| 512 | } |
| 513 | } |
| 514 | for (i = cur_flist->low; i <= cur_flist->high; i++) { |
| 515 | @@ -1960,7 +2122,21 @@ void generate_files(int f_out, const cha |
| 516 | } while ((cur_flist = cur_flist->next) != NULL); |
| 517 | |
| 518 | if (delete_during) |
| 519 | - delete_in_dir(NULL, NULL, &dev_zero); |
| 520 | + delete_in_dir(NULL, NULL, &dev_zero, 0); |
| 521 | + if (detect_renamed) { |
| 522 | + if (delete_during < 0) |
| 523 | + delete_during = 0; |
| 524 | + detect_renamed = 0; |
| 525 | + |
| 526 | + for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) { |
| 527 | + struct file_struct *file = cur_flist->files[i]; |
| 528 | + if (local_name) |
| 529 | + strlcpy(fbuf, local_name, sizeof fbuf); |
| 530 | + else |
| 531 | + f_name(file, fbuf); |
| 532 | + recv_generator(fbuf, file, i, itemizing, code, f_out); |
| 533 | + } |
| 534 | + } |
| 535 | phase++; |
| 536 | if (verbose > 2) |
| 537 | rprintf(FINFO, "generate_files phase=%d\n", phase); |
| 538 | --- old/options.c |
| 539 | +++ new/options.c |
| 540 | @@ -79,6 +79,7 @@ int am_generator = 0; |
| 541 | int am_starting_up = 1; |
| 542 | int relative_paths = -1; |
| 543 | int implied_dirs = 1; |
| 544 | +int detect_renamed = 0; |
| 545 | int numeric_ids = 0; |
| 546 | int allow_8bit_chars = 0; |
| 547 | int force_delete = 0; |
| 548 | @@ -373,6 +374,7 @@ void usage(enum logcode F) |
| 549 | rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n"); |
| 550 | rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n"); |
| 551 | rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n"); |
| 552 | + rprintf(F," --detect-renamed try to find renamed files to speed up the transfer\n"); |
| 553 | rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n"); |
| 554 | rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n"); |
| 555 | rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n"); |
| 556 | @@ -541,6 +543,7 @@ static struct poptOption long_options[] |
| 557 | {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 }, |
| 558 | {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 }, |
| 559 | {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 }, |
| 560 | + {"detect-renamed", 0, POPT_ARG_NONE, &detect_renamed, 0, 0, 0 }, |
| 561 | {"fuzzy", 'y', POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 }, |
| 562 | {"compress", 'z', POPT_ARG_NONE, 0, 'z', 0, 0 }, |
| 563 | {"compress-level", 0, POPT_ARG_INT, &def_compress_level, 'z', 0, 0 }, |
| 564 | @@ -1472,7 +1475,7 @@ int parse_arguments(int *argc, const cha |
| 565 | inplace = 1; |
| 566 | } |
| 567 | |
| 568 | - if (delay_updates && !partial_dir) |
| 569 | + if ((delay_updates || detect_renamed) && !partial_dir) |
| 570 | partial_dir = tmp_partialdir; |
| 571 | |
| 572 | if (inplace) { |
| 573 | @@ -1481,6 +1484,7 @@ int parse_arguments(int *argc, const cha |
| 574 | snprintf(err_buf, sizeof err_buf, |
| 575 | "--%s cannot be used with --%s\n", |
| 576 | append_mode ? "append" : "inplace", |
| 577 | + detect_renamed ? "detect-renamed" : |
| 578 | delay_updates ? "delay-updates" : "partial-dir"); |
| 579 | return 0; |
| 580 | } |
| 581 | @@ -1820,6 +1824,8 @@ void server_options(char **args,int *arg |
| 582 | args[ac++] = "--super"; |
| 583 | if (size_only) |
| 584 | args[ac++] = "--size-only"; |
| 585 | + if (detect_renamed) |
| 586 | + args[ac++] = "--detect-renamed"; |
| 587 | } |
| 588 | |
| 589 | if (modify_window_set) { |
| 590 | --- old/rsync.yo |
| 591 | +++ new/rsync.yo |
| 592 | @@ -367,6 +367,7 @@ to the detailed description below for a |
| 593 | --modify-window=NUM compare mod-times with reduced accuracy |
| 594 | -T, --temp-dir=DIR create temporary files in directory DIR |
| 595 | -y, --fuzzy find similar file for basis if no dest file |
| 596 | + --detect-renamed try to find renamed files to speed the xfer |
| 597 | --compare-dest=DIR also compare received files relative to DIR |
| 598 | --copy-dest=DIR ... and include copies of unchanged files |
| 599 | --link-dest=DIR hardlink to files in DIR when unchanged |
| 600 | @@ -1347,6 +1348,15 @@ Note that the use of the bf(--delete) op |
| 601 | fuzzy-match files, so either use bf(--delete-after) or specify some |
| 602 | filename exclusions if you need to prevent this. |
| 603 | |
| 604 | +dit(bf(--detect-renamed)) This option tells rsync to scan the receiving |
| 605 | +side for files that have been renamed, and to use any that are found as |
| 606 | +alternate basis files to help speed up the transfer. |
| 607 | +By default, alternate-basis files are hard-linked into a directory named |
| 608 | +".~tmp~" in each file's destination directory, but if you've specified |
| 609 | +the bf(--partial-dir) option, that directory will be used instead. These |
| 610 | +potential alternate-basis files will be removed as the transfer progresses. |
| 611 | +This option conflicts with bf(--inplace) and bf(--append). |
| 612 | + |
| 613 | dit(bf(--compare-dest=DIR)) This option instructs rsync to use em(DIR) on |
| 614 | the destination machine as an additional hierarchy to compare destination |
| 615 | files against doing transfers (if the files are missing in the destination |
| 616 | --- old/util.c |
| 617 | +++ new/util.c |
| 618 | @@ -1026,6 +1026,32 @@ int handle_partial_dir(const char *fname |
| 619 | return 1; |
| 620 | } |
| 621 | |
| 622 | +/* A strcmp() replacement for file-list comparisons that always compares the |
| 623 | + * bytes as unsigned values, so the ordering is consistent between machines. */ |
| 624 | +int u_strcmp(const char *p1, const char *p2) |
| 625 | +{ |
| 626 | +	while (*p1 && *p1 == *p2) { |
| 627 | +		p1++; |
| 628 | +		p2++; |
| 629 | +	} |
| 630 | + |
| 631 | +	return (int)*(uchar*)p1 - (int)*(uchar*)p2; |
| 632 | +} |
| 633 | + |
| 634 | +/* A memcmp() replacement that compares the first len bytes of p1 and p2 as unsigned values; returns <0, 0, or >0. */ |
| 635 | +int u_memcmp(const void *p1, const void *p2, size_t len) |
| 636 | +{ |
| 637 | +	const uchar *u1 = p1; |
| 638 | +	const uchar *u2 = p2; |
| 639 | + |
| 640 | +	for ( ; len--; u1++, u2++) { /* step both pointers; otherwise only byte 0 is ever compared */ |
| 641 | +		if (*u1 != *u2) |
| 642 | +			return (int)*u1 - (int)*u2; |
| 643 | +	} |
| 644 | + |
| 645 | +	return 0; |
| 646 | +} |
| 647 | + |
| 648 | /** |
| 649 | * Determine if a symlink points outside the current directory tree. |
| 650 | * This is considered "unsafe" because e.g. when mirroring somebody |