Commit | Line | Data |
---|---|---|
1fffd582 WD |
1 | This patch adds the --detect-renamed option which makes rsync notice files |
2 | that either (1) match in size & modify-time (plus the basename, if possible) | |
3 | or (2) match in size & checksum (when --checksum was also specified) and use | |
4 | each match as an alternate basis file to speed up the transfer. | |
5 | ||
6 | The algorithm attempts to scan the receiving-side's files in an efficient | |
7 | manner. If --delete[-before] is enabled, we'll take advantage of the | |
8 | pre-transfer delete pass to prepare any alternate-basis-file matches we | |
9 | might find. If --delete-before is not enabled, rsync does the rename scan | |
10 | during the regular file-sending scan (scanning each directory right before | |
11 | the generator starts updating files from that dir). In this latter mode, | |
12 | rsync might delay the updating of a file (if no alternate-basis match was | |
13 | yet found) until the full scan of the receiving side is complete, at which | |
14 | point any delayed files are processed. | |
15 | ||
16 | I chose to hard-link the alternate-basis files into a ".~tmp~" subdir that | |
17 | takes advantage of rsync's pre-existing partial-dir logic. This uses less | |
18 | memory than trying to keep track of the matches internally, and also allows | |
19 | any deletions or file-updates to occur normally without interfering with | |
20 | these alternate-basis discoveries. | |
21 | ||
03019e41 | 22 | To use this patch, run these commands for a successful build: |
1fffd582 | 23 | |
03019e41 WD |
24 | patch -p1 <patches/detect-renamed.diff |
25 | ./configure (optional if already run) | |
1fffd582 WD |
26 | make |
27 | ||
28 | TODO: | |
29 | ||
30 | We need to never return a match from fattr_find() that has a basis | |
31 | file. This will ensure that we don't try to give a renamed file to | |
32 | a file that can't use it, while missing out on giving it to a file | |
33 | that could use it. | |
34 | ||
c1ff70aa | 35 | based-on: a01e3b490eb36ccf9e704840e1b6683dab867550 |
cc3e685d WD |
36 | diff --git a/compat.c b/compat.c |
37 | --- a/compat.c | |
38 | +++ b/compat.c | |
c1ff70aa | 39 | @@ -41,6 +41,7 @@ extern int checksum_seed; |
cdcd2137 WD |
40 | extern int basis_dir_cnt; |
41 | extern int prune_empty_dirs; | |
42 | extern int protocol_version; | |
9bcaf4de | 43 | +extern int detect_renamed; |
cdcd2137 WD |
44 | extern int protect_args; |
45 | extern int preserve_uid; | |
46 | extern int preserve_gid; | |
c1ff70aa | 47 | @@ -121,6 +122,7 @@ void set_allow_inc_recurse(void) |
9aab301c WD |
48 | allow_inc_recurse = 0; |
49 | else if (!am_sender | |
50 | && (delete_before || delete_after | |
51 | + || detect_renamed | |
52 | || delay_updates || prune_empty_dirs)) | |
51bc0e89 WD |
53 | allow_inc_recurse = 0; |
54 | else if (am_server && !local_server | |
fc557362 | 55 | diff --git a/delete.c b/delete.c |
fc557362 WD |
56 | --- a/delete.c |
57 | +++ b/delete.c | |
58 | @@ -25,6 +25,7 @@ | |
59 | extern int am_root; | |
60 | extern int make_backups; | |
61 | extern int max_delete; | |
62 | +extern int detect_renamed; | |
63 | extern char *backup_dir; | |
64 | extern char *backup_suffix; | |
65 | extern int backup_suffix_len; | |
66 | @@ -45,6 +46,8 @@ static inline int is_backup_file(char *fn) | |
67 | * its contents, otherwise just checks for content. Returns DR_SUCCESS or | |
68 | * DR_NOT_EMPTY. Note that fname must point to a MAXPATHLEN buffer! (The | |
69 | * buffer is used for recursion, but returned unchanged.) | |
70 | + * | |
71 | + * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set! | |
72 | */ | |
73 | static enum delret delete_dir_contents(char *fname, uint16 flags) | |
74 | { | |
75 | @@ -64,7 +67,9 @@ static enum delret delete_dir_contents(char *fname, uint16 flags) | |
76 | save_filters = push_local_filters(fname, dlen); | |
77 | ||
78 | non_perishable_cnt = 0; | |
79 | + file_extra_cnt += SUM_EXTRA_CNT; | |
80 | dirlist = get_dirlist(fname, dlen, 0); | |
81 | + file_extra_cnt -= SUM_EXTRA_CNT; | |
82 | ret = non_perishable_cnt ? DR_NOT_EMPTY : DR_SUCCESS; | |
83 | ||
84 | if (!dirlist->used) | |
85 | @@ -104,7 +109,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags) | |
86 | if (S_ISDIR(fp->mode)) { | |
87 | if (delete_dir_contents(fname, flags | DEL_RECURSE) != DR_SUCCESS) | |
88 | ret = DR_NOT_EMPTY; | |
89 | - } | |
90 | + } else if (detect_renamed && S_ISREG(fp->mode)) | |
91 | + look_for_rename(fp, fname); | |
92 | if (delete_item(fname, fp->mode, flags) != DR_SUCCESS) | |
93 | ret = DR_NOT_EMPTY; | |
94 | } | |
95 | @@ -127,6 +133,8 @@ static enum delret delete_dir_contents(char *fname, uint16 flags) | |
96 | * | |
97 | * Note that fbuf must point to a MAXPATHLEN buffer if the mode indicates it's | |
98 | * a directory! (The buffer is used for recursion, but returned unchanged.) | |
99 | + * | |
100 | + * Also note: --detect-renamed may use this routine with DEL_NO_DELETIONS set! | |
101 | */ | |
102 | enum delret delete_item(char *fbuf, uint16 mode, uint16 flags) | |
103 | { | |
cc3e685d WD |
104 | diff --git a/flist.c b/flist.c |
105 | --- a/flist.c | |
106 | +++ b/flist.c | |
c1ff70aa | 107 | @@ -63,6 +63,7 @@ extern int non_perishable_cnt; |
1fffd582 WD |
108 | extern int prune_empty_dirs; |
109 | extern int copy_links; | |
110 | extern int copy_unsafe_links; | |
111 | +extern int detect_renamed; | |
112 | extern int protocol_version; | |
113 | extern int sanitize_paths; | |
cc3e685d | 114 | extern int munge_symlinks; |
c1ff70aa | 115 | @@ -134,6 +135,8 @@ static int64 tmp_dev, tmp_ino; |
7b80cd0e | 116 | #endif |
87a38eea | 117 | static char tmp_sum[MAX_DIGEST_LEN]; |
1fffd582 WD |
118 | |
119 | +struct file_list the_fattr_list; | |
120 | + | |
87a38eea | 121 | static char empty_sum[MAX_DIGEST_LEN]; |
a47d1f86 | 122 | static int flist_count_offset; /* for --delete --progress */ |
fc557362 | 123 | |
c1ff70aa | 124 | @@ -301,6 +304,45 @@ static int is_excluded(const char *fname, int is_dir, int filter_level) |
73adde61 | 125 | return 0; |
1fffd582 WD |
126 | } |
127 | ||
128 | +static int fattr_compare(struct file_struct **file1, struct file_struct **file2) | |
129 | +{ | |
130 | + struct file_struct *f1 = *file1; | |
131 | + struct file_struct *f2 = *file2; | |
a47d1f86 | 132 | + int64 len1 = F_LENGTH(f1), len2 = F_LENGTH(f2); |
1fffd582 WD |
133 | + int diff; |
134 | + | |
a47d1f86 WD |
135 | + if (!f1->basename || !S_ISREG(f1->mode) || !len1) { |
136 | + if (!f2->basename || !S_ISREG(f2->mode) || !len2) | |
1fffd582 WD |
137 | + return 0; |
138 | + return 1; | |
139 | + } | |
a47d1f86 | 140 | + if (!f2->basename || !S_ISREG(f2->mode) || !len2) |
1fffd582 WD |
141 | + return -1; |
142 | + | |
143 | + /* Don't use diff for values that are longer than an int. */ | |
a47d1f86 WD |
144 | + if (len1 != len2) |
145 | + return len1 < len2 ? -1 : 1; | |
1fffd582 WD |
146 | + |
147 | + if (always_checksum) { | |
70891d26 | 148 | + diff = u_memcmp(F_SUM(f1), F_SUM(f2), checksum_len); |
1fffd582 WD |
149 | + if (diff) |
150 | + return diff; | |
151 | + } else if (f1->modtime != f2->modtime) | |
152 | + return f1->modtime < f2->modtime ? -1 : 1; | |
153 | + | |
154 | + diff = u_strcmp(f1->basename, f2->basename); | |
155 | + if (diff) | |
156 | + return diff; | |
157 | + | |
158 | + if (f1->dirname == f2->dirname) | |
159 | + return 0; | |
160 | + if (!f1->dirname) | |
161 | + return -1; | |
162 | + if (!f2->dirname) | |
163 | + return 1; | |
164 | + return u_strcmp(f1->dirname, f2->dirname); | |
165 | +} | |
166 | + | |
73adde61 | 167 | static void send_directory(int f, struct file_list *flist, |
fc068916 | 168 | char *fbuf, int len, int flags); |
1fffd582 | 169 | |
c1ff70aa | 170 | @@ -2548,6 +2590,25 @@ struct file_list *recv_file_list(int f) |
6fa0767f | 171 | |
f2863bc0 | 172 | flist_sort_and_clean(flist, relative_paths); |
1fffd582 WD |
173 | |
174 | + if (detect_renamed) { | |
7bfcb297 WD |
175 | + int j = flist->used; |
176 | + the_fattr_list.used = j; | |
1fffd582 WD |
177 | + the_fattr_list.files = new_array(struct file_struct *, j); |
178 | + if (!the_fattr_list.files) | |
70891d26 | 179 | + out_of_memory("recv_file_list"); |
1fffd582 WD |
180 | + memcpy(the_fattr_list.files, flist->files, |
181 | + j * sizeof (struct file_struct *)); | |
182 | + qsort(the_fattr_list.files, j, | |
fc068916 | 183 | + sizeof the_fattr_list.files[0], (int (*)())fattr_compare); |
1fffd582 WD |
184 | + the_fattr_list.low = 0; |
185 | + while (j-- > 0) { | |
186 | + struct file_struct *fp = the_fattr_list.files[j]; | |
a47d1f86 | 187 | + if (fp->basename && S_ISREG(fp->mode) && F_LENGTH(fp)) |
1fffd582 WD |
188 | + break; |
189 | + } | |
190 | + the_fattr_list.high = j; | |
191 | + } | |
192 | + | |
6fa0767f WD |
193 | if (protocol_version < 30) { |
194 | /* Recv the io_error flag */ | |
fc557362 | 195 | int err = read_int(f); |
cc3e685d WD |
196 | diff --git a/generator.c b/generator.c |
197 | --- a/generator.c | |
198 | +++ b/generator.c | |
c1ff70aa | 199 | @@ -80,6 +80,7 @@ extern char *partial_dir; |
1fffd582 WD |
200 | extern int compare_dest; |
201 | extern int copy_dest; | |
202 | extern int link_dest; | |
203 | +extern int detect_renamed; | |
204 | extern int whole_file; | |
205 | extern int list_only; | |
ccdb48f6 | 206 | extern int read_batch; |
c1ff70aa | 207 | @@ -97,10 +98,12 @@ extern char *tmpdir; |
fc557362 | 208 | extern char *basis_dir[MAX_BASIS_DIRS+1]; |
fc068916 | 209 | extern struct file_list *cur_flist, *first_flist, *dir_flist; |
7170ca8d | 210 | extern filter_rule_list filter_list, daemon_filter_list; |
fc068916 | 211 | +extern struct file_list the_fattr_list; |
d4dd2dd5 | 212 | |
fc557362 | 213 | int maybe_ATTRS_REPORT = 0; |
d16b5fd6 | 214 | |
fc068916 | 215 | static dev_t dev_zero; |
1fffd582 | 216 | +static int unexplored_dirs = 1; |
1071853f WD |
217 | static int deldelay_size = 0, deldelay_cnt = 0; |
218 | static char *deldelay_buf = NULL; | |
219 | static int deldelay_fd = -1; | |
c1ff70aa | 220 | @@ -180,6 +183,8 @@ static int remember_delete(struct file_struct *file, const char *fname, int flag |
fc557362 WD |
221 | if (!flush_delete_delay()) |
222 | return 0; | |
223 | } | |
224 | + if (flags & DEL_NO_DELETIONS) | |
225 | + return DR_SUCCESS; | |
1fffd582 | 226 | |
fc557362 WD |
227 | return 1; |
228 | } | |
c1ff70aa | 229 | @@ -271,13 +276,18 @@ static void do_delayed_deletions(char *delbuf) |
fc557362 WD |
230 | * all the --delete-WHEN options. Note that the fbuf pointer must point to a |
231 | * MAXPATHLEN buffer with the name of the directory in it (the functions we | |
232 | * call will append names onto the end, but the old dir value will be restored | |
233 | - * on exit). */ | |
234 | -static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) | |
235 | + * on exit). | |
236 | + * | |
237 | + * Note: --detect-renamed may use this routine with DEL_NO_DELETIONS set! | |
238 | + */ | |
239 | +static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev, | |
240 | + int del_flags) | |
241 | { | |
242 | static int already_warned = 0; | |
243 | struct file_list *dirlist; | |
244 | - char delbuf[MAXPATHLEN]; | |
245 | - int dlen, i; | |
246 | + char *p, delbuf[MAXPATHLEN]; | |
247 | + unsigned remainder; | |
248 | + int dlen, i, restore_dot = 0; | |
249 | int save_uid_ndx = uid_ndx; | |
1fffd582 | 250 | |
fc557362 | 251 | if (!fbuf) { |
c1ff70aa WD |
252 | @@ -292,17 +302,22 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) |
253 | maybe_send_keepalive(time(NULL), True); | |
a5e6228a | 254 | |
fc557362 WD |
255 | if (io_error && !ignore_errors) { |
256 | - if (already_warned) | |
257 | + if (!already_warned) { | |
258 | + rprintf(FINFO, | |
259 | + "IO error encountered -- skipping file deletion\n"); | |
260 | + already_warned = 1; | |
261 | + } | |
262 | + if (!detect_renamed) | |
263 | return; | |
264 | - rprintf(FINFO, | |
265 | - "IO error encountered -- skipping file deletion\n"); | |
266 | - already_warned = 1; | |
267 | - return; | |
268 | + del_flags |= DEL_NO_DELETIONS; | |
269 | } | |
270 | ||
271 | dlen = strlen(fbuf); | |
272 | change_local_filter_dir(fbuf, dlen, F_DEPTH(file)); | |
273 | ||
274 | + if (detect_renamed) | |
275 | + unexplored_dirs--; | |
276 | + | |
277 | if (one_file_system) { | |
278 | if (file->flags & FLAG_TOP_DIR) | |
279 | filesystem_dev = *fs_dev; | |
c1ff70aa | 280 | @@ -315,6 +330,14 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) |
fc557362 WD |
281 | |
282 | dirlist = get_dirlist(fbuf, dlen, 0); | |
283 | ||
284 | + p = fbuf + dlen; | |
285 | + if (dlen == 1 && *fbuf == '.') { | |
286 | + restore_dot = 1; | |
287 | + p = fbuf; | |
288 | + } else if (dlen != 1 || *fbuf != '/') | |
289 | + *p++ = '/'; | |
290 | + remainder = MAXPATHLEN - (p - fbuf); | |
291 | + | |
292 | /* If an item in dirlist is not found in flist, delete it | |
293 | * from the filesystem. */ | |
294 | for (i = dirlist->used; i--; ) { | |
c1ff70aa | 295 | @@ -327,6 +350,10 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) |
fc557362 WD |
296 | f_name(fp, NULL)); |
297 | continue; | |
298 | } | |
299 | + if (detect_renamed && S_ISREG(fp->mode)) { | |
300 | + strlcpy(p, fp->basename, remainder); | |
301 | + look_for_rename(fp, fbuf); | |
302 | + } | |
303 | /* Here we want to match regardless of file type. Replacement | |
304 | * of a file with one of another type is handled separately by | |
305 | * a delete_item call with a DEL_MAKE_ROOM flag. */ | |
c1ff70aa | 306 | @@ -335,14 +362,19 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) |
fc557362 WD |
307 | if (!(fp->mode & S_IWUSR) && !am_root && (uid_t)F_OWNER(fp) == our_uid) |
308 | flags |= DEL_NO_UID_WRITE; | |
309 | f_name(fp, delbuf); | |
310 | - if (delete_during == 2) { | |
311 | - if (!remember_delete(fp, delbuf, flags)) | |
312 | + if (delete_during == 2 && !(del_flags & DEL_NO_DELETIONS)) { | |
313 | + if (!remember_delete(fp, delbuf, del_flags | flags)) | |
314 | break; | |
315 | } else | |
316 | - delete_item(delbuf, fp->mode, flags); | |
317 | - } | |
318 | + delete_item(delbuf, fp->mode, del_flags | flags); | |
319 | + } else if (detect_renamed && S_ISDIR(fp->mode)) | |
320 | + unexplored_dirs++; | |
321 | } | |
322 | ||
323 | + if (restore_dot) | |
324 | + fbuf[0] = '.'; | |
325 | + fbuf[dlen] = '\0'; | |
326 | + | |
327 | flist_free(dirlist); | |
328 | ||
329 | if (!save_uid_ndx) { | |
c1ff70aa | 330 | @@ -380,14 +412,122 @@ static void do_delete_pass(void) |
fc557362 WD |
331 | || !S_ISDIR(st.st_mode)) |
332 | continue; | |
333 | ||
334 | - delete_in_dir(fbuf, file, &st.st_dev); | |
335 | + delete_in_dir(fbuf, file, &st.st_dev, 0); | |
336 | } | |
337 | - delete_in_dir(NULL, NULL, &dev_zero); | |
338 | + delete_in_dir(NULL, NULL, &dev_zero, 0); | |
339 | ||
340 | if (INFO_GTE(FLIST, 2) && !am_server) | |
341 | rprintf(FINFO, " \r"); | |
1fffd582 WD |
342 | } |
343 | ||
344 | +/* Search for a regular file that matches either (1) the size & modified | |
345 | + * time (plus the basename, if possible) or (2) the size & checksum. If | |
346 | + * we find an exact match down to the dirname, return -1 because we found | |
347 | + * an up-to-date file in the transfer, not a renamed file. */ | |
a47d1f86 | 348 | +static int fattr_find(struct file_struct *f, char *fname) |
1fffd582 WD |
349 | +{ |
350 | + int low = the_fattr_list.low, high = the_fattr_list.high; | |
351 | + int mid, ok_match = -1, good_match = -1; | |
352 | + struct file_struct *fmid; | |
353 | + int diff; | |
354 | + | |
355 | + while (low <= high) { | |
356 | + mid = (low + high) / 2; | |
357 | + fmid = the_fattr_list.files[mid]; | |
a47d1f86 WD |
358 | + if (F_LENGTH(fmid) != F_LENGTH(f)) { |
359 | + if (F_LENGTH(fmid) < F_LENGTH(f)) | |
1fffd582 WD |
360 | + low = mid + 1; |
361 | + else | |
362 | + high = mid - 1; | |
363 | + continue; | |
364 | + } | |
365 | + if (always_checksum) { | |
9bcaf4de | 366 | + /* We use the FLAG_FILE_SENT flag to indicate when we |
a47d1f86 | 367 | + * have computed the checksum for an entry. */ |
9bcaf4de | 368 | + if (!(f->flags & FLAG_FILE_SENT)) { |
1fffd582 WD |
369 | + if (fmid->modtime == f->modtime |
370 | + && f_name_cmp(fmid, f) == 0) | |
371 | + return -1; /* assume we can't help */ | |
d7d6347c | 372 | + file_checksum(fname, F_SUM(f), F_LENGTH(f)); |
9bcaf4de | 373 | + f->flags |= FLAG_FILE_SENT; |
1fffd582 | 374 | + } |
70891d26 | 375 | + diff = u_memcmp(F_SUM(fmid), F_SUM(f), checksum_len); |
1fffd582 WD |
376 | + if (diff) { |
377 | + if (diff < 0) | |
378 | + low = mid + 1; | |
379 | + else | |
380 | + high = mid - 1; | |
381 | + continue; | |
382 | + } | |
383 | + } else { | |
384 | + if (fmid->modtime != f->modtime) { | |
385 | + if (fmid->modtime < f->modtime) | |
386 | + low = mid + 1; | |
387 | + else | |
388 | + high = mid - 1; | |
389 | + continue; | |
390 | + } | |
391 | + } | |
392 | + ok_match = mid; | |
393 | + diff = u_strcmp(fmid->basename, f->basename); | |
394 | + if (diff == 0) { | |
395 | + good_match = mid; | |
396 | + if (fmid->dirname == f->dirname) | |
397 | + return -1; /* file is up-to-date */ | |
398 | + if (!fmid->dirname) { | |
399 | + low = mid + 1; | |
400 | + continue; | |
401 | + } | |
402 | + if (!f->dirname) { | |
403 | + high = mid - 1; | |
404 | + continue; | |
405 | + } | |
406 | + diff = u_strcmp(fmid->dirname, f->dirname); | |
407 | + if (diff == 0) | |
408 | + return -1; /* file is up-to-date */ | |
409 | + } | |
410 | + if (diff < 0) | |
411 | + low = mid + 1; | |
412 | + else | |
413 | + high = mid - 1; | |
414 | + } | |
415 | + | |
416 | + return good_match >= 0 ? good_match : ok_match; | |
417 | +} | |
418 | + | |
fc557362 | 419 | +void look_for_rename(struct file_struct *file, char *fname) |
1fffd582 WD |
420 | +{ |
421 | + struct file_struct *fp; | |
422 | + char *partialptr, *fn; | |
423 | + STRUCT_STAT st; | |
424 | + int ndx; | |
425 | + | |
6fa0767f | 426 | + if (!partial_dir || (ndx = fattr_find(file, fname)) < 0) |
1fffd582 WD |
427 | + return; |
428 | + | |
429 | + fp = the_fattr_list.files[ndx]; | |
430 | + fn = f_name(fp, NULL); | |
431 | + /* We don't provide an alternate-basis file if there is a basis file. */ | |
432 | + if (link_stat(fn, &st, 0) == 0) | |
433 | + return; | |
1fffd582 | 434 | + |
6fa0767f WD |
435 | + if (!dry_run) { |
436 | + if ((partialptr = partial_dir_fname(fn)) == NULL | |
437 | + || !handle_partial_dir(partialptr, PDIR_CREATE)) | |
438 | + return; | |
439 | + /* We only use the file if we can hard-link it into our tmp dir. */ | |
440 | + if (link(fname, partialptr) != 0) { | |
441 | + if (errno != EEXIST) | |
442 | + handle_partial_dir(partialptr, PDIR_DELETE); | |
443 | + return; | |
1fffd582 | 444 | + } |
1fffd582 WD |
445 | + } |
446 | + | |
6fa0767f | 447 | + /* I think this falls into the -vv category with "%s is uptodate", etc. */ |
fc557362 | 448 | + if (INFO_GTE(MISC, 2)) |
6fa0767f | 449 | + rprintf(FINFO, "found renamed: %s => %s\n", fname, fn); |
1fffd582 | 450 | +} |
87d0091c | 451 | + |
fc557362 | 452 | int unchanged_attrs(const char *fname, struct file_struct *file, stat_x *sxp) |
1fffd582 | 453 | { |
72e5645e | 454 | #ifndef CAN_SET_SYMLINK_TIMES |
c1ff70aa | 455 | @@ -1048,6 +1188,7 @@ static void list_file_entry(struct file_struct *f) |
cdcd2137 | 456 | } |
1fffd582 WD |
457 | } |
458 | ||
459 | +static struct bitbag *delayed_bits = NULL; | |
460 | static int phase = 0; | |
ffc18846 | 461 | static int dflt_perms; |
1fffd582 | 462 | |
c1ff70aa | 463 | @@ -1327,9 +1468,12 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, |
fc068916 | 464 | } |
c0c7984e | 465 | else if (delete_during && f_out != -1 && !phase |
65ecbe35 WD |
466 | && !(file->flags & FLAG_MISSING_DIR)) { |
467 | - if (file->flags & FLAG_CONTENT_DIR) | |
468 | - delete_in_dir(fname, file, &real_sx.st.st_dev); | |
469 | - else | |
470 | + if (file->flags & FLAG_CONTENT_DIR) { | |
471 | + if (detect_renamed && real_ret != 0) | |
472 | + unexplored_dirs++; | |
473 | + delete_in_dir(fname, file, &real_sx.st.st_dev, | |
474 | + delete_during < 0 ? DEL_NO_DELETIONS : 0); | |
475 | + } else | |
476 | change_local_filter_dir(fname, strlen(fname), F_DEPTH(file)); | |
65ecbe35 | 477 | } |
cbdf862c | 478 | goto cleanup; |
c1ff70aa | 479 | @@ -1588,8 +1732,14 @@ static void recv_generator(char *fname, struct file_struct *file, int ndx, |
ffc18846 | 480 | goto cleanup; |
ccdb48f6 | 481 | } |
81172142 | 482 | #endif |
1fffd582 WD |
483 | - if (stat_errno == ENOENT) |
484 | + if (stat_errno == ENOENT) { | |
485 | + if (detect_renamed && unexplored_dirs > 0 | |
a47d1f86 | 486 | + && F_LENGTH(file)) { |
1fffd582 WD |
487 | + bitbag_set_bit(delayed_bits, ndx); |
488 | + return; | |
489 | + } | |
490 | goto notify_others; | |
491 | + } | |
cc3e685d | 492 | rsyserr(FERROR_XFER, stat_errno, "recv_generator: failed to stat %s", |
1fffd582 | 493 | full_fname(fname)); |
ffc18846 | 494 | goto cleanup; |
c1ff70aa | 495 | @@ -2056,6 +2206,12 @@ void generate_files(int f_out, const char *local_name) |
fc557362 | 496 | if (DEBUG_GTE(GENR, 1)) |
fc068916 | 497 | rprintf(FINFO, "generator starting pid=%ld\n", (long)getpid()); |
1fffd582 WD |
498 | |
499 | + if (detect_renamed) { | |
7bfcb297 | 500 | + delayed_bits = bitbag_create(cur_flist->used); |
1fffd582 WD |
501 | + if (!delete_before && !delete_during) |
502 | + delete_during = -1; | |
503 | + } | |
504 | + | |
9c85142a | 505 | if (delete_before && !solo_file && cur_flist->used > 0) |
73adde61 | 506 | do_delete_pass(); |
1071853f | 507 | if (delete_during == 2) { |
c1ff70aa | 508 | @@ -2066,7 +2222,7 @@ void generate_files(int f_out, const char *local_name) |
1071853f | 509 | } |
fc557362 | 510 | info_levels[INFO_FLIST] = info_levels[INFO_PROGRESS] = 0; |
1fffd582 | 511 | |
fc068916 WD |
512 | - if (append_mode > 0 || whole_file < 0) |
513 | + if (append_mode > 0 || detect_renamed || whole_file < 0) | |
1fffd582 | 514 | whole_file = 0; |
fc557362 | 515 | if (DEBUG_GTE(FLIST, 1)) { |
1fffd582 | 516 | rprintf(FINFO, "delta-transmission %s\n", |
c1ff70aa | 517 | @@ -2102,7 +2258,7 @@ void generate_files(int f_out, const char *local_name) |
4c107044 WD |
518 | dirdev = MAKEDEV(DEV_MAJOR(devp), DEV_MINOR(devp)); |
519 | } else | |
520 | dirdev = MAKEDEV(0, 0); | |
65ecbe35 WD |
521 | - delete_in_dir(fbuf, fp, &dirdev); |
522 | + delete_in_dir(fbuf, fp, &dirdev, 0); | |
523 | } else | |
524 | change_local_filter_dir(fbuf, strlen(fbuf), F_DEPTH(fp)); | |
fc068916 | 525 | } |
c1ff70aa | 526 | @@ -2149,7 +2305,21 @@ void generate_files(int f_out, const char *local_name) |
fc557362 | 527 | } while ((cur_flist = cur_flist->next) != NULL); |
fc068916 | 528 | |
2dbc45e7 | 529 | if (delete_during) |
73adde61 WD |
530 | - delete_in_dir(NULL, NULL, &dev_zero); |
531 | + delete_in_dir(NULL, NULL, &dev_zero, 0); | |
2dbc45e7 WD |
532 | + if (detect_renamed) { |
533 | + if (delete_during < 0) | |
534 | + delete_during = 0; | |
535 | + detect_renamed = 0; | |
1fffd582 | 536 | + |
2dbc45e7 | 537 | + for (i = -1; (i = bitbag_next_bit(delayed_bits, i)) >= 0; ) { |
9bcaf4de | 538 | + struct file_struct *file = cur_flist->files[i]; |
2dbc45e7 WD |
539 | + if (local_name) |
540 | + strlcpy(fbuf, local_name, sizeof fbuf); | |
541 | + else | |
542 | + f_name(file, fbuf); | |
543 | + recv_generator(fbuf, file, i, itemizing, code, f_out); | |
544 | + } | |
545 | + } | |
546 | phase++; | |
fc557362 | 547 | if (DEBUG_GTE(GENR, 1)) |
2dbc45e7 | 548 | rprintf(FINFO, "generate_files phase=%d\n", phase); |
cc3e685d WD |
549 | diff --git a/options.c b/options.c |
550 | --- a/options.c | |
551 | +++ b/options.c | |
fc557362 WD |
552 | @@ -80,6 +80,7 @@ int am_server = 0; |
553 | int am_sender = 0; | |
a94141d9 | 554 | int am_starting_up = 1; |
1fffd582 | 555 | int relative_paths = -1; |
1fffd582 | 556 | +int detect_renamed = 0; |
fc557362 WD |
557 | int implied_dirs = 1; |
558 | int missing_args = 0; /* 0 = FERROR_XFER, 1 = ignore, 2 = delete */ | |
1fffd582 | 559 | int numeric_ids = 0; |
72e5645e | 560 | @@ -743,6 +744,7 @@ void usage(enum logcode F) |
1fffd582 WD |
561 | rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n"); |
562 | rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n"); | |
563 | rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n"); | |
564 | + rprintf(F," --detect-renamed try to find renamed files to speed up the transfer\n"); | |
565 | rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n"); | |
566 | rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n"); | |
567 | rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n"); | |
72e5645e | 568 | @@ -938,6 +940,7 @@ static struct poptOption long_options[] = { |
1fffd582 WD |
569 | {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 }, |
570 | {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 }, | |
571 | {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 }, | |
572 | + {"detect-renamed", 0, POPT_ARG_NONE, &detect_renamed, 0, 0, 0 }, | |
c0c7984e WD |
573 | {"fuzzy", 'y', POPT_ARG_VAL, &fuzzy_basis, 1, 0, 0 }, |
574 | {"no-fuzzy", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 }, | |
575 | {"no-y", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 }, | |
c1ff70aa | 576 | @@ -2157,7 +2160,7 @@ int parse_arguments(int *argc_p, const char ***argv_p) |
1fffd582 WD |
577 | inplace = 1; |
578 | } | |
579 | ||
580 | - if (delay_updates && !partial_dir) | |
581 | + if ((delay_updates || detect_renamed) && !partial_dir) | |
582 | partial_dir = tmp_partialdir; | |
583 | ||
584 | if (inplace) { | |
c1ff70aa | 585 | @@ -2166,6 +2169,7 @@ int parse_arguments(int *argc_p, const char ***argv_p) |
1fffd582 WD |
586 | snprintf(err_buf, sizeof err_buf, |
587 | "--%s cannot be used with --%s\n", | |
588 | append_mode ? "append" : "inplace", | |
589 | + detect_renamed ? "detect-renamed" : | |
590 | delay_updates ? "delay-updates" : "partial-dir"); | |
591 | return 0; | |
592 | } | |
c1ff70aa | 593 | @@ -2528,6 +2532,8 @@ void server_options(char **args, int *argc_p) |
51bc0e89 WD |
594 | args[ac++] = "--super"; |
595 | if (size_only) | |
596 | args[ac++] = "--size-only"; | |
597 | + if (detect_renamed) | |
598 | + args[ac++] = "--detect-renamed"; | |
fc557362 WD |
599 | if (do_stats) |
600 | args[ac++] = "--stats"; | |
51bc0e89 | 601 | } else { |
fc557362 | 602 | diff --git a/rsync.h b/rsync.h |
fc557362 WD |
603 | --- a/rsync.h |
604 | +++ b/rsync.h | |
5214a41b | 605 | @@ -246,7 +246,7 @@ enum msgcode { |
72e5645e | 606 | #define NDX_DEL_STATS -3 |
fc557362 WD |
607 | #define NDX_FLIST_OFFSET -101 |
608 | ||
609 | -/* For calling delete_item() and delete_dir_contents(). */ | |
610 | +/* For calling delete_item(), delete_dir_contents(), and delete_in_dir(). */ | |
611 | #define DEL_NO_UID_WRITE (1<<0) /* file/dir has our uid w/o write perm */ | |
612 | #define DEL_RECURSE (1<<1) /* if dir, delete all contents */ | |
613 | #define DEL_DIR_IS_EMPTY (1<<2) /* internal delete_FUNCTIONS use only */ | |
5214a41b | 614 | @@ -256,6 +256,7 @@ enum msgcode { |
fc557362 WD |
615 | #define DEL_FOR_DEVICE (1<<6) /* making room for a replacement device */ |
616 | #define DEL_FOR_SPECIAL (1<<7) /* making room for a replacement special */ | |
617 | #define DEL_FOR_BACKUP (1<<8) /* the delete is for a backup operation */ | |
618 | +#define DEL_NO_DELETIONS (1<<9) /* just check for renames w/o deleting */ | |
619 | ||
620 | #define DEL_MAKE_ROOM (DEL_FOR_FILE|DEL_FOR_DIR|DEL_FOR_SYMLINK|DEL_FOR_DEVICE|DEL_FOR_SPECIAL) | |
621 | ||
cc3e685d WD |
622 | diff --git a/rsync.yo b/rsync.yo |
623 | --- a/rsync.yo | |
624 | +++ b/rsync.yo | |
fc557362 | 625 | @@ -397,6 +397,7 @@ to the detailed description below for a complete description. verb( |
1fffd582 WD |
626 | --modify-window=NUM compare mod-times with reduced accuracy |
627 | -T, --temp-dir=DIR create temporary files in directory DIR | |
628 | -y, --fuzzy find similar file for basis if no dest file | |
629 | + --detect-renamed try to find renamed files to speed the xfer | |
630 | --compare-dest=DIR also compare received files relative to DIR | |
631 | --copy-dest=DIR ... and include copies of unchanged files | |
632 | --link-dest=DIR hardlink to files in DIR when unchanged | |
c1ff70aa | 633 | @@ -1640,6 +1641,21 @@ Note that the use of the bf(--delete) option might get rid of any potential |
1fffd582 WD |
634 | fuzzy-match files, so either use bf(--delete-after) or specify some |
635 | filename exclusions if you need to prevent this. | |
636 | ||
6fa0767f WD |
637 | +dit(bf(--detect-renamed)) With this option, for each new source file |
638 | +(call it em(src/S)), rsync looks for a file em(dest/D) anywhere in the | |
639 | +destination that passes the quick check with em(src/S). If such a em(dest/D) | |
640 | +is found, rsync uses it as an alternate basis for transferring em(S). The | |
641 | +idea is that if em(src/S) was renamed from em(src/D) (as opposed to em(src/S) | |
642 | +passing the quick check with em(dest/D) by coincidence), the delta-transfer | |
643 | +algorithm will find that all the data matches between em(src/S) and em(dest/D), | |
644 | +and the transfer will be really fast. | |
645 | + | |
1fffd582 WD |
646 | +By default, alternate-basis files are hard-linked into a directory named |
647 | +".~tmp~" in each file's destination directory, but if you've specified | |
648 | +the bf(--partial-dir) option, that directory will be used instead. These | |
649 | +potential alternate-basis files will be removed as the transfer progresses. | |
650 | +This option conflicts with bf(--inplace) and bf(--append). | |
651 | + | |
652 | dit(bf(--compare-dest=DIR)) This option instructs rsync to use em(DIR) on | |
653 | the destination machine as an additional hierarchy to compare destination | |
654 | files against doing transfers (if the files are missing in the destination | |
cc3e685d WD |
655 | diff --git a/util.c b/util.c |
656 | --- a/util.c | |
657 | +++ b/util.c | |
72e5645e | 658 | @@ -1088,6 +1088,32 @@ char *normalize_path(char *path, BOOL force_newbuf, unsigned int *len_ptr) |
fc557362 | 659 | return path; |
1fffd582 WD |
660 | } |
661 | ||
662 | +/* We need to supply our own strcmp function for file list comparisons | |
663 | + * to ensure that signed/unsigned usage is consistent between machines. */ | |
664 | +int u_strcmp(const char *p1, const char *p2) | |
665 | +{ | |
666 | + for ( ; *p1; p1++, p2++) { | |
667 | + if (*p1 != *p2) | |
668 | + break; | |
669 | + } | |
670 | + | |
671 | + return (int)*(uchar*)p1 - (int)*(uchar*)p2; | |
672 | +} | |
673 | + | |
674 | +/* We need a memcmp function that compares unsigned-byte values: returns <0, 0, or >0 for the first differing byte within len bytes. */ | |
675 | +int u_memcmp(const void *p1, const void *p2, size_t len) | |
676 | +{ | |
677 | + const uchar *u1 = p1; | |
678 | + const uchar *u2 = p2; | |
679 | + | |
680 | + for ( ; len--; u1++, u2++) { /* advance both pointers so every byte is examined */ | |
681 | + if (*u1 != *u2) | |
682 | + return (int)*u1 - (int)*u2; | |
683 | + } | |
684 | + | |
685 | + return 0; | |
686 | +} | |
687 | + | |
688 | /** | |
fc557362 WD |
689 | * Return a quoted string with the full pathname of the indicated filename. |
690 | * The string " (in MODNAME)" may also be appended. The returned pointer |