Commit | Line | Data |
---|---|---|
8c5b8235 WD |
1 | Wayne Davison wrote: |
2 | ||
3 | I greatly simplified the changes to generator.c, making the patch | |
4 | easier to maintain and fixing the failing test in the testsuite. | |
58118c25 | 5 | Very lightly tested. |
241013b4 | 6 | |
58118c25 WD |
7 | --- generator.c 29 Jun 2004 19:19:00 -0000 1.92 |
8 | +++ generator.c 30 Jun 2004 07:35:56 -0000 | |
9 | @@ -41,6 +41,7 @@ extern int ignore_times; | |
10 | extern int size_only; | |
11 | extern int io_timeout; | |
12 | extern int protocol_version; | |
13 | +extern int fuzzy; | |
14 | extern int always_checksum; | |
15 | extern char *compare_dest; | |
16 | extern int link_dest; | |
17 | @@ -257,6 +258,94 @@ static void generate_and_send_sums(struc | |
18 | } | |
19 | ||
20 | ||
47dd7a31 WD |
21 | +static void split_names(char *fname, char **dirname, char **basename) |
22 | +{ | |
23 | + char *slash = strrchr(fname, '/'); | |
24 | + if (slash) { | |
25 | + *dirname = fname; | |
26 | + *slash = '\0'; | |
27 | + *basename = slash+1; | |
28 | + } else { | |
29 | + *basename = fname; | |
30 | + *dirname = "."; | |
31 | + } | |
32 | +} | |
33 | + | |
58118c25 | 34 | + |
47dd7a31 WD |
35 | +static unsigned int measure_name(const char *name, const char *basename, |
36 | + const char *ext) | |
37 | +{ | |
38 | + int namelen = strlen(name); | |
39 | + int extlen = strlen(ext); | |
40 | + unsigned int score = 0; | |
41 | + | |
42 | + /* Extensions must match */ | |
43 | + if (namelen <= extlen || strcmp(name + namelen - extlen, ext) != 0) | |
44 | + return 0; | |
45 | + | |
46 | + /* Now score depends on similarity of prefix */ | |
47 | + for (; *name == *basename && *name; name++, basename++) | |
48 | + score++; | |
49 | + return score; | |
50 | +} | |
51 | + | |
58118c25 WD |
52 | + |
53 | +static int find_fuzzy(char **fname_ptr, char *buf, STRUCT_STAT *st_ptr) | |
47dd7a31 WD |
54 | +{ |
55 | + DIR *d; | |
56 | + struct dirent *di; | |
57 | + char *basename, *dirname; | |
58 | + char mangled_name[MAXPATHLEN]; | |
59 | + char bestname[MAXPATHLEN]; | |
60 | + unsigned int bestscore = 0; | |
61 | + const char *ext; | |
62 | + | |
8c5b8235 | 63 | + strlcpy(mangled_name, *fname_ptr, sizeof mangled_name); |
47dd7a31 WD |
64 | + |
65 | + split_names(mangled_name, &dirname, &basename); | |
66 | + if (!(d = opendir(dirname))) { | |
67 | + rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname); | |
68 | + return -1; | |
69 | + } | |
70 | + | |
71 | + /* Get the final extension, e.g. ".gz", but never the full basename. */ | |
72 | + ext = strrchr(basename + 1, '.'); | |
73 | + if (!ext) | |
74 | + ext = basename + strlen(basename); /* ext = "" */ | |
75 | + | |
76 | + while ((di = readdir(d)) != NULL) { | |
77 | + const char *dname = d_name(di); | |
78 | + unsigned int score; | |
79 | + | |
80 | + if (dname[0] == '.' && (dname[1] == '\0' | |
81 | + || (dname[1] == '.' && dname[2] == '\0'))) | |
82 | + continue; | |
83 | + | |
84 | + score = measure_name(dname, basename, ext); | |
85 | + if (verbose > 4) { | |
8c5b8235 WD |
86 | + rprintf(FINFO, "[%s] fuzzy score for %s = %u\n", |
87 | + who_am_i(), dname, score); | |
47dd7a31 WD |
88 | + } |
89 | + if (score > bestscore) { | |
8c5b8235 | 90 | + strlcpy(bestname, dname, sizeof bestname); |
47dd7a31 WD |
91 | + bestscore = score; |
92 | + } | |
93 | + } | |
94 | + closedir(d); | |
95 | + | |
96 | + /* Found a candidate. */ | |
97 | + if (bestscore != 0) { | |
8c5b8235 | 98 | + pathjoin(buf, MAXPATHLEN, dirname, bestname); |
47dd7a31 | 99 | + if (verbose > 2) { |
8c5b8235 WD |
100 | + rprintf(FINFO, "[%s] fuzzy match %s->%s\n", |
101 | + who_am_i(), *fname_ptr, buf); | |
47dd7a31 | 102 | + } |
8c5b8235 | 103 | + *fname_ptr = buf; |
58118c25 | 104 | + return link_stat(buf, st_ptr, 0); |
47dd7a31 WD |
105 | + } |
106 | + return -1; | |
107 | +} | |
58118c25 WD |
108 | + |
109 | ||
110 | /* | |
111 | * Acts on file number @p i from @p flist, whose name is @p fname. | |
112 | @@ -267,12 +356,12 @@ static void generate_and_send_sums(struc | |
113 | * out. It might be wrong. | |
114 | */ | |
115 | static void recv_generator(char *fname, struct file_struct *file, int i, | |
116 | - int f_out) | |
117 | + int f_out, int f_nameout) | |
118 | { | |
8c5b8235 | 119 | int fd; |
f74d2272 WD |
120 | STRUCT_STAT st; |
121 | struct map_struct *mapbuf; | |
8c5b8235 WD |
122 | - int statret; |
123 | + int statret, fuzzy_file = 0; | |
124 | char *fnamecmp; | |
125 | char fnamecmpbuf[MAXPATHLEN]; | |
54691942 | 126 | |
58118c25 WD |
127 | @@ -431,8 +520,10 @@ static void recv_generator(char *fname, |
128 | statret = link_stat(fnamecmpbuf, &st, 0); | |
129 | if (!S_ISREG(st.st_mode)) | |
130 | statret = -1; | |
131 | - if (statret == -1) | |
132 | + if (statret < 0) { | |
133 | errno = saveerrno; | |
134 | + *fnamecmpbuf = '\0'; | |
135 | + } | |
136 | #if HAVE_LINK | |
137 | else if (link_dest && !dry_run) { | |
138 | if (do_link(fnamecmpbuf, fname) != 0) { | |
139 | @@ -440,18 +531,30 @@ static void recv_generator(char *fname, | |
140 | rsyserr(FINFO, errno, "link %s => %s", | |
141 | fnamecmpbuf, fname); | |
142 | } | |
143 | - } | |
144 | - fnamecmp = fnamecmpbuf; | |
145 | + fnamecmp = fnamecmpbuf; | |
146 | + } else | |
147 | + *fnamecmpbuf = '\0'; | |
148 | } | |
149 | #endif | |
150 | else | |
8c5b8235 | 151 | fnamecmp = fnamecmpbuf; |
58118c25 WD |
152 | + } else |
153 | + *fnamecmpbuf = '\0'; | |
154 | + | |
8c5b8235 | 155 | + if (statret == -1 && fuzzy) { |
58118c25 | 156 | + statret = find_fuzzy(&fnamecmp, fnamecmpbuf, &st); |
8c5b8235 WD |
157 | + if (!S_ISREG(st.st_mode)) |
158 | + statret = -1; | |
159 | + else | |
160 | + fuzzy_file = 1; | |
58118c25 WD |
161 | } |
162 | ||
8c5b8235 | 163 | if (statret == -1) { |
7628f156 WD |
164 | if (preserve_hard_links && hard_link_check(file, HL_SKIP)) |
165 | return; | |
58118c25 WD |
166 | if (errno == ENOENT) { |
167 | + if (f_nameout >= 0) | |
168 | + write(f_nameout, "", 1); | |
169 | write_int(f_out,i); | |
170 | if (!dry_run) | |
171 | write_sum_head(f_out, NULL); | |
172 | @@ -471,37 +574,43 @@ static void recv_generator(char *fname, | |
173 | /* now pretend the file didn't exist */ | |
174 | if (preserve_hard_links && hard_link_check(file, HL_SKIP)) | |
175 | return; | |
176 | + if (f_nameout >= 0) | |
177 | + write(f_nameout, "", 1); | |
178 | write_int(f_out,i); | |
179 | if (!dry_run) | |
180 | write_sum_head(f_out, NULL); | |
181 | return; | |
182 | } | |
183 | ||
184 | - if (opt_ignore_existing && fnamecmp == fname) { | |
185 | + if (opt_ignore_existing && !*fnamecmpbuf) { | |
186 | if (verbose > 1) | |
187 | rprintf(FINFO,"%s exists\n",fname); | |
188 | return; | |
189 | } | |
190 | ||
191 | - if (update_only && fnamecmp == fname | |
192 | + if (update_only && !*fnamecmpbuf | |
193 | && cmp_modtime(st.st_mtime, file->modtime) > 0) { | |
194 | if (verbose > 1) | |
195 | rprintf(FINFO,"%s is newer\n",fname); | |
241013b4 MP |
196 | return; |
197 | } | |
198 | ||
8c5b8235 | 199 | - if (skip_file(fname, file, &st)) { |
58118c25 | 200 | - if (fnamecmp == fname) |
8c5b8235 | 201 | + if (!fuzzy_file && skip_file(fname, file, &st)) { |
58118c25 | 202 | + if (!*fnamecmpbuf) |
8c5b8235 WD |
203 | set_perms(fname, file, &st, PERMS_REPORT); |
204 | return; | |
58118c25 WD |
205 | } |
206 | ||
207 | if (dry_run) { | |
208 | + if (f_nameout >= 0) | |
209 | + write(f_nameout, "", 1); | |
210 | write_int(f_out,i); | |
211 | return; | |
212 | } | |
213 | ||
214 | if (disable_deltas_p()) { | |
215 | + if (f_nameout >= 0) | |
216 | + write(f_nameout, "", 1); | |
217 | write_int(f_out,i); | |
218 | write_sum_head(f_out, NULL); | |
219 | return; | |
220 | @@ -516,6 +625,8 @@ static void recv_generator(char *fname, | |
221 | /* pretend the file didn't exist */ | |
222 | if (preserve_hard_links && hard_link_check(file, HL_SKIP)) | |
223 | return; | |
224 | + if (f_nameout >= 0) | |
225 | + write(f_nameout, "", 1); | |
226 | write_int(f_out,i); | |
227 | write_sum_head(f_out, NULL); | |
228 | return; | |
229 | @@ -534,6 +645,8 @@ static void recv_generator(char *fname, | |
230 | if (verbose > 2) | |
231 | rprintf(FINFO, "generating and sending sums for %d\n", i); | |
232 | ||
233 | + if (f_nameout >= 0) | |
234 | + write(f_nameout, fnamecmpbuf, strlen(fnamecmpbuf) + 1); | |
235 | write_int(f_out,i); | |
236 | generate_and_send_sums(mapbuf, st.st_size, f_out); | |
237 | ||
238 | @@ -543,10 +656,11 @@ static void recv_generator(char *fname, | |
239 | } | |
240 | ||
241 | ||
242 | -void generate_files(int f, struct file_list *flist, char *local_name) | |
243 | +void generate_files(int f, struct file_list *flist, char *local_name, | |
244 | + int f_nameout) | |
245 | { | |
246 | int i; | |
247 | - int phase=0; | |
248 | + int phase = 0; | |
249 | char fbuf[MAXPATHLEN]; | |
250 | ||
251 | if (verbose > 2) { | |
252 | @@ -584,7 +698,7 @@ void generate_files(int f, struct file_l | |
253 | } | |
254 | ||
255 | recv_generator(local_name ? local_name : f_name_to(file, fbuf), | |
256 | - file, i, f); | |
257 | + file, i, f, f_nameout); | |
258 | } | |
259 | ||
260 | phase++; | |
261 | @@ -601,7 +715,7 @@ void generate_files(int f, struct file_l | |
262 | while ((i = get_redo_num()) != -1) { | |
263 | struct file_struct *file = flist->files[i]; | |
264 | recv_generator(local_name ? local_name : f_name_to(file, fbuf), | |
265 | - file, i, f); | |
266 | + file, i, f, f_nameout); | |
267 | } | |
268 | ||
269 | phase++; | |
270 | @@ -620,7 +734,7 @@ void generate_files(int f, struct file_l | |
271 | if (!file->basename || !S_ISDIR(file->mode)) | |
272 | continue; | |
273 | recv_generator(local_name ? local_name : f_name(file), | |
274 | - file, i, -1); | |
275 | + file, i, -1, -1); | |
276 | } | |
277 | ||
278 | if (verbose > 2) | |
279 | --- main.c 28 Jun 2004 17:45:40 -0000 1.201 | |
280 | +++ main.c 30 Jun 2004 07:35:57 -0000 | |
281 | @@ -428,8 +428,8 @@ static void do_server_sender(int f_in, i | |
282 | static int do_recv(int f_in,int f_out,struct file_list *flist,char *local_name) | |
283 | { | |
284 | int pid; | |
285 | - int status=0; | |
286 | - int error_pipe[2]; | |
287 | + int status = 0; | |
288 | + int error_pipe[2], name_pipe[2]; | |
289 | ||
290 | if (preserve_hard_links) | |
291 | init_hard_links(flist); | |
292 | @@ -441,17 +441,19 @@ static int do_recv(int f_in,int f_out,st | |
293 | } | |
294 | } | |
295 | ||
296 | - if (fd_pair(error_pipe) < 0) { | |
297 | - rprintf(FERROR,"error pipe failed in do_recv\n"); | |
298 | + if (fd_pair(error_pipe) < 0 || fd_pair(name_pipe) < 0) { | |
299 | + rprintf(FERROR, "fd_pair() failed in do_recv\n"); | |
300 | exit_cleanup(RERR_SOCKETIO); | |
301 | } | |
302 | ||
303 | io_flush(NORMAL_FLUSH); | |
304 | ||
305 | - if ((pid=do_fork()) == 0) { | |
306 | + if ((pid = do_fork()) == 0) { | |
307 | close(error_pipe[0]); | |
308 | + close(name_pipe[1]); | |
309 | if (f_in != f_out) | |
310 | close(f_out); | |
311 | + set_blocking(name_pipe[0]); | |
312 | ||
313 | /* we can't let two processes write to the socket at one time */ | |
314 | io_multiplexing_close(); | |
315 | @@ -459,7 +461,7 @@ static int do_recv(int f_in,int f_out,st | |
316 | /* set place to send errors */ | |
317 | set_msg_fd_out(error_pipe[1]); | |
318 | ||
319 | - recv_files(f_in,flist,local_name); | |
320 | + recv_files(f_in, flist, local_name, name_pipe[0]); | |
321 | io_flush(FULL_FLUSH); | |
322 | report(f_in); | |
323 | ||
324 | @@ -475,14 +477,16 @@ static int do_recv(int f_in,int f_out,st | |
325 | am_generator = 1; | |
326 | ||
327 | close(error_pipe[1]); | |
328 | + close(name_pipe[0]); | |
329 | if (f_in != f_out) | |
330 | close(f_in); | |
331 | + set_blocking(name_pipe[1]); | |
332 | ||
333 | io_start_buffering_out(f_out); | |
334 | ||
335 | set_msg_fd_in(error_pipe[0]); | |
336 | ||
337 | - generate_files(f_out, flist, local_name); | |
338 | + generate_files(f_out, flist, local_name, name_pipe[1]); | |
339 | ||
340 | get_redo_num(); /* Read final MSG_DONE and any prior messages. */ | |
341 | report(-1); | |
47dd7a31 | 342 | --- options.c 20 Jun 2004 19:47:05 -0000 1.157 |
58118c25 | 343 | +++ options.c 30 Jun 2004 07:35:57 -0000 |
7628f156 | 344 | @@ -94,6 +94,7 @@ int ignore_errors = 0; |
f74d2272 WD |
345 | int modify_window = 0; |
346 | int blocking_io = -1; | |
347 | int checksum_seed = 0; | |
348 | +int fuzzy = 0; | |
349 | unsigned int block_size = 0; | |
241013b4 | 350 | |
241013b4 | 351 | |
7628f156 | 352 | @@ -270,6 +271,7 @@ void usage(enum logcode F) |
f0533c4c WD |
353 | rprintf(F," -T --temp-dir=DIR create temporary files in directory DIR\n"); |
354 | rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n"); | |
355 | rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n"); | |
356 | + rprintf(F," --fuzzy use similar file as basis if basis doesn't exist\n"); | |
357 | rprintf(F," -P equivalent to --partial --progress\n"); | |
358 | rprintf(F," -z, --compress compress file data\n"); | |
359 | rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n"); | |
7628f156 | 360 | @@ -368,6 +370,7 @@ static struct poptOption long_options[] |
f0533c4c WD |
361 | {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 }, |
362 | {"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 }, | |
363 | {"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 }, | |
f74d2272 | 364 | + {"fuzzy", 0, POPT_ARG_NONE, &fuzzy, 0, 0, 0 }, |
f0533c4c WD |
365 | /* TODO: Should this take an optional int giving the compression level? */ |
366 | {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 }, | |
367 | {"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 }, | |
7628f156 | 368 | @@ -989,6 +992,9 @@ void server_options(char **args,int *arg |
f74d2272 | 369 | } |
241013b4 | 370 | } |
7b675ff5 | 371 | |
241013b4 MP |
372 | + if (fuzzy && am_sender) |
373 | + args[ac++] = "--fuzzy"; | |
7b675ff5 | 374 | + |
241013b4 | 375 | *argc = ac; |
f74d2272 | 376 | return; |
7b675ff5 | 377 | |
47dd7a31 | 378 | --- receiver.c 29 Jun 2004 15:12:01 -0000 1.83 |
58118c25 WD |
379 | +++ receiver.c 30 Jun 2004 07:35:57 -0000 |
380 | @@ -36,7 +36,6 @@ extern int preserve_perms; | |
381 | extern int cvs_exclude; | |
382 | extern int io_error; | |
383 | extern char *tmpdir; | |
384 | -extern char *compare_dest; | |
385 | extern int make_backups; | |
386 | extern int do_progress; | |
387 | extern char *backup_dir; | |
388 | @@ -293,14 +292,15 @@ static int receive_data(int f_in,struct | |
389 | * main routine for receiver process. | |
390 | * | |
391 | * Receiver process runs on the same host as the generator process. */ | |
392 | -int recv_files(int f_in,struct file_list *flist,char *local_name) | |
393 | +int recv_files(int f_in, struct file_list *flist, char *local_name, | |
394 | + int f_name) | |
f74d2272 | 395 | { |
58118c25 WD |
396 | int fd1,fd2; |
397 | STRUCT_STAT st; | |
398 | char *fname, fbuf[MAXPATHLEN]; | |
399 | char template[MAXPATHLEN]; | |
400 | char fnametmp[MAXPATHLEN]; | |
401 | - char *fnamecmp; | |
402 | + char *fnamecmp, *cp; | |
403 | char fnamecmpbuf[MAXPATHLEN]; | |
404 | struct map_struct *mapbuf; | |
405 | struct file_struct *file; | |
406 | @@ -364,19 +364,19 @@ int recv_files(int f_in,struct file_list | |
407 | if (verbose > 2) | |
408 | rprintf(FINFO,"recv_files(%s)\n",fname); | |
241013b4 | 409 | |
58118c25 WD |
410 | - fnamecmp = fname; |
411 | + for (cp = fnamecmpbuf; ; cp++) { | |
412 | + if (read(f_name, cp, 1) <= 0) { | |
413 | + rsyserr(FERROR, errno, "fname-pipe read failed"); | |
414 | + exit_cleanup(RERR_PROTOCOL); | |
415 | + } | |
416 | + if (!*cp) | |
417 | + break; | |
8c5b8235 | 418 | + } |
58118c25 WD |
419 | + fnamecmp = *fnamecmpbuf ? fnamecmpbuf : fname; |
420 | ||
421 | /* open the file */ | |
422 | fd1 = do_open(fnamecmp, O_RDONLY, 0); | |
423 | ||
424 | - if (fd1 == -1 && compare_dest != NULL) { | |
425 | - /* try the file at compare_dest instead */ | |
426 | - pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, | |
427 | - compare_dest, fname); | |
428 | - fnamecmp = fnamecmpbuf; | |
429 | - fd1 = do_open(fnamecmp, O_RDONLY, 0); | |
430 | - } | |
431 | - | |
241013b4 | 432 | if (fd1 != -1 && do_fstat(fd1,&st) != 0) { |
fe6407b5 | 433 | rsyserr(FERROR, errno, "fstat %s failed", |
8c5b8235 | 434 | full_fname(fnamecmp)); |
58118c25 WD |
435 | @@ -385,7 +385,7 @@ int recv_files(int f_in,struct file_list |
436 | continue; | |
437 | } | |
438 | ||
439 | - if (fd1 != -1 && S_ISDIR(st.st_mode) && fnamecmp == fname) { | |
440 | + if (fd1 != -1 && S_ISDIR(st.st_mode) && !*fnamecmpbuf) { | |
441 | /* this special handling for directories | |
442 | * wouldn't be necessary if robust_rename() | |
443 | * and the underlying robust_unlink could cope | |
7628f156 | 444 | --- rsync.yo 5 Jun 2004 16:16:30 -0000 1.171 |
58118c25 | 445 | +++ rsync.yo 30 Jun 2004 07:35:58 -0000 |
7628f156 | 446 | @@ -325,6 +325,7 @@ verb( |
f0533c4c WD |
447 | -T --temp-dir=DIR create temporary files in directory DIR |
448 | --compare-dest=DIR also compare received files relative to DIR | |
449 | --link-dest=DIR create hardlinks to DIR for unchanged files | |
450 | + --fuzzy use similar file as basis if basis is gone | |
451 | -P equivalent to --partial --progress | |
452 | -z, --compress compress file data | |
453 | -C, --cvs-exclude auto ignore files in the same way CVS does |