Incorporated the g2r-basis-filename.diff changes so that we don't
[rsync/rsync-patches.git] / fuzzy.diff
CommitLineData
8c5b8235
WD
1Wayne Davison wrote:
2
3I greatly simplified the changes to generator.c, making the patch
4easier to maintain and fixing the failing test in the testsuite.
58118c25 5Very lightly tested.
241013b4 6
58118c25
WD
7--- generator.c 29 Jun 2004 19:19:00 -0000 1.92
8+++ generator.c 30 Jun 2004 07:35:56 -0000
9@@ -41,6 +41,7 @@ extern int ignore_times;
10 extern int size_only;
11 extern int io_timeout;
12 extern int protocol_version;
13+extern int fuzzy;
14 extern int always_checksum;
15 extern char *compare_dest;
16 extern int link_dest;
17@@ -257,6 +258,94 @@ static void generate_and_send_sums(struc
18 }
19
20
47dd7a31
WD
21+static void split_names(char *fname, char **dirname, char **basename)
22+{
23+ char *slash = strrchr(fname, '/');
24+ if (slash) {
25+ *dirname = fname;
26+ *slash = '\0';
27+ *basename = slash+1;
28+ } else {
29+ *basename = fname;
30+ *dirname = ".";
31+ }
32+}
33+
58118c25 34+
47dd7a31
WD
35+static unsigned int measure_name(const char *name, const char *basename,
36+ const char *ext)
37+{
38+ int namelen = strlen(name);
39+ int extlen = strlen(ext);
40+ unsigned int score = 0;
41+
42+ /* Extensions must match */
43+ if (namelen <= extlen || strcmp(name + namelen - extlen, ext) != 0)
44+ return 0;
45+
46+ /* Now score depends on similarity of prefix */
47+ for (; *name == *basename && *name; name++, basename++)
48+ score++;
49+ return score;
50+}
51+
58118c25
WD
52+
53+static int find_fuzzy(char **fname_ptr, char *buf, STRUCT_STAT *st_ptr)
47dd7a31
WD
54+{
55+ DIR *d;
56+ struct dirent *di;
57+ char *basename, *dirname;
58+ char mangled_name[MAXPATHLEN];
59+ char bestname[MAXPATHLEN];
60+ unsigned int bestscore = 0;
61+ const char *ext;
62+
8c5b8235 63+ strlcpy(mangled_name, *fname_ptr, sizeof mangled_name);
47dd7a31
WD
64+
65+ split_names(mangled_name, &dirname, &basename);
66+ if (!(d = opendir(dirname))) {
67+ rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname);
68+ return -1;
69+ }
70+
71+ /* Get the final extension, e.g. ".gz", but never the full basename. */
72+ ext = strrchr(basename + 1, '.');
73+ if (!ext)
74+ ext = basename + strlen(basename); /* ext = "" */
75+
76+ while ((di = readdir(d)) != NULL) {
77+ const char *dname = d_name(di);
78+ unsigned int score;
79+
80+ if (dname[0] == '.' && (dname[1] == '\0'
81+ || (dname[1] == '.' && dname[2] == '\0')))
82+ continue;
83+
84+ score = measure_name(dname, basename, ext);
85+ if (verbose > 4) {
8c5b8235
WD
86+ rprintf(FINFO, "[%s] fuzzy score for %s = %u\n",
87+ who_am_i(), dname, score);
47dd7a31
WD
88+ }
89+ if (score > bestscore) {
8c5b8235 90+ strlcpy(bestname, dname, sizeof bestname);
47dd7a31
WD
91+ bestscore = score;
92+ }
93+ }
94+ closedir(d);
95+
96+ /* Found a candidate. */
97+ if (bestscore != 0) {
8c5b8235 98+ pathjoin(buf, MAXPATHLEN, dirname, bestname);
47dd7a31 99+ if (verbose > 2) {
8c5b8235
WD
100+ rprintf(FINFO, "[%s] fuzzy match %s->%s\n",
101+ who_am_i(), *fname_ptr, buf);
47dd7a31 102+ }
8c5b8235 103+ *fname_ptr = buf;
58118c25 104+ return link_stat(buf, st_ptr, 0);
47dd7a31
WD
105+ }
106+ return -1;
107+}
58118c25
WD
108+
109
110 /*
111 * Acts on file number @p i from @p flist, whose name is @p fname.
112@@ -267,12 +356,12 @@ static void generate_and_send_sums(struc
113 * out. It might be wrong.
114 */
115 static void recv_generator(char *fname, struct file_struct *file, int i,
116- int f_out)
117+ int f_out, int f_nameout)
118 {
8c5b8235 119 int fd;
f74d2272
WD
120 STRUCT_STAT st;
121 struct map_struct *mapbuf;
8c5b8235
WD
122- int statret;
123+ int statret, fuzzy_file = 0;
124 char *fnamecmp;
125 char fnamecmpbuf[MAXPATHLEN];
54691942 126
58118c25
WD
127@@ -431,8 +520,10 @@ static void recv_generator(char *fname,
128 statret = link_stat(fnamecmpbuf, &st, 0);
129 if (!S_ISREG(st.st_mode))
130 statret = -1;
131- if (statret == -1)
132+ if (statret < 0) {
133 errno = saveerrno;
134+ *fnamecmpbuf = '\0';
135+ }
136 #if HAVE_LINK
137 else if (link_dest && !dry_run) {
138 if (do_link(fnamecmpbuf, fname) != 0) {
139@@ -440,18 +531,30 @@ static void recv_generator(char *fname,
140 rsyserr(FINFO, errno, "link %s => %s",
141 fnamecmpbuf, fname);
142 }
143- }
144- fnamecmp = fnamecmpbuf;
145+ fnamecmp = fnamecmpbuf;
146+ } else
147+ *fnamecmpbuf = '\0';
148 }
149 #endif
150 else
8c5b8235 151 fnamecmp = fnamecmpbuf;
58118c25
WD
152+ } else
153+ *fnamecmpbuf = '\0';
154+
8c5b8235 155+ if (statret == -1 && fuzzy) {
58118c25 156+ statret = find_fuzzy(&fnamecmp, fnamecmpbuf, &st);
8c5b8235
WD
157+ if (!S_ISREG(st.st_mode))
158+ statret = -1;
159+ else
160+ fuzzy_file = 1;
58118c25
WD
161 }
162
8c5b8235 163 if (statret == -1) {
7628f156
WD
164 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
165 return;
58118c25
WD
166 if (errno == ENOENT) {
167+ if (f_nameout >= 0)
168+ write(f_nameout, "", 1);
169 write_int(f_out,i);
170 if (!dry_run)
171 write_sum_head(f_out, NULL);
172@@ -471,37 +574,43 @@ static void recv_generator(char *fname,
173 /* now pretend the file didn't exist */
174 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
175 return;
176+ if (f_nameout >= 0)
177+ write(f_nameout, "", 1);
178 write_int(f_out,i);
179 if (!dry_run)
180 write_sum_head(f_out, NULL);
181 return;
182 }
183
184- if (opt_ignore_existing && fnamecmp == fname) {
185+ if (opt_ignore_existing && !*fnamecmpbuf) {
186 if (verbose > 1)
187 rprintf(FINFO,"%s exists\n",fname);
188 return;
189 }
190
191- if (update_only && fnamecmp == fname
192+ if (update_only && !*fnamecmpbuf
193 && cmp_modtime(st.st_mtime, file->modtime) > 0) {
194 if (verbose > 1)
195 rprintf(FINFO,"%s is newer\n",fname);
241013b4
MP
196 return;
197 }
198
8c5b8235 199- if (skip_file(fname, file, &st)) {
58118c25 200- if (fnamecmp == fname)
8c5b8235 201+ if (!fuzzy_file && skip_file(fname, file, &st)) {
58118c25 202+ if (!*fnamecmpbuf)
8c5b8235
WD
203 set_perms(fname, file, &st, PERMS_REPORT);
204 return;
58118c25
WD
205 }
206
207 if (dry_run) {
208+ if (f_nameout >= 0)
209+ write(f_nameout, "", 1);
210 write_int(f_out,i);
211 return;
212 }
213
214 if (disable_deltas_p()) {
215+ if (f_nameout >= 0)
216+ write(f_nameout, "", 1);
217 write_int(f_out,i);
218 write_sum_head(f_out, NULL);
219 return;
220@@ -516,6 +625,8 @@ static void recv_generator(char *fname,
221 /* pretend the file didn't exist */
222 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
223 return;
224+ if (f_nameout >= 0)
225+ write(f_nameout, "", 1);
226 write_int(f_out,i);
227 write_sum_head(f_out, NULL);
228 return;
229@@ -534,6 +645,8 @@ static void recv_generator(char *fname,
230 if (verbose > 2)
231 rprintf(FINFO, "generating and sending sums for %d\n", i);
232
233+ if (f_nameout >= 0)
234+ write(f_nameout, fnamecmpbuf, strlen(fnamecmpbuf) + 1);
235 write_int(f_out,i);
236 generate_and_send_sums(mapbuf, st.st_size, f_out);
237
238@@ -543,10 +656,11 @@ static void recv_generator(char *fname,
239 }
240
241
242-void generate_files(int f, struct file_list *flist, char *local_name)
243+void generate_files(int f, struct file_list *flist, char *local_name,
244+ int f_nameout)
245 {
246 int i;
247- int phase=0;
248+ int phase = 0;
249 char fbuf[MAXPATHLEN];
250
251 if (verbose > 2) {
252@@ -584,7 +698,7 @@ void generate_files(int f, struct file_l
253 }
254
255 recv_generator(local_name ? local_name : f_name_to(file, fbuf),
256- file, i, f);
257+ file, i, f, f_nameout);
258 }
259
260 phase++;
261@@ -601,7 +715,7 @@ void generate_files(int f, struct file_l
262 while ((i = get_redo_num()) != -1) {
263 struct file_struct *file = flist->files[i];
264 recv_generator(local_name ? local_name : f_name_to(file, fbuf),
265- file, i, f);
266+ file, i, f, f_nameout);
267 }
268
269 phase++;
270@@ -620,7 +734,7 @@ void generate_files(int f, struct file_l
271 if (!file->basename || !S_ISDIR(file->mode))
272 continue;
273 recv_generator(local_name ? local_name : f_name(file),
274- file, i, -1);
275+ file, i, -1, -1);
276 }
277
278 if (verbose > 2)
279--- main.c 28 Jun 2004 17:45:40 -0000 1.201
280+++ main.c 30 Jun 2004 07:35:57 -0000
281@@ -428,8 +428,8 @@ static void do_server_sender(int f_in, i
282 static int do_recv(int f_in,int f_out,struct file_list *flist,char *local_name)
283 {
284 int pid;
285- int status=0;
286- int error_pipe[2];
287+ int status = 0;
288+ int error_pipe[2], name_pipe[2];
289
290 if (preserve_hard_links)
291 init_hard_links(flist);
292@@ -441,17 +441,19 @@ static int do_recv(int f_in,int f_out,st
293 }
294 }
295
296- if (fd_pair(error_pipe) < 0) {
297- rprintf(FERROR,"error pipe failed in do_recv\n");
298+ if (fd_pair(error_pipe) < 0 || fd_pair(name_pipe) < 0) {
299+ rprintf(FERROR, "fd_pair() failed in do_recv\n");
300 exit_cleanup(RERR_SOCKETIO);
301 }
302
303 io_flush(NORMAL_FLUSH);
304
305- if ((pid=do_fork()) == 0) {
306+ if ((pid = do_fork()) == 0) {
307 close(error_pipe[0]);
308+ close(name_pipe[1]);
309 if (f_in != f_out)
310 close(f_out);
311+ set_blocking(name_pipe[0]);
312
313 /* we can't let two processes write to the socket at one time */
314 io_multiplexing_close();
315@@ -459,7 +461,7 @@ static int do_recv(int f_in,int f_out,st
316 /* set place to send errors */
317 set_msg_fd_out(error_pipe[1]);
318
319- recv_files(f_in,flist,local_name);
320+ recv_files(f_in, flist, local_name, name_pipe[0]);
321 io_flush(FULL_FLUSH);
322 report(f_in);
323
324@@ -475,14 +477,16 @@ static int do_recv(int f_in,int f_out,st
325 am_generator = 1;
326
327 close(error_pipe[1]);
328+ close(name_pipe[0]);
329 if (f_in != f_out)
330 close(f_in);
331+ set_blocking(name_pipe[1]);
332
333 io_start_buffering_out(f_out);
334
335 set_msg_fd_in(error_pipe[0]);
336
337- generate_files(f_out, flist, local_name);
338+ generate_files(f_out, flist, local_name, name_pipe[1]);
339
340 get_redo_num(); /* Read final MSG_DONE and any prior messages. */
341 report(-1);
47dd7a31 342--- options.c 20 Jun 2004 19:47:05 -0000 1.157
58118c25 343+++ options.c 30 Jun 2004 07:35:57 -0000
7628f156 344@@ -94,6 +94,7 @@ int ignore_errors = 0;
f74d2272
WD
345 int modify_window = 0;
346 int blocking_io = -1;
347 int checksum_seed = 0;
348+int fuzzy = 0;
349 unsigned int block_size = 0;
241013b4 350
241013b4 351
7628f156 352@@ -270,6 +271,7 @@ void usage(enum logcode F)
f0533c4c
WD
353 rprintf(F," -T --temp-dir=DIR create temporary files in directory DIR\n");
354 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
355 rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n");
356+ rprintf(F," --fuzzy use similar file as basis if basis doesn't exist\n");
357 rprintf(F," -P equivalent to --partial --progress\n");
358 rprintf(F," -z, --compress compress file data\n");
359 rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n");
7628f156 360@@ -368,6 +370,7 @@ static struct poptOption long_options[]
f0533c4c
WD
361 {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
362 {"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 },
363 {"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 },
f74d2272 364+ {"fuzzy", 0, POPT_ARG_NONE, &fuzzy, 0, 0, 0 },
f0533c4c
WD
365 /* TODO: Should this take an optional int giving the compression level? */
366 {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 },
367 {"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 },
7628f156 368@@ -989,6 +992,9 @@ void server_options(char **args,int *arg
f74d2272 369 }
241013b4 370 }
7b675ff5 371
241013b4
MP
372+ if (fuzzy && am_sender)
373+ args[ac++] = "--fuzzy";
7b675ff5 374+
241013b4 375 *argc = ac;
f74d2272 376 return;
7b675ff5 377
47dd7a31 378--- receiver.c 29 Jun 2004 15:12:01 -0000 1.83
58118c25
WD
379+++ receiver.c 30 Jun 2004 07:35:57 -0000
380@@ -36,7 +36,6 @@ extern int preserve_perms;
381 extern int cvs_exclude;
382 extern int io_error;
383 extern char *tmpdir;
384-extern char *compare_dest;
385 extern int make_backups;
386 extern int do_progress;
387 extern char *backup_dir;
388@@ -293,14 +292,15 @@ static int receive_data(int f_in,struct
389 * main routine for receiver process.
390 *
391 * Receiver process runs on the same host as the generator process. */
392-int recv_files(int f_in,struct file_list *flist,char *local_name)
393+int recv_files(int f_in, struct file_list *flist, char *local_name,
394+ int f_name)
f74d2272 395 {
58118c25
WD
396 int fd1,fd2;
397 STRUCT_STAT st;
398 char *fname, fbuf[MAXPATHLEN];
399 char template[MAXPATHLEN];
400 char fnametmp[MAXPATHLEN];
401- char *fnamecmp;
402+ char *fnamecmp, *cp;
403 char fnamecmpbuf[MAXPATHLEN];
404 struct map_struct *mapbuf;
405 struct file_struct *file;
406@@ -364,19 +364,19 @@ int recv_files(int f_in,struct file_list
407 if (verbose > 2)
408 rprintf(FINFO,"recv_files(%s)\n",fname);
241013b4 409
58118c25
WD
410- fnamecmp = fname;
411+ for (cp = fnamecmpbuf; ; cp++) {
412+ if (read(f_name, cp, 1) <= 0) {
413+ rsyserr(FERROR, errno, "fname-pipe read failed");
414+ exit_cleanup(RERR_PROTOCOL);
415+ }
416+ if (!*cp)
417+ break;
8c5b8235 418+ }
58118c25
WD
419+ fnamecmp = *fnamecmpbuf ? fnamecmpbuf : fname;
420
421 /* open the file */
422 fd1 = do_open(fnamecmp, O_RDONLY, 0);
423
424- if (fd1 == -1 && compare_dest != NULL) {
425- /* try the file at compare_dest instead */
426- pathjoin(fnamecmpbuf, sizeof fnamecmpbuf,
427- compare_dest, fname);
428- fnamecmp = fnamecmpbuf;
429- fd1 = do_open(fnamecmp, O_RDONLY, 0);
430- }
431-
241013b4 432 if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
fe6407b5 433 rsyserr(FERROR, errno, "fstat %s failed",
8c5b8235 434 full_fname(fnamecmp));
58118c25
WD
435@@ -385,7 +385,7 @@ int recv_files(int f_in,struct file_list
436 continue;
437 }
438
439- if (fd1 != -1 && S_ISDIR(st.st_mode) && fnamecmp == fname) {
440+ if (fd1 != -1 && S_ISDIR(st.st_mode) && !*fnamecmpbuf) {
441 /* this special handling for directories
442 * wouldn't be necessary if robust_rename()
443 * and the underlying robust_unlink could cope
7628f156 444--- rsync.yo 5 Jun 2004 16:16:30 -0000 1.171
58118c25 445+++ rsync.yo 30 Jun 2004 07:35:58 -0000
7628f156 446@@ -325,6 +325,7 @@ verb(
f0533c4c
WD
447 -T --temp-dir=DIR create temporary files in directory DIR
448 --compare-dest=DIR also compare received files relative to DIR
449 --link-dest=DIR create hardlinks to DIR for unchanged files
450+ --fuzzy use similar file as basis if basis is gone
451 -P equivalent to --partial --progress
452 -z, --compress compress file data
453 -C, --cvs-exclude auto ignore files in the same way CVS does