Transformed shell script into perl script and improved it to allow
[rsync/rsync-patches.git] / fuzzy.diff
CommitLineData
8c5b8235
WD
1Wayne Davison wrote:
2
3I greatly simplified the changes to generator.c, making the patch
4easier to maintain and fixing the failing test in the testsuite.
58118c25 5Very lightly tested.
241013b4 6
8cec1ead
WD
7--- generator.c 30 Jun 2004 07:27:30 -0000 1.93
8+++ generator.c 30 Jun 2004 07:43:46 -0000
58118c25
WD
9@@ -41,6 +41,7 @@ extern int ignore_times;
10 extern int size_only;
11 extern int io_timeout;
12 extern int protocol_version;
13+extern int fuzzy;
14 extern int always_checksum;
15 extern char *compare_dest;
16 extern int link_dest;
17@@ -257,6 +258,94 @@ static void generate_and_send_sums(struc
18 }
19
20
47dd7a31
WD
21+static void split_names(char *fname, char **dirname, char **basename)
22+{
23+ char *slash = strrchr(fname, '/');
24+ if (slash) {
25+ *dirname = fname;
26+ *slash = '\0';
27+ *basename = slash+1;
28+ } else {
29+ *basename = fname;
30+ *dirname = ".";
31+ }
32+}
33+
58118c25 34+
47dd7a31
WD
35+static unsigned int measure_name(const char *name, const char *basename,
36+ const char *ext)
37+{
38+ int namelen = strlen(name);
39+ int extlen = strlen(ext);
40+ unsigned int score = 0;
41+
42+ /* Extensions must match */
43+ if (namelen <= extlen || strcmp(name + namelen - extlen, ext) != 0)
44+ return 0;
45+
46+ /* Now score depends on similarity of prefix */
47+ for (; *name == *basename && *name; name++, basename++)
48+ score++;
49+ return score;
50+}
51+
58118c25
WD
52+
53+static int find_fuzzy(char **fname_ptr, char *buf, STRUCT_STAT *st_ptr)
47dd7a31
WD
54+{
55+ DIR *d;
56+ struct dirent *di;
57+ char *basename, *dirname;
58+ char mangled_name[MAXPATHLEN];
59+ char bestname[MAXPATHLEN];
60+ unsigned int bestscore = 0;
61+ const char *ext;
62+
8c5b8235 63+ strlcpy(mangled_name, *fname_ptr, sizeof mangled_name);
47dd7a31
WD
64+
65+ split_names(mangled_name, &dirname, &basename);
66+ if (!(d = opendir(dirname))) {
67+ rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname);
68+ return -1;
69+ }
70+
71+ /* Get final extension, e.g. .gz; never the full basename though. */
72+ ext = strrchr(basename + 1, '.');
73+ if (!ext)
74+ ext = basename + strlen(basename); /* ext = "" */
75+
76+ while ((di = readdir(d)) != NULL) {
77+ const char *dname = d_name(di);
78+ unsigned int score;
79+
80+ if (dname[0] == '.' && (dname[1] == '\0'
81+ || (dname[1] == '.' && dname[2] == '\0')))
82+ continue;
83+
84+ score = measure_name(dname, basename, ext);
85+ if (verbose > 4) {
8c5b8235
WD
86+ rprintf(FINFO, "[%s] fuzzy score for %s = %u\n",
87+ who_am_i(), dname, score);
47dd7a31
WD
88+ }
89+ if (score > bestscore) {
8c5b8235 90+ strlcpy(bestname, dname, sizeof bestname);
47dd7a31
WD
91+ bestscore = score;
92+ }
93+ }
94+ closedir(d);
95+
96+ /* Found a candidate. */
97+ if (bestscore != 0) {
8c5b8235 98+ pathjoin(buf, MAXPATHLEN, dirname, bestname);
47dd7a31 99+ if (verbose > 2) {
8c5b8235
WD
100+ rprintf(FINFO, "[%s] fuzzy match %s->%s\n",
101+ who_am_i(), *fname_ptr, buf);
47dd7a31 102+ }
8c5b8235 103+ *fname_ptr = buf;
58118c25 104+ return link_stat(buf, st_ptr, 0);
47dd7a31
WD
105+ }
106+ return -1;
107+}
58118c25
WD
108+
109
110 /*
111 * Acts on file number @p i from @p flist, whose name is @p fname.
112@@ -267,12 +356,12 @@ static void generate_and_send_sums(struc
113 * out. It might be wrong.
114 */
115 static void recv_generator(char *fname, struct file_struct *file, int i,
116- int f_out)
117+ int f_out, int f_nameout)
118 {
8c5b8235 119 int fd;
f74d2272
WD
120 STRUCT_STAT st;
121 struct map_struct *mapbuf;
8c5b8235
WD
122- int statret;
123+ int statret, fuzzy_file = 0;
124 char *fnamecmp;
125 char fnamecmpbuf[MAXPATHLEN];
54691942 126
58118c25
WD
127@@ -431,8 +520,10 @@ static void recv_generator(char *fname,
128 statret = link_stat(fnamecmpbuf, &st, 0);
129 if (!S_ISREG(st.st_mode))
130 statret = -1;
131- if (statret == -1)
132+ if (statret < 0) {
133 errno = saveerrno;
134+ *fnamecmpbuf = '\0';
135+ }
136 #if HAVE_LINK
137 else if (link_dest && !dry_run) {
138 if (do_link(fnamecmpbuf, fname) != 0) {
139@@ -440,18 +531,30 @@ static void recv_generator(char *fname,
140 rsyserr(FINFO, errno, "link %s => %s",
141 fnamecmpbuf, fname);
142 }
143- }
144- fnamecmp = fnamecmpbuf;
145+ fnamecmp = fnamecmpbuf;
146+ } else
147+ *fnamecmpbuf = '\0';
148 }
149 #endif
150 else
8c5b8235 151 fnamecmp = fnamecmpbuf;
58118c25
WD
152+ } else
153+ *fnamecmpbuf = '\0';
154+
8c5b8235 155+ if (statret == -1 && fuzzy) {
58118c25 156+ statret = find_fuzzy(&fnamecmp, fnamecmpbuf, &st);
8c5b8235
WD
157+ if (!S_ISREG(st.st_mode))
158+ statret = -1;
159+ else
160+ fuzzy_file = 1;
58118c25
WD
161 }
162
8c5b8235 163 if (statret == -1) {
7628f156
WD
164 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
165 return;
58118c25
WD
166 if (errno == ENOENT) {
167+ if (f_nameout >= 0)
168+ write(f_nameout, "", 1);
169 write_int(f_out,i);
170 if (!dry_run)
171 write_sum_head(f_out, NULL);
172@@ -471,37 +574,43 @@ static void recv_generator(char *fname,
173 /* now pretend the file didn't exist */
174 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
175 return;
176+ if (f_nameout >= 0)
177+ write(f_nameout, "", 1);
178 write_int(f_out,i);
179 if (!dry_run)
180 write_sum_head(f_out, NULL);
181 return;
182 }
183
184- if (opt_ignore_existing && fnamecmp == fname) {
185+ if (opt_ignore_existing && !*fnamecmpbuf) {
186 if (verbose > 1)
187 rprintf(FINFO,"%s exists\n",fname);
188 return;
189 }
190
191- if (update_only && fnamecmp == fname
192+ if (update_only && !*fnamecmpbuf
193 && cmp_modtime(st.st_mtime, file->modtime) > 0) {
194 if (verbose > 1)
195 rprintf(FINFO,"%s is newer\n",fname);
241013b4
MP
196 return;
197 }
198
8c5b8235 199- if (skip_file(fname, file, &st)) {
58118c25 200- if (fnamecmp == fname)
8c5b8235 201+ if (!fuzzy_file && skip_file(fname, file, &st)) {
58118c25 202+ if (!*fnamecmpbuf)
8c5b8235
WD
203 set_perms(fname, file, &st, PERMS_REPORT);
204 return;
58118c25
WD
205 }
206
207 if (dry_run) {
208+ if (f_nameout >= 0)
209+ write(f_nameout, "", 1);
210 write_int(f_out,i);
211 return;
212 }
213
214 if (disable_deltas_p()) {
215+ if (f_nameout >= 0)
216+ write(f_nameout, "", 1);
217 write_int(f_out,i);
218 write_sum_head(f_out, NULL);
219 return;
220@@ -516,6 +625,8 @@ static void recv_generator(char *fname,
221 /* pretend the file didn't exist */
222 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
223 return;
224+ if (f_nameout >= 0)
225+ write(f_nameout, "", 1);
226 write_int(f_out,i);
227 write_sum_head(f_out, NULL);
228 return;
229@@ -534,6 +645,8 @@ static void recv_generator(char *fname,
230 if (verbose > 2)
231 rprintf(FINFO, "generating and sending sums for %d\n", i);
232
233+ if (f_nameout >= 0)
234+ write(f_nameout, fnamecmpbuf, strlen(fnamecmpbuf) + 1);
235 write_int(f_out,i);
236 generate_and_send_sums(mapbuf, st.st_size, f_out);
237
8cec1ead 238@@ -543,7 +656,8 @@ static void recv_generator(char *fname,
58118c25
WD
239 }
240
241
242-void generate_files(int f, struct file_list *flist, char *local_name)
243+void generate_files(int f, struct file_list *flist, char *local_name,
244+ int f_nameout)
245 {
246 int i;
8cec1ead 247 int phase = 0;
58118c25
WD
248@@ -584,7 +698,7 @@ void generate_files(int f, struct file_l
249 }
250
251 recv_generator(local_name ? local_name : f_name_to(file, fbuf),
252- file, i, f);
253+ file, i, f, f_nameout);
254 }
255
256 phase++;
257@@ -601,7 +715,7 @@ void generate_files(int f, struct file_l
258 while ((i = get_redo_num()) != -1) {
259 struct file_struct *file = flist->files[i];
260 recv_generator(local_name ? local_name : f_name_to(file, fbuf),
261- file, i, f);
262+ file, i, f, f_nameout);
263 }
264
265 phase++;
266@@ -620,7 +734,7 @@ void generate_files(int f, struct file_l
267 if (!file->basename || !S_ISDIR(file->mode))
268 continue;
269 recv_generator(local_name ? local_name : f_name(file),
270- file, i, -1);
271+ file, i, -1, -1);
272 }
273
274 if (verbose > 2)
8cec1ead
WD
275--- main.c 30 Jun 2004 07:27:30 -0000 1.202
276+++ main.c 30 Jun 2004 07:43:47 -0000
277@@ -429,7 +429,7 @@ static int do_recv(int f_in,int f_out,st
58118c25
WD
278 {
279 int pid;
8cec1ead 280 int status = 0;
58118c25 281- int error_pipe[2];
58118c25
WD
282+ int error_pipe[2], name_pipe[2];
283
284 if (preserve_hard_links)
285 init_hard_links(flist);
8cec1ead 286@@ -441,8 +441,8 @@ static int do_recv(int f_in,int f_out,st
58118c25
WD
287 }
288 }
289
290- if (fd_pair(error_pipe) < 0) {
291- rprintf(FERROR,"error pipe failed in do_recv\n");
292+ if (fd_pair(error_pipe) < 0 || fd_pair(name_pipe) < 0) {
293+ rprintf(FERROR, "fd_pair() failed in do_recv\n");
294 exit_cleanup(RERR_SOCKETIO);
295 }
296
8cec1ead 297@@ -450,8 +450,10 @@ static int do_recv(int f_in,int f_out,st
58118c25 298
8cec1ead 299 if ((pid = do_fork()) == 0) {
58118c25
WD
300 close(error_pipe[0]);
301+ close(name_pipe[1]);
302 if (f_in != f_out)
303 close(f_out);
304+ set_blocking(name_pipe[0]);
305
306 /* we can't let two processes write to the socket at one time */
307 io_multiplexing_close();
308@@ -459,7 +461,7 @@ static int do_recv(int f_in,int f_out,st
309 /* set place to send errors */
310 set_msg_fd_out(error_pipe[1]);
311
312- recv_files(f_in,flist,local_name);
313+ recv_files(f_in, flist, local_name, name_pipe[0]);
314 io_flush(FULL_FLUSH);
315 report(f_in);
316
317@@ -475,14 +477,16 @@ static int do_recv(int f_in,int f_out,st
318 am_generator = 1;
319
320 close(error_pipe[1]);
321+ close(name_pipe[0]);
322 if (f_in != f_out)
323 close(f_in);
324+ set_blocking(name_pipe[1]);
325
326 io_start_buffering_out(f_out);
327
328 set_msg_fd_in(error_pipe[0]);
329
330- generate_files(f_out, flist, local_name);
331+ generate_files(f_out, flist, local_name, name_pipe[1]);
332
333 get_redo_num(); /* Read final MSG_DONE and any prior messages. */
334 report(-1);
47dd7a31 335--- options.c 20 Jun 2004 19:47:05 -0000 1.157
8cec1ead 336+++ options.c 30 Jun 2004 07:43:47 -0000
7628f156 337@@ -94,6 +94,7 @@ int ignore_errors = 0;
f74d2272
WD
338 int modify_window = 0;
339 int blocking_io = -1;
340 int checksum_seed = 0;
341+int fuzzy = 0;
342 unsigned int block_size = 0;
241013b4 343
241013b4 344
7628f156 345@@ -270,6 +271,7 @@ void usage(enum logcode F)
f0533c4c
WD
346 rprintf(F," -T --temp-dir=DIR create temporary files in directory DIR\n");
347 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
348 rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n");
349+ rprintf(F," --fuzzy use similar file as basis if basis doesn't exist\n");
350 rprintf(F," -P equivalent to --partial --progress\n");
351 rprintf(F," -z, --compress compress file data\n");
352 rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n");
7628f156 353@@ -368,6 +370,7 @@ static struct poptOption long_options[]
f0533c4c
WD
354 {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
355 {"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 },
356 {"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 },
f74d2272 357+ {"fuzzy", 0, POPT_ARG_NONE, &fuzzy, 0, 0, 0 },
f0533c4c
WD
358 /* TODO: Should this take an optional int giving the compression level? */
359 {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 },
360 {"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 },
7628f156 361@@ -989,6 +992,9 @@ void server_options(char **args,int *arg
f74d2272 362 }
241013b4 363 }
7b675ff5 364
241013b4
MP
365+ if (fuzzy && am_sender)
366+ args[ac++] = "--fuzzy";
7b675ff5 367+
241013b4 368 *argc = ac;
f74d2272 369 return;
7b675ff5 370
8cec1ead
WD
371--- receiver.c 30 Jun 2004 07:27:30 -0000 1.84
372+++ receiver.c 30 Jun 2004 07:43:47 -0000
58118c25
WD
373@@ -36,7 +36,6 @@ extern int preserve_perms;
374 extern int cvs_exclude;
375 extern int io_error;
376 extern char *tmpdir;
377-extern char *compare_dest;
378 extern int make_backups;
379 extern int do_progress;
380 extern char *backup_dir;
381@@ -293,14 +292,15 @@ static int receive_data(int f_in,struct
382 * main routine for receiver process.
383 *
384 * Receiver process runs on the same host as the generator process. */
385-int recv_files(int f_in,struct file_list *flist,char *local_name)
386+int recv_files(int f_in, struct file_list *flist, char *local_name,
387+ int f_name)
f74d2272 388 {
58118c25
WD
389 int fd1,fd2;
390 STRUCT_STAT st;
391 char *fname, fbuf[MAXPATHLEN];
392 char template[MAXPATHLEN];
393 char fnametmp[MAXPATHLEN];
394- char *fnamecmp;
395+ char *fnamecmp, *cp;
396 char fnamecmpbuf[MAXPATHLEN];
397 struct map_struct *mapbuf;
398 struct file_struct *file;
399@@ -364,19 +364,19 @@ int recv_files(int f_in,struct file_list
400 if (verbose > 2)
401 rprintf(FINFO,"recv_files(%s)\n",fname);
241013b4 402
58118c25
WD
403- fnamecmp = fname;
404+ for (cp = fnamecmpbuf; ; cp++) {
405+ if (read(f_name, cp, 1) <= 0) {
406+ rsyserr(FERROR, errno, "fname-pipe read failed");
407+ exit_cleanup(RERR_PROTOCOL);
408+ }
409+ if (!*cp)
410+ break;
8c5b8235 411+ }
58118c25
WD
412+ fnamecmp = *fnamecmpbuf ? fnamecmpbuf : fname;
413
414 /* open the file */
415 fd1 = do_open(fnamecmp, O_RDONLY, 0);
416
417- if (fd1 == -1 && compare_dest != NULL) {
418- /* try the file at compare_dest instead */
419- pathjoin(fnamecmpbuf, sizeof fnamecmpbuf,
420- compare_dest, fname);
421- fnamecmp = fnamecmpbuf;
422- fd1 = do_open(fnamecmp, O_RDONLY, 0);
423- }
424-
241013b4 425 if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
fe6407b5 426 rsyserr(FERROR, errno, "fstat %s failed",
8c5b8235 427 full_fname(fnamecmp));
58118c25
WD
428@@ -385,7 +385,7 @@ int recv_files(int f_in,struct file_list
429 continue;
430 }
431
432- if (fd1 != -1 && S_ISDIR(st.st_mode) && fnamecmp == fname) {
433+ if (fd1 != -1 && S_ISDIR(st.st_mode) && !*fnamecmpbuf) {
434 /* this special handling for directories
435 * wouldn't be necessary if robust_rename()
436 * and the underlying robust_unlink could cope
7628f156 437--- rsync.yo 5 Jun 2004 16:16:30 -0000 1.171
8cec1ead 438+++ rsync.yo 30 Jun 2004 07:43:48 -0000
7628f156 439@@ -325,6 +325,7 @@ verb(
f0533c4c
WD
440 -T --temp-dir=DIR create temporary files in directory DIR
441 --compare-dest=DIR also compare received files relative to DIR
442 --link-dest=DIR create hardlinks to DIR for unchanged files
443+ --fuzzy use similar file as basis if basis doesn't exist
444 -P equivalent to --partial --progress
445 -z, --compress compress file data
446 -C, --cvs-exclude auto ignore files in the same way CVS does