Added back the alternate.c file that got lost from the patch.
[rsync/rsync-patches.git] / fuzzy.diff
CommitLineData
47dd7a31
WD
1Updated for current CVS version by Wayne Davison. Passes *MOST* of the
2test suite, but otherwise UNTESTED.
241013b4 3
fe6407b5 4--- Makefile.in 15 May 2004 00:48:11 -0000 1.101
47dd7a31 5+++ Makefile.in 29 Jun 2004 15:14:48 -0000
54691942 6@@ -32,7 +32,7 @@ ZLIBOBJ=zlib/deflate.o zlib/infblock.o z
241013b4 7 zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \
f74d2272
WD
8 zlib/zutil.o zlib/adler32.o
9 OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o \
10- main.o checksum.o match.o syscall.o log.o backup.o
11+ main.o checksum.o match.o syscall.o log.o backup.o alternate.o
12 OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \
13 fileio.o batch.o clientname.o
14 OBJS3=progress.o pipe.o
47dd7a31
WD
15--- /dev/null 1 Jan 1970 00:00:00 -0000
16+++ alternate.c 29 Jun 2004 15:14:48 -0000
17@@ -0,0 +1,114 @@
18+#include "rsync.h"
19+
20+extern char *compare_dest;
21+extern int verbose;
22+
23+/* Alternate methods for opening a file when the local copy does not exist. */
24+/* Open fname read-only, but only if it stats as a regular file; otherwise return -1. */
25+int do_open_regular(char *fname)
26+{
27+ STRUCT_STAT st;
28+
29+ if (do_stat(fname, &st) == 0 && S_ISREG(st.st_mode))
30+ return do_open(fname, O_RDONLY, 0); /* result of do_open is passed straight back */
31+
32+ return -1; /* stat failed, or fname is not a regular file */
33+}
34+
35+static void split_names(char *fname, char **dirname, char **basename) /* split fname in place at its last slash */
36+{
37+ char *slash = strrchr(fname, '/'); /* last path separator, or NULL */
38+ if (slash) {
39+ *dirname = fname; /* NOTE(review): if the only slash is the leading one, dirname ends up "" */
40+ *slash = '\0'; /* terminates the directory part; this writes into the buffer passed in */
41+ *basename = slash+1;
42+ } else {
43+ *basename = fname;
44+ *dirname = "."; /* no slash at all: directory is the current one */
45+ }
46+}
47+
48+static unsigned int measure_name(const char *name, const char *basename,
49+ const char *ext) /* similarity of name to basename; 0 means no match at all */
50+{
51+ int namelen = strlen(name);
52+ int extlen = strlen(ext);
53+ unsigned int score = 0;
54+
55+ /* name must be longer than ext and must end with ext, else it scores 0 */
56+ if (namelen <= extlen || strcmp(name + namelen - extlen, ext) != 0)
57+ return 0;
58+
59+ /* The score is the number of leading characters name shares with basename */
60+ for (; *name == *basename && *name; name++, basename++)
61+ score++;
62+ return score;
63+}
64+
65+int open_alternate_base_fuzzy(const char *fname) /* fd of the best-scoring entry in the directory of fname, or -1 */
66+{
67+ DIR *d;
68+ struct dirent *di;
69+ char *basename, *dirname;
70+ char mangled_name[MAXPATHLEN];
71+ char bestname[MAXPATHLEN];
72+ unsigned int bestscore = 0;
73+ const char *ext;
74+
75+ strlcpy(mangled_name, fname, sizeof mangled_name); /* private copy: split_names() writes into it */
76+
77+ split_names(mangled_name, &dirname, &basename);
78+ if (!(d = opendir(dirname))) {
79+ rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname);
80+ return -1;
81+ }
82+
83+ /* Get final extension, eg. .gz; never full basename though. */
84+ ext = *basename ? strrchr(basename + 1, '.') : NULL; /* an empty basename has no byte at +1 to search from */
85+ if (!ext)
86+ ext = basename + strlen(basename); /* ext = "" */
87+
88+ while ((di = readdir(d)) != NULL) {
89+ const char *dname = d_name(di);
90+ unsigned int score;
91+
92+ if (dname[0] == '.' && (dname[1] == '\0'
93+ || (dname[1] == '.' && dname[2] == '\0')))
94+ continue; /* skip the "." and ".." entries */
95+
96+ score = measure_name(dname, basename, ext);
97+ if (verbose > 4) {
98+ rprintf(FINFO,"fuzzy score for %s = %u\n",
99+ dname, score);
100+ }
101+ if (score > bestscore) { /* strictly greater: the first entry seen wins a tie */
102+ strlcpy(bestname, dname, sizeof bestname); /* bounded copy, same as for mangled_name */
103+ bestscore = score;
104+ }
105+ }
106+ closedir(d);
107+
108+ /* Found a candidate: bestname is only valid when bestscore is non-zero. */
109+ if (bestscore != 0) {
110+ char fuzzyname[MAXPATHLEN];
111+
112+ pathjoin(fuzzyname,sizeof fuzzyname, dirname, bestname);
113+ if (verbose > 2) {
114+ rprintf(FINFO, "fuzzy match %s->%s\n",
115+ fname, fuzzyname);
116+ }
117+ return do_open_regular(fuzzyname); /* -1 if the candidate is not a regular file */
118+ }
119+ return -1; /* nothing in the directory scored above zero */
120+}
121+
122+int open_alternate_base_comparedir(const char *fname) /* returns what do_open_regular() returns: an fd or -1 */
123+{
124+ char fnamebuf[MAXPATHLEN];
125+
126+ /* Try the same name underneath compare_dest instead. */
127+ pathjoin(fnamebuf, sizeof fnamebuf, compare_dest, fname); /* NOTE(review): the pathjoin result is not checked */
128+
129+ /* FIXME: now follows symlinks... */
130+ return do_open_regular(fnamebuf);
131+}
132--- generator.c 23 Jun 2004 21:21:19 -0000 1.90
133+++ generator.c 29 Jun 2004 15:14:48 -0000
7628f156
WD
134@@ -41,6 +41,7 @@ extern int ignore_times;
135 extern int size_only;
136 extern int io_timeout;
137 extern int protocol_version;
241013b4 138+extern int fuzzy;
7628f156
WD
139 extern int always_checksum;
140 extern char *compare_dest;
141 extern int link_dest;
47dd7a31 142@@ -256,7 +257,61 @@ static void generate_and_send_sums(struc
f74d2272 143 }
241013b4
MP
144 }
145
146+/* Open a basis file for fname. Returns an open fd, -1 if none could be opened (null file), or -2 if the file is to be skipped. */
f74d2272 147+static int open_base_file(struct file_struct *file, char *fname, int statret,
241013b4
MP
148+ STRUCT_STAT *st)
149+{
150+ int fd = -1;
7628f156 151
241013b4
MP
152+ if (statret == 0) {
153+ if (S_ISREG(st->st_mode)) {
154+ if (update_only
155+ && cmp_modtime(st->st_mtime, file->modtime) > 0) {
156+ if (verbose > 1)
f74d2272 157+ rprintf(FINFO, "%s is newer\n", fname);
241013b4
MP
158+ return -2;
159+ }
160+ if (skip_file(fname, file, st)) {
7f2baf27 161+ set_perms(fname, file, st, PERMS_REPORT);
241013b4
MP
162+ return -2;
163+ }
164+ fd = do_open(fname, O_RDONLY, 0);
165+ if (fd == -1) {
fe6407b5
WD
166+ rsyserr(FERROR, errno, "failed to open %s, continuing",
167+ full_fname(fname));
241013b4 168+ return -1;
f74d2272
WD
169+ }
170+ return fd; /* existing regular file opened: the fallbacks below are not tried */
241013b4
MP
171+ } else {
172+ /* Try to use symlink contents */
173+ if (S_ISLNK(st->st_mode)) {
174+ fd = do_open_regular(fname);
175+ /* Don't delete yet; receiver will need it */
176+ } else {
177+ if (delete_file(fname) != 0) {
178+ if (fd != -1) /* NOTE(review): fd is still -1 on this path, so this close looks unreachable */
179+ close(fd);
180+ return -2;
181+ }
182+ }
183+ }
184+ }
185+
186+ if (fd == -1 && compare_dest != NULL) /* first fallback: the same name under compare_dest */
187+ fd = open_alternate_base_comparedir(fname);
7b675ff5 188+
241013b4
MP
189+ if (fd == -1 && fuzzy) /* second fallback: a similarly named file, only when fuzzy is set */
190+ fd = open_alternate_base_fuzzy(fname);
191+
192+ /* Refresh *st from the opened fd so it describes the file actually opened (its size in particular) */
193+ if (fd != -1) {
f74d2272 194+ if (do_fstat(fd, st) != 0) { /* the failure is reported, but fd is still returned */
fe6407b5 195+ rsyserr(FERROR, errno, "fstat %s", full_fname(fname));
f74d2272 196+ }
241013b4 197+ }
7628f156 198+
241013b4
MP
199+ return fd;
200+}
201
f74d2272
WD
202 /**
203 * Acts on file number @p i from @p flist, whose name is @p fname.
47dd7a31 204@@ -272,8 +327,6 @@ void recv_generator(char *fname, struct
f74d2272
WD
205 STRUCT_STAT st;
206 struct map_struct *mapbuf;
241013b4 207 int statret;
241013b4
MP
208- char *fnamecmp;
209- char fnamecmpbuf[MAXPATHLEN];
54691942
WD
210
211 if (list_only)
212 return;
47dd7a31 213@@ -413,109 +466,39 @@ void recv_generator(char *fname, struct
241013b4 214 }
f74d2272 215 #endif
241013b4 216
f74d2272
WD
217- if (preserve_hard_links && hard_link_check(file, HL_CHECK_MASTER))
218- return;
219-
220- if (!S_ISREG(file->mode)) {
221- rprintf(FINFO, "skipping non-regular file \"%s\"\n",fname);
222- return;
223- }
224-
241013b4
MP
225- fnamecmp = fname;
226-
f74d2272 227- if (statret == -1 && compare_dest != NULL) {
241013b4
MP
228- /* try the file at compare_dest instead */
229- int saveerrno = errno;
f74d2272 230- pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, compare_dest, fname);
7628f156 231- statret = link_stat(fnamecmpbuf, &st, 0);
241013b4
MP
232- if (!S_ISREG(st.st_mode))
233- statret = -1;
234- if (statret == -1)
235- errno = saveerrno;
f74d2272
WD
236-#if HAVE_LINK
237- else if (link_dest && !dry_run) {
238- if (do_link(fnamecmpbuf, fname) != 0) {
239- if (verbose > 0) {
fe6407b5
WD
240- rsyserr(FINFO, errno, "link %s => %s",
241- fnamecmpbuf, fname);
f74d2272
WD
242- }
243- }
244- fnamecmp = fnamecmpbuf;
245- }
246-#endif
241013b4
MP
247- else
248- fnamecmp = fnamecmpbuf;
249- }
250-
251- if (statret == -1) {
f74d2272
WD
252- if (preserve_hard_links && hard_link_check(file, HL_SKIP))
253- return;
241013b4
MP
254- if (errno == ENOENT) {
255- write_int(f_out,i);
7628f156
WD
256- if (!dry_run)
257- write_sum_head(f_out, NULL);
f74d2272
WD
258- } else if (verbose > 1) {
259+ /* Failed to stat for some reason besides "not found". */
260+ if (statret == -1 && errno != ENOENT) {
fe6407b5
WD
261+ if (verbose > 1) {
262 rsyserr(FERROR, errno,
263- "recv_generator: failed to open %s",
264+ "recv_generator failed to stat %s",
265 full_fname(fname));
266 }
f74d2272
WD
267 return;
268 }
269
241013b4
MP
270- if (!S_ISREG(st.st_mode)) {
271- if (delete_file(fname) != 0) {
272- return;
273- }
7628f156
WD
274+ if ((fd = open_base_file(file, fname, statret, &st)) == -2)
275+ return;
276
241013b4 277- /* now pretend the file didn't exist */
7628f156
WD
278+ if ((disable_deltas_p() || dry_run) && fd != -1) {
279+ close(fd);
280+ fd = -1;
281+ }
282+
283+ if (fd == -1) {
284+ /* the file didn't exist, or we can pretend it doesn't */
285 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
286 return;
241013b4 287- write_int(f_out,i);
7628f156
WD
288+ write_int(f_out, i);
289 if (!dry_run)
290 write_sum_head(f_out, NULL);
fe6407b5 291- return;
241013b4
MP
292- }
293-
f74d2272 294- if (opt_ignore_existing && fnamecmp == fname) {
241013b4
MP
295- if (verbose > 1)
296- rprintf(FINFO,"%s exists\n",fname);
7b675ff5 297- return;
f74d2272 298- }
7628f156
WD
299+ return;
300+ }
301
241013b4 302- if (update_only && cmp_modtime(st.st_mtime,file->modtime)>0 && fnamecmp == fname) {
f74d2272 303- if (verbose > 1)
241013b4 304- rprintf(FINFO,"%s is newer\n",fname);
f74d2272 305- return;
7b675ff5
WD
306- }
307-
241013b4
MP
308- if (skip_file(fname, file, &st)) {
309- if (fnamecmp == fname)
7f2baf27 310- set_perms(fname, file, &st, PERMS_REPORT);
241013b4
MP
311- return;
312- }
313-
314- if (dry_run) {
315- write_int(f_out,i);
7628f156 316- return;
241013b4 317- }
7628f156 318-
f74d2272 319- if (disable_deltas_p()) {
241013b4 320- write_int(f_out,i);
f74d2272 321- write_sum_head(f_out, NULL);
7628f156
WD
322+ if (preserve_hard_links && hard_link_check(file, HL_CHECK_MASTER))
323 return;
324- }
7b675ff5 325
f74d2272 326- /* open the file */
241013b4 327- fd = do_open(fnamecmp, O_RDONLY, 0);
f74d2272 328-
7628f156 329- if (fd == -1) {
fe6407b5
WD
330- rsyserr(FERROR, errno, "failed to open %s, continuing",
331- full_fname(fnamecmp));
241013b4 332- /* pretend the file didn't exist */
7628f156
WD
333- if (preserve_hard_links && hard_link_check(file, HL_SKIP))
334- return;
f74d2272
WD
335- write_int(f_out,i);
336- write_sum_head(f_out, NULL);
f74d2272
WD
337+ if (!S_ISREG(file->mode)) {
338+ rprintf(FINFO, "skipping non-regular file \"%s\"\n",fname);
241013b4
MP
339 return;
340 }
341
47dd7a31 342@@ -525,7 +508,7 @@ void recv_generator(char *fname, struct
f74d2272 343 mapbuf = NULL;
241013b4 344
f74d2272
WD
345 if (verbose > 3) {
346- rprintf(FINFO,"gen mapped %s of size %.0f\n", fnamecmp,
347+ rprintf(FINFO, "gen mapped %s of size %.0f\n", fname,
348 (double)st.st_size);
349 }
241013b4 350
47dd7a31
WD
351--- options.c 20 Jun 2004 19:47:05 -0000 1.157
352+++ options.c 29 Jun 2004 15:14:48 -0000
7628f156 353@@ -94,6 +94,7 @@ int ignore_errors = 0;
f74d2272
WD
354 int modify_window = 0;
355 int blocking_io = -1;
356 int checksum_seed = 0;
357+int fuzzy = 0;
358 unsigned int block_size = 0;
241013b4 359
241013b4 360
7628f156 361@@ -270,6 +271,7 @@ void usage(enum logcode F)
f0533c4c
WD
362 rprintf(F," -T --temp-dir=DIR create temporary files in directory DIR\n");
363 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
364 rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n");
365+ rprintf(F," --fuzzy use similar file as basis if basis doesn't exist\n");
366 rprintf(F," -P equivalent to --partial --progress\n");
367 rprintf(F," -z, --compress compress file data\n");
368 rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n");
7628f156 369@@ -368,6 +370,7 @@ static struct poptOption long_options[]
f0533c4c
WD
370 {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
371 {"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 },
372 {"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 },
f74d2272 373+ {"fuzzy", 0, POPT_ARG_NONE, &fuzzy, 0, 0, 0 },
f0533c4c
WD
374 /* TODO: Should this take an optional int giving the compression level? */
375 {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 },
376 {"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 },
7628f156 377@@ -989,6 +992,9 @@ void server_options(char **args,int *arg
f74d2272 378 }
241013b4 379 }
7b675ff5 380
241013b4
MP
381+ if (fuzzy && am_sender)
382+ args[ac++] = "--fuzzy";
7b675ff5 383+
241013b4 384 *argc = ac;
f74d2272 385 return;
7b675ff5 386
47dd7a31
WD
387--- receiver.c 29 Jun 2004 15:12:01 -0000 1.83
388+++ receiver.c 29 Jun 2004 15:14:48 -0000
7628f156 389@@ -48,6 +48,7 @@ extern int ignore_errors;
54691942 390 extern int orig_umask;
7f2baf27 391 extern int keep_partial;
7b675ff5 392 extern int checksum_seed;
241013b4
MP
393+extern int fuzzy;
394
f74d2272
WD
395 static void delete_one(char *fn, int is_dir)
396 {
7628f156 397@@ -300,8 +301,6 @@ int recv_files(int f_in,struct file_list
f74d2272 398 char *fname, fbuf[MAXPATHLEN];
241013b4
MP
399 char template[MAXPATHLEN];
400 char fnametmp[MAXPATHLEN];
401- char *fnamecmp;
402- char fnamecmpbuf[MAXPATHLEN];
f74d2272 403 struct map_struct *mapbuf;
241013b4 404 struct file_struct *file;
7628f156
WD
405 struct stats initial_stats;
406@@ -364,35 +363,31 @@ int recv_files(int f_in,struct file_list
241013b4
MP
407 if (verbose > 2)
408 rprintf(FINFO,"recv_files(%s)\n",fname);
409
410- fnamecmp = fname;
411-
f74d2272 412 /* open the file */
241013b4
MP
413- fd1 = do_open(fnamecmp, O_RDONLY, 0);
414+ fd1 = do_open(fname, O_RDONLY, 0);
415
f74d2272 416- if (fd1 == -1 && compare_dest != NULL) {
241013b4 417- /* try the file at compare_dest instead */
f74d2272
WD
418- pathjoin(fnamecmpbuf, sizeof fnamecmpbuf,
419- compare_dest, fname);
241013b4
MP
420- fnamecmp = fnamecmpbuf;
421- fd1 = do_open(fnamecmp, O_RDONLY, 0);
422- }
423+ if (fd1 == -1 && compare_dest != NULL)
424+ fd1 = open_alternate_base_comparedir(fname);
425+
426+ if (fd1 == -1 && fuzzy)
427+ fd1 = open_alternate_base_fuzzy(fname);
428
429 if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
fe6407b5
WD
430 rsyserr(FERROR, errno, "fstat %s failed",
431- full_fname(fnamecmp));
432+ full_fname(fname));
241013b4
MP
433 receive_data(f_in,NULL,-1,NULL,file->length);
434 close(fd1);
435 continue;
436 }
437
f74d2272
WD
438- if (fd1 != -1 && S_ISDIR(st.st_mode) && fnamecmp == fname) {
439+ if (fd1 != -1 && S_ISDIR(st.st_mode)) {
440 /* this special handling for directories
441 * wouldn't be necessary if robust_rename()
442 * and the underlying robust_unlink could cope
443 * with directories
444 */
445 rprintf(FERROR,"recv_files: %s is a directory\n",
446- full_fname(fnamecmp));
447+ full_fname(fname));
448 receive_data(f_in, NULL, -1, NULL, file->length);
241013b4
MP
449 close(fd1);
450 continue;
47dd7a31 451@@ -415,7 +410,7 @@ int recv_files(int f_in,struct file_list
f74d2272 452 mapbuf = map_file(fd1,st.st_size);
47dd7a31
WD
453 if (verbose > 2) {
454 rprintf(FINFO, "recv mapped %s of size %.0f\n",
455- fnamecmp, (double)st.st_size);
456+ fname, (double)st.st_size);
457 }
f74d2272
WD
458 } else
459 mapbuf = NULL;
7628f156 460--- rsync.yo 5 Jun 2004 16:16:30 -0000 1.171
47dd7a31 461+++ rsync.yo 29 Jun 2004 15:14:49 -0000
7628f156 462@@ -325,6 +325,7 @@ verb(
f0533c4c
WD
463 -T --temp-dir=DIR create temporary files in directory DIR
464 --compare-dest=DIR also compare received files relative to DIR
465 --link-dest=DIR create hardlinks to DIR for unchanged files
466+ --fuzzy use similar file as basis if basis is gone
467 -P equivalent to --partial --progress
468 -z, --compress compress file data
469 -C, --cvs-exclude auto ignore files in the same way CVS does