Merged in the g2r-basis-filename.diff changes so that the receiver
[rsync/rsync-patches.git] / fuzzy.diff
CommitLineData
8c5b8235
WD
1Wayne Davison wrote:
2
3I greatly simplified the changes to generator.c, making the patch
4easier to maintain and fixing the failing test in the testsuite.
5Very lightly tested (by me).
241013b4 6
fe6407b5 7--- Makefile.in 15 May 2004 00:48:11 -0000 1.101
8c5b8235 8+++ Makefile.in 29 Jun 2004 17:46:12 -0000
54691942 9@@ -32,7 +32,7 @@ ZLIBOBJ=zlib/deflate.o zlib/infblock.o z
241013b4 10 zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \
f74d2272
WD
11 zlib/zutil.o zlib/adler32.o
12 OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o \
13- main.o checksum.o match.o syscall.o log.o backup.o
14+ main.o checksum.o match.o syscall.o log.o backup.o alternate.o
15 OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \
16 fileio.o batch.o clientname.o
17 OBJS3=progress.o pipe.o
47dd7a31 18--- /dev/null 1 Jan 1970 00:00:00 -0000
8c5b8235
WD
19+++ alternate.c 29 Jun 2004 17:46:12 -0000
20@@ -0,0 +1,105 @@
47dd7a31
WD
21+#include "rsync.h"
22+
23+extern char *compare_dest;
24+extern int verbose;
25+
26+/* Alternate methods for opening files, if local doesn't exist */
27+/* Sanity check that we are about to open a regular file */
8c5b8235 28+static int do_open_regular(char *fname)
47dd7a31
WD
29+{
30+ STRUCT_STAT st;
31+
32+ if (do_stat(fname, &st) == 0 && S_ISREG(st.st_mode))
33+ return do_open(fname, O_RDONLY, 0);
34+
35+ return -1;
36+}
37+
38+static void split_names(char *fname, char **dirname, char **basename)
39+{
40+ char *slash = strrchr(fname, '/');
41+ if (slash) {
42+ *dirname = fname;
43+ *slash = '\0';
44+ *basename = slash+1;
45+ } else {
46+ *basename = fname;
47+ *dirname = ".";
48+ }
49+}
50+
51+static unsigned int measure_name(const char *name, const char *basename,
52+ const char *ext)
53+{
54+ int namelen = strlen(name);
55+ int extlen = strlen(ext);
56+ unsigned int score = 0;
57+
58+ /* Extensions must match */
59+ if (namelen <= extlen || strcmp(name + namelen - extlen, ext) != 0)
60+ return 0;
61+
62+ /* Now score depends on similarity of prefix */
63+ for (; *name == *basename && *name; name++, basename++)
64+ score++;
65+ return score;
66+}
67+
8c5b8235
WD
68+int check_alternate_base_fuzzy(char **fname_ptr, char *buf,
69+ STRUCT_STAT *st_ptr)
47dd7a31
WD
70+{
71+ DIR *d;
72+ struct dirent *di;
73+ char *basename, *dirname;
74+ char mangled_name[MAXPATHLEN];
75+ char bestname[MAXPATHLEN];
76+ unsigned int bestscore = 0;
77+ const char *ext;
78+
8c5b8235 79+ strlcpy(mangled_name, *fname_ptr, sizeof mangled_name);
47dd7a31
WD
80+
81+ split_names(mangled_name, &dirname, &basename);
82+ if (!(d = opendir(dirname))) {
83+ rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname);
84+ return -1;
85+ }
86+
87+ /* Get final extension, e.g. .gz; never the full basename though. */
88+ ext = strrchr(basename + 1, '.');
89+ if (!ext)
90+ ext = basename + strlen(basename); /* ext = "" */
91+
92+ while ((di = readdir(d)) != NULL) {
93+ const char *dname = d_name(di);
94+ unsigned int score;
95+
96+ if (dname[0] == '.' && (dname[1] == '\0'
97+ || (dname[1] == '.' && dname[2] == '\0')))
98+ continue;
99+
100+ score = measure_name(dname, basename, ext);
101+ if (verbose > 4) {
8c5b8235
WD
102+ rprintf(FINFO, "[%s] fuzzy score for %s = %u\n",
103+ who_am_i(), dname, score);
47dd7a31
WD
104+ }
105+ if (score > bestscore) {
8c5b8235 106+ strlcpy(bestname, dname, sizeof bestname);
47dd7a31
WD
107+ bestscore = score;
108+ }
109+ }
110+ closedir(d);
111+
112+ /* Found a candidate. */
113+ if (bestscore != 0) {
8c5b8235 114+ pathjoin(buf, MAXPATHLEN, dirname, bestname);
47dd7a31 115+ if (verbose > 2) {
8c5b8235
WD
116+ rprintf(FINFO, "[%s] fuzzy match %s->%s\n",
117+ who_am_i(), *fname_ptr, buf);
47dd7a31 118+ }
8c5b8235
WD
119+ *fname_ptr = buf;
120+ if (st_ptr)
121+ return link_stat(buf, st_ptr, 0);
122+ return do_open_regular(buf);
47dd7a31
WD
123+ }
124+ return -1;
125+}
8c5b8235
WD
126--- generator.c 29 Jun 2004 16:22:54 -0000 1.91
127+++ generator.c 29 Jun 2004 17:46:12 -0000
7628f156
WD
128@@ -41,6 +41,7 @@ extern int ignore_times;
129 extern int size_only;
130 extern int io_timeout;
131 extern int protocol_version;
241013b4 132+extern int fuzzy;
7628f156
WD
133 extern int always_checksum;
134 extern char *compare_dest;
135 extern int link_dest;
8c5b8235
WD
136@@ -271,7 +272,7 @@ void recv_generator(char *fname, struct
137 int fd;
f74d2272
WD
138 STRUCT_STAT st;
139 struct map_struct *mapbuf;
8c5b8235
WD
140- int statret;
141+ int statret, fuzzy_file = 0;
142 char *fnamecmp;
143 char fnamecmpbuf[MAXPATHLEN];
54691942 144
8c5b8235
WD
145@@ -447,6 +448,15 @@ void recv_generator(char *fname, struct
146 fnamecmp = fnamecmpbuf;
241013b4
MP
147 }
148
8c5b8235
WD
149+ if (statret == -1 && fuzzy) {
150+ statret = check_alternate_base_fuzzy(&fnamecmp, fnamecmpbuf,
151+ &st);
152+ if (!S_ISREG(st.st_mode))
153+ statret = -1;
154+ else
155+ fuzzy_file = 1;
7628f156
WD
156+ }
157+
8c5b8235 158 if (statret == -1) {
7628f156
WD
159 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
160 return;
8c5b8235 161@@ -489,7 +499,7 @@ void recv_generator(char *fname, struct
241013b4
MP
162 return;
163 }
164
8c5b8235
WD
165- if (skip_file(fname, file, &st)) {
166+ if (!fuzzy_file && skip_file(fname, file, &st)) {
167 if (fnamecmp == fname)
168 set_perms(fname, file, &st, PERMS_REPORT);
169 return;
47dd7a31 170--- options.c 20 Jun 2004 19:47:05 -0000 1.157
8c5b8235 171+++ options.c 29 Jun 2004 17:46:13 -0000
7628f156 172@@ -94,6 +94,7 @@ int ignore_errors = 0;
f74d2272
WD
173 int modify_window = 0;
174 int blocking_io = -1;
175 int checksum_seed = 0;
176+int fuzzy = 0;
177 unsigned int block_size = 0;
241013b4 178
241013b4 179
7628f156 180@@ -270,6 +271,7 @@ void usage(enum logcode F)
f0533c4c
WD
181 rprintf(F," -T --temp-dir=DIR create temporary files in directory DIR\n");
182 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
183 rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n");
184+ rprintf(F," --fuzzy use similar file as basis if basis doesn't exist\n");
185 rprintf(F," -P equivalent to --partial --progress\n");
186 rprintf(F," -z, --compress compress file data\n");
187 rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n");
7628f156 188@@ -368,6 +370,7 @@ static struct poptOption long_options[]
f0533c4c
WD
189 {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
190 {"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 },
191 {"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 },
f74d2272 192+ {"fuzzy", 0, POPT_ARG_NONE, &fuzzy, 0, 0, 0 },
f0533c4c
WD
193 /* TODO: Should this take an optional int giving the compression level? */
194 {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 },
195 {"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 },
7628f156 196@@ -989,6 +992,9 @@ void server_options(char **args,int *arg
f74d2272 197 }
241013b4 198 }
7b675ff5 199
241013b4
MP
200+ if (fuzzy && am_sender)
201+ args[ac++] = "--fuzzy";
7b675ff5 202+
241013b4 203 *argc = ac;
f74d2272 204 return;
7b675ff5 205
47dd7a31 206--- receiver.c 29 Jun 2004 15:12:01 -0000 1.83
8c5b8235 207+++ receiver.c 29 Jun 2004 17:46:13 -0000
7628f156 208@@ -48,6 +48,7 @@ extern int ignore_errors;
54691942 209 extern int orig_umask;
7f2baf27 210 extern int keep_partial;
7b675ff5 211 extern int checksum_seed;
241013b4
MP
212+extern int fuzzy;
213
f74d2272
WD
214 static void delete_one(char *fn, int is_dir)
215 {
8c5b8235
WD
216@@ -377,6 +378,11 @@ int recv_files(int f_in,struct file_list
217 fd1 = do_open(fnamecmp, O_RDONLY, 0);
218 }
241013b4 219
8c5b8235
WD
220+ if (fd1 == -1 && fuzzy) {
221+ fd1 = check_alternate_base_fuzzy(&fnamecmp, fnamecmpbuf,
222+ NULL);
223+ }
241013b4 224+
241013b4 225 if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
fe6407b5 226 rsyserr(FERROR, errno, "fstat %s failed",
8c5b8235 227 full_fname(fnamecmp));
7628f156 228--- rsync.yo 5 Jun 2004 16:16:30 -0000 1.171
8c5b8235 229+++ rsync.yo 29 Jun 2004 17:46:14 -0000
7628f156 230@@ -325,6 +325,7 @@ verb(
f0533c4c
WD
231 -T --temp-dir=DIR create temporary files in directory DIR
232 --compare-dest=DIR also compare received files relative to DIR
233 --link-dest=DIR create hardlinks to DIR for unchanged files
234+ --fuzzy use similar file as basis if basis doesn't exist
235 -P equivalent to --partial --progress
236 -z, --compress compress file data
237 -C, --cvs-exclude auto ignore files in the same way CVS does