Updated to handle --read-batch mode. This actually fixes a potential
[rsync/rsync-patches.git] / fuzzy.diff
CommitLineData
824abc86 1Depends-On-Patch: g2r-basis-filename.diff
8c5b8235 2
824abc86 3The changes to generator.c were greatly simplified, making the patch
8c5b8235 4easier to maintain and fixing the failing test in the testsuite.
58118c25 5Very lightly tested.
241013b4 6
824abc86
WD
7Be sure to run "make proto" before "make".
8
9--- orig/generator.c 2004-07-03 20:08:07
10+++ generator.c 2004-07-03 20:09:05
58118c25
WD
11@@ -41,6 +41,7 @@ extern int ignore_times;
12 extern int size_only;
13 extern int io_timeout;
14 extern int protocol_version;
15+extern int fuzzy;
16 extern int always_checksum;
17 extern char *compare_dest;
18 extern int link_dest;
f6c3b300 19@@ -241,6 +242,94 @@ static void generate_and_send_sums(struc
58118c25
WD
20 }
21
22
47dd7a31
WD
/*
 * Split the path in @p fname into a directory part and a final component.
 *
 * The split is done in place: when @p fname contains a '/', the last one
 * is overwritten with '\0', so @p fname must be a writable buffer and
 * *basename (and usually *dirname) point into it afterwards.
 *
 *   "a/b/c" -> *dirname = "a/b", *basename = "c"
 *   "c"     -> *dirname = ".",   *basename = "c"
 *   "/c"    -> *dirname = "/",   *basename = "c"
 *
 * In the last two cases *dirname points at a string literal and must not
 * be modified by the caller.
 */
static void split_names(char *fname, char **dirname, char **basename)
{
	char *slash = strrchr(fname, '/');

	if (!slash) {
		/* No directory component at all: use the current directory. */
		*basename = fname;
		*dirname = ".";
		return;
	}

	*basename = slash + 1;
	/* When the only slash is the leading one, cutting the string there
	 * would leave an empty directory name; the directory is the root. */
	*dirname = slash == fname ? "/" : fname;
	*slash = '\0';
}
35+
58118c25 36+
47dd7a31
WD
/*
 * Score how closely the candidate @p name resembles @p basename.
 *
 * @p ext is the suffix the candidate is required to end with (it may be
 * the empty string).  A candidate that does not end in @p ext, or that is
 * no longer than @p ext, scores 0.  Otherwise the score is the number of
 * leading characters @p name and @p basename have in common.
 */
static unsigned int measure_name(const char *name, const char *basename,
				 const char *ext)
{
	/* strlen() returns size_t; keep the lengths unsigned so nothing is
	 * narrowed and the pointer offset below is computed without sign
	 * conversions. */
	size_t namelen = strlen(name);
	size_t extlen = strlen(ext);
	unsigned int score = 0;

	/* Extensions must match, and the name must be more than just the
	 * extension. */
	if (namelen <= extlen || strcmp(name + (namelen - extlen), ext) != 0)
		return 0;

	/* The score is the length of the shared prefix. */
	while (*name && *name == *basename) {
		score++;
		name++;
		basename++;
	}
	return score;
}
53+
58118c25
WD
54+
55+static int find_fuzzy(char **fname_ptr, char *buf, STRUCT_STAT *st_ptr)
47dd7a31
WD
56+{
57+ DIR *d;
58+ struct dirent *di;
59+ char *basename, *dirname;
60+ char mangled_name[MAXPATHLEN];
61+ char bestname[MAXPATHLEN];
62+ unsigned int bestscore = 0;
63+ const char *ext;
64+
8c5b8235 65+ strlcpy(mangled_name, *fname_ptr, sizeof mangled_name);
47dd7a31
WD
66+
67+ split_names(mangled_name, &dirname, &basename);
68+ if (!(d = opendir(dirname))) {
69+ rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname);
70+ return -1;
71+ }
72+
73+ /* Get final extension, eg. .gz; never full basename though. */
74+ ext = strrchr(basename + 1, '.');
75+ if (!ext)
76+ ext = basename + strlen(basename); /* ext = "" */
77+
78+ while ((di = readdir(d)) != NULL) {
79+ const char *dname = d_name(di);
80+ unsigned int score;
81+
82+ if (dname[0] == '.' && (dname[1] == '\0'
83+ || (dname[1] == '.' && dname[2] == '\0')))
84+ continue;
85+
86+ score = measure_name(dname, basename, ext);
87+ if (verbose > 4) {
8c5b8235
WD
88+ rprintf(FINFO, "[%s] fuzzy score for %s = %u\n",
89+ who_am_i(), dname, score);
47dd7a31
WD
90+ }
91+ if (score > bestscore) {
8c5b8235 92+ strlcpy(bestname, dname, sizeof bestname);
47dd7a31
WD
93+ bestscore = score;
94+ }
95+ }
96+ closedir(d);
97+
98+ /* Found a candidate. */
99+ if (bestscore != 0) {
8c5b8235 100+ pathjoin(buf, MAXPATHLEN, dirname, bestname);
47dd7a31 101+ if (verbose > 2) {
8c5b8235
WD
102+ rprintf(FINFO, "[%s] fuzzy match %s->%s\n",
103+ who_am_i(), *fname_ptr, buf);
47dd7a31 104+ }
8c5b8235 105+ *fname_ptr = buf;
58118c25 106+ return link_stat(buf, st_ptr, 0);
47dd7a31
WD
107+ }
108+ return -1;
109+}
58118c25
WD
110+
111
112 /*
113 * Acts on file number @p i from @p flist, whose name is @p fname.
f6c3b300 114@@ -256,7 +345,7 @@ static void recv_generator(char *fname,
8c5b8235 115 int fd;
f74d2272
WD
116 STRUCT_STAT st;
117 struct map_struct *mapbuf;
8c5b8235
WD
118- int statret;
119+ int statret, fuzzy_file = 0;
120 char *fnamecmp;
121 char fnamecmpbuf[MAXPATHLEN];
54691942 122
f6c3b300 123@@ -439,6 +528,14 @@ static void recv_generator(char *fname,
824abc86
WD
124 } else
125 *fnamecmpbuf = '\0';
126
8c5b8235 127+ if (statret == -1 && fuzzy) {
58118c25 128+ statret = find_fuzzy(&fnamecmp, fnamecmpbuf, &st);
8c5b8235
WD
129+ if (!S_ISREG(st.st_mode))
130+ statret = -1;
131+ else
132+ fuzzy_file = 1;
824abc86
WD
133+ }
134+
8c5b8235 135 if (statret == -1) {
7628f156
WD
136 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
137 return;
f6c3b300 138@@ -485,7 +582,7 @@ static void recv_generator(char *fname,
241013b4
MP
139 return;
140 }
141
8c5b8235
WD
142- if (skip_file(fname, file, &st)) {
143+ if (!fuzzy_file && skip_file(fname, file, &st)) {
824abc86 144 if (!*fnamecmpbuf)
8c5b8235
WD
145 set_perms(fname, file, &st, PERMS_REPORT);
146 return;
f6c3b300
WD
147--- orig/options.c 2004-07-16 20:07:22
148+++ options.c 2004-07-16 20:14:12
149@@ -85,6 +85,7 @@ int safe_symlinks = 0;
150 int copy_unsafe_links = 0;
151 int size_only = 0;
152 int bwlimit = 0;
f74d2272 153+int fuzzy = 0;
f6c3b300
WD
154 size_t bwlimit_writemax = 0;
155 int delete_after = 0;
156 int only_existing = 0;
157@@ -276,6 +277,7 @@ void usage(enum logcode F)
f0533c4c
WD
158 rprintf(F," -T --temp-dir=DIR create temporary files in directory DIR\n");
159 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
160 rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n");
161+ rprintf(F," --fuzzy use similar file as basis if basis doesn't exist\n");
162 rprintf(F," -P equivalent to --partial --progress\n");
163 rprintf(F," -z, --compress compress file data\n");
164 rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n");
f6c3b300 165@@ -375,6 +377,7 @@ static struct poptOption long_options[]
f0533c4c
WD
166 {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
167 {"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 },
168 {"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 },
f74d2272 169+ {"fuzzy", 0, POPT_ARG_NONE, &fuzzy, 0, 0, 0 },
f0533c4c
WD
170 /* TODO: Should this take an optional int giving the compression level? */
171 {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 },
172 {"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 },
f6c3b300 173@@ -1019,6 +1022,9 @@ void server_options(char **args,int *arg
f74d2272 174 }
241013b4 175 }
7b675ff5 176
241013b4
MP
177+ if (fuzzy && am_sender)
178+ args[ac++] = "--fuzzy";
7b675ff5 179+
241013b4 180 *argc = ac;
f74d2272 181 return;
7b675ff5 182
824abc86
WD
183--- orig/receiver.c 2004-07-03 20:08:07
184+++ receiver.c 2004-07-03 20:09:05
58118c25
WD
185@@ -36,7 +36,6 @@ extern int preserve_perms;
186 extern int cvs_exclude;
187 extern int io_error;
188 extern char *tmpdir;
189-extern char *compare_dest;
190 extern int make_backups;
191 extern int do_progress;
192 extern char *backup_dir;
f6c3b300 193--- orig/rsync.yo 2004-07-16 20:07:23
824abc86 194+++ rsync.yo 2004-07-03 19:27:25
f6c3b300 195@@ -326,6 +326,7 @@ verb(
f0533c4c
WD
196 -T --temp-dir=DIR create temporary files in directory DIR
197 --compare-dest=DIR also compare received files relative to DIR
198 --link-dest=DIR create hardlinks to DIR for unchanged files
199+ --fuzzy use similar file as basis if basis is gone
200 -P equivalent to --partial --progress
201 -z, --compress compress file data
202 -C, --cvs-exclude auto ignore files in the same way CVS does