Working with the new g2r-basis-filename patch, we just transmit the
[rsync/rsync-patches.git] / fuzzy.diff
... / ...
CommitLineData
1Depends-On-Patch: g2r-basis-filename.diff
2
3The changes to generator.c were greatly simplified, making the patch
4easier to maintain and fixing the failing test in the testsuite.
5Very lightly tested.
6
7Be sure to run "make proto" before "make".
8
9--- orig/generator.c 2004-07-28 10:14:15
10+++ generator.c 2004-07-28 10:23:12
11@@ -41,6 +41,7 @@ extern int ignore_times;
12 extern int size_only;
13 extern int io_timeout;
14 extern int protocol_version;
15+extern int fuzzy;
16 extern int always_checksum;
17 extern char *partial_dir;
18 extern char *compare_dest;
19@@ -240,6 +241,94 @@ static void generate_and_send_sums(int f
20 }
21
22
23+static void split_names(char *fname, char **dirname, char **basename)
24+{ /* Split fname IN PLACE at its last '/' into directory and final component. */
25+ char *slash = strrchr(fname, '/');
26+ if (slash) {
27+ *dirname = slash == fname ? "/" : fname; /* "/x" lives in "/", not in "" */
28+ *slash = '\0'; /* terminates the directory part inside fname itself */
29+ *basename = slash+1;
30+ } else {
31+ *basename = fname;
32+ *dirname = "."; /* no separator: use the current directory */
33+ }
34+}
35+
36+
37+static unsigned int measure_name(const char *name, const char *basename,
38+ const char *ext)
39+{
40+ int name_len = strlen(name);
41+ int ext_len = strlen(ext);
42+ unsigned int common = 0;
43+
44+ /* A candidate must be longer than, and end with, the extension. */
45+ if (!(name_len > ext_len && strcmp(name + name_len - ext_len, ext) == 0))
46+ return 0;
47+
48+ /* The score is the length of the prefix shared with basename. */
49+ while (name[common] != '\0' && name[common] == basename[common])
50+ common++;
51+ return common;
52+}
53+
54+
55+static int find_fuzzy(char **fname_ptr, char *buf, STRUCT_STAT *st_ptr)
56+{ /* Pick the best-scoring sibling of *fname_ptr; returns link_stat()'s result, or -1 if none. */
57+ DIR *d;
58+ struct dirent *di;
59+ char *basename, *dirname;
60+ char mangled_name[MAXPATHLEN]; /* scratch copy: split_names() writes into it */
61+ char bestname[MAXPATHLEN]; /* only meaningful once bestscore != 0 */
62+ unsigned int bestscore = 0;
63+ const char *ext;
64+
65+ strlcpy(mangled_name, *fname_ptr, sizeof mangled_name);
66+
67+ split_names(mangled_name, &dirname, &basename);
68+ if (!(d = opendir(dirname))) {
69+ rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname);
70+ return -1;
71+ }
72+
73+ /* Get final extension, eg. .gz; never full basename though. */
74+ ext = *basename ? strrchr(basename + 1, '.') : NULL; /* don't step past an empty name */
75+ if (!ext)
76+ ext = basename + strlen(basename); /* ext = "" */
77+
78+ while ((di = readdir(d)) != NULL) {
79+ const char *dname = d_name(di);
80+ unsigned int score;
81+
82+ if (dname[0] == '.' && (dname[1] == '\0'
83+ || (dname[1] == '.' && dname[2] == '\0')))
84+ continue; /* skip the "." and ".." entries */
85+
86+ score = measure_name(dname, basename, ext);
87+ if (verbose > 4) {
88+ rprintf(FINFO, "[%s] fuzzy score for %s = %u\n",
89+ who_am_i(), dname, score);
90+ }
91+ if (score > bestscore) { /* strict '>': the first entry seen wins a tie */
92+ strlcpy(bestname, dname, sizeof bestname);
93+ bestscore = score;
94+ }
95+ }
96+ closedir(d);
97+
98+ /* Found a candidate. */
99+ if (bestscore != 0) {
100+ pathjoin(buf, MAXPATHLEN, dirname, bestname); /* buf is treated as MAXPATHLEN bytes */
101+ if (verbose > 2) {
102+ rprintf(FINFO, "[%s] fuzzy match %s->%s\n",
103+ who_am_i(), *fname_ptr, buf);
104+ }
105+ *fname_ptr = buf; /* caller's name pointer now refers to the match */
106+ return link_stat(buf, st_ptr, 0);
107+ }
108+ return -1;
109+}
110+
111
112 /*
113 * Acts on file number @p i from @p flist, whose name is @p fname.
114@@ -254,7 +343,7 @@ static void recv_generator(char *fname,
115 {
116 int fd = -1;
117 STRUCT_STAT st;
118- int statret, stat_errno;
119+ int statret, stat_errno, fuzzy_file = 0;
120 char *fnamecmp;
121 char fnamecmpbuf[MAXPATHLEN];
122
123@@ -439,6 +528,14 @@ static void recv_generator(char *fname,
124 } else
125 *fnamecmpbuf = '\0';
126
127+ if (statret == -1 && fuzzy) { /* basis is missing: look for a similarly named file */
128+ statret = find_fuzzy(&fnamecmp, fnamecmpbuf, &st);
129+ if (statret != 0 || !S_ISREG(st.st_mode)) /* st is valid only when statret == 0 */
130+ statret = -1;
131+ else
132+ fuzzy_file = 1;
133+ }
134+
135 if (statret == 0 && !S_ISREG(st.st_mode)) {
136 if (delete_file(fname) != 0)
137 return;
138@@ -472,7 +569,7 @@ static void recv_generator(char *fname,
139 return;
140 }
141
142- if (skip_file(fnamecmp, file, &st)) {
143+ if (!fuzzy_file && skip_file(fnamecmp, file, &st)) {
144 if (!*fnamecmpbuf)
145 set_perms(fname, file, &st, PERMS_REPORT);
146 return;
147--- orig/main.c 2004-07-22 00:10:43
148+++ main.c 2004-07-22 00:32:31
149@@ -47,6 +47,7 @@ extern int keep_dirlinks;
150 extern int preserve_hard_links;
151 extern int protocol_version;
152 extern int recurse;
153+extern int fuzzy;
154 extern int relative_paths;
155 extern int rsync_port;
156 extern int whole_file;
157@@ -458,7 +459,7 @@ static int do_recv(int f_in,int f_out,st
158 int pid;
159 int status = 0;
160 int error_pipe[2], name_pipe[2];
161- BOOL need_name_pipe = compare_dest && !dry_run;
162+ BOOL need_name_pipe = (compare_dest || fuzzy) && !dry_run;
163
164 if (preserve_hard_links)
165 init_hard_links(flist);
166--- orig/options.c 2004-07-29 16:08:03
167+++ options.c 2004-07-16 20:14:12
168@@ -85,6 +85,7 @@ int safe_symlinks = 0;
169 int copy_unsafe_links = 0;
170 int size_only = 0;
171 int bwlimit = 0;
172+int fuzzy = 0;
173 size_t bwlimit_writemax = 0;
174 int delete_after = 0;
175 int only_existing = 0;
176@@ -279,6 +280,7 @@ void usage(enum logcode F)
177 rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n");
178 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
179 rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n");
180+ rprintf(F," --fuzzy use similar file as basis if basis doesn't exist\n");
181 rprintf(F," -P equivalent to --partial --progress\n");
182 rprintf(F," -z, --compress compress file data\n");
183 rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n");
184@@ -378,6 +380,7 @@ static struct poptOption long_options[]
185 {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
186 {"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 },
187 {"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 },
188+ {"fuzzy", 0, POPT_ARG_NONE, &fuzzy, 0, 0, 0 },
189 /* TODO: Should this take an optional int giving the compression level? */
190 {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 },
191 {"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 },
192@@ -1042,6 +1045,9 @@ void server_options(char **args,int *arg
193 }
194 }
195
196+ if (fuzzy && am_sender)
197+ args[ac++] = "--fuzzy";
198+
199 *argc = ac;
200 return;
201
202--- orig/receiver.c 2004-07-23 21:59:07
203+++ receiver.c 2004-07-23 22:08:03
204@@ -39,7 +39,6 @@ extern int cvs_exclude;
205 extern int io_error;
206 extern char *tmpdir;
207 extern char *partial_dir;
208-extern char *compare_dest;
209 extern int make_backups;
210 extern int do_progress;
211 extern char *backup_dir;
212--- orig/rsync.yo 2004-07-29 16:08:04
213+++ rsync.yo 2004-07-03 19:27:25
214@@ -327,6 +327,7 @@ verb(
215 -T --temp-dir=DIR create temporary files in directory DIR
216 --compare-dest=DIR also compare received files relative to DIR
217 --link-dest=DIR create hardlinks to DIR for unchanged files
218+ --fuzzy use similar file as basis if basis is gone
219 -P equivalent to --partial --progress
220 -z, --compress compress file data
221 -C, --cvs-exclude auto ignore files in the same way CVS does