1 The changes to generator.c were greatly simplified, making the patch
2 easier to maintain and fixing the failing test in the testsuite.
5 Be sure to run "make proto" before "make".
7 --- orig/generator.c 2005-01-17 23:11:45
8 +++ generator.c 2005-01-17 23:38:46
9 @@ -44,6 +44,7 @@ extern int size_only;
10 extern OFF_T max_size;
11 extern int io_timeout;
12 extern int protocol_version;
13 +extern int fuzzy_basis;
14 extern int always_checksum;
15 extern char *partial_dir;
16 extern char *basis_dir[];
17 @@ -242,6 +243,85 @@ static void generate_and_send_sums(int f
21 +static unsigned int measure_name(const char *name, const char *basename,
24 + int namelen = strlen(name);
25 + int extlen = strlen(ext);
26 + unsigned int score = 0;
28 + /* Extensions must match */
29 + if (namelen <= extlen || strcmp(name + namelen - extlen, ext) != 0)
32 + /* Now score depends on similarity of prefix */
33 + for (; *name == *basename && *name; name++, basename++)
39 +static int find_fuzzy(const char *fname, char *buf, STRUCT_STAT *st_ptr)
43 + char *basename, *dirname, *slash;
44 + char bestname[MAXPATHLEN];
45 + unsigned int bestscore = 0;
48 + strlcpy(buf, fname, MAXPATHLEN);
49 + if ((slash = strrchr(buf, '/')) != NULL) {
52 + basename = slash + 1;
58 + if (!(d = opendir(dirname))) {
59 + rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname);
65 + /* Get final extension, eg. .gz; never full basename though. */
66 + for (ext = basename; *ext == '.'; ext++) {}
67 + if (!(ext = strrchr(ext, '.')))
68 + ext = basename + strlen(basename); /* ext = "" */
70 + while ((di = readdir(d)) != NULL) {
71 + const char *dname = d_name(di);
74 + if (dname[0] == '.' && (dname[1] == '\0'
75 + || (dname[1] == '.' && dname[2] == '\0')))
78 + score = measure_name(dname, basename, ext);
80 + rprintf(FINFO, "fuzzy score for %s = %u\n",
83 + if (score > bestscore) {
84 + strlcpy(bestname, dname, sizeof bestname);
90 + /* Found a candidate. */
91 + if (bestscore != 0) {
92 + strlcpy(basename, bestname, MAXPATHLEN - (basename - buf));
94 + rprintf(FINFO, "fuzzy match %s->%s\n", fname, buf);
95 + return link_stat(buf, st_ptr, 0);
102 * Acts on file number @p i from @p flist, whose name is @p fname.
103 @@ -496,6 +576,15 @@ static void recv_generator(char *fname,
107 + if (statret == -1 && fuzzy_basis) {
108 + if (find_fuzzy(fname, fnamecmpbuf, &st) == 0
109 + && S_ISREG(st.st_mode)) {
111 + fnamecmp = fnamecmpbuf;
112 + fnamecmp_type = FNAMECMP_FUZZY;
117 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
119 @@ -524,6 +613,8 @@ static void recv_generator(char *fname,
121 if (!compare_dest && fnamecmp_type <= FNAMECMP_BASIS_DIR_HIGH)
123 + else if (fnamecmp_type == FNAMECMP_FUZZY)
125 else if (unchanged_file(fnamecmp, file, &st)) {
126 if (fnamecmp_type == FNAMECMP_FNAME)
127 set_perms(fname, file, &st, PERMS_REPORT);
128 @@ -598,8 +689,24 @@ notify_others:
130 if (protocol_version >= 29 && inplace && !read_batch)
131 write_byte(f_out, fnamecmp_type);
132 - if (f_out_name >= 0)
133 + if (f_out_name >= 0) {
134 write_byte(f_out_name, fnamecmp_type);
135 + if (fnamecmp_type == FNAMECMP_FUZZY) {
136 + uchar lenbuf[3], *lb = lenbuf;
137 + int len = strlen(fnamecmpbuf);
139 +#if MAXPATHLEN > 0x7FFF
140 + *lb++ = len / 0x10000 + 0x80;
141 + *lb++ = len / 0x100;
143 + *lb++ = len / 0x100 + 0x80;
147 + write_buf(f_out_name, lenbuf, lb - lenbuf + 1);
148 + write_buf(f_out_name, fnamecmpbuf, len);
152 if (dry_run || read_batch)
154 --- orig/main.c 2005-01-17 23:11:45
155 +++ main.c 2005-01-14 18:33:15
156 @@ -48,6 +48,7 @@ extern int keep_dirlinks;
157 extern int preserve_hard_links;
158 extern int protocol_version;
160 +extern int fuzzy_basis;
161 extern int relative_paths;
162 extern int rsync_port;
163 extern int whole_file;
164 @@ -464,7 +465,8 @@ static int do_recv(int f_in,int f_out,st
167 int error_pipe[2], name_pipe[2];
168 - BOOL need_name_pipe = (basis_dir[0] || partial_dir) && !dry_run;
169 + BOOL need_name_pipe = (basis_dir[0] || partial_dir || fuzzy_basis)
172 /* The receiving side mustn't obey this, or an existing symlink that
173 * points to an identical file won't be replaced by the referent. */
174 --- orig/options.c 2005-01-17 23:11:45
175 +++ options.c 2005-01-15 21:08:13
176 @@ -86,6 +86,7 @@ int copy_unsafe_links = 0;
178 int daemon_bwlimit = 0;
180 +int fuzzy_basis = 0;
181 size_t bwlimit_writemax = 0;
182 int delete_after = 0;
183 int only_existing = 0;
184 @@ -288,6 +289,7 @@ void usage(enum logcode F)
185 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
186 rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
187 rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
188 + rprintf(F," --fuzzy find similar file for basis when no dest file\n");
189 rprintf(F," -P equivalent to --partial --progress\n");
190 rprintf(F," -z, --compress compress file data\n");
191 rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n");
192 @@ -384,6 +386,7 @@ static struct poptOption long_options[]
193 {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
194 {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
195 {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
196 + {"fuzzy", 0, POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 },
197 /* TODO: Should this take an optional int giving the compression level? */
198 {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 },
199 {"stats", 0, POPT_ARG_NONE, &do_stats, 0, 0, 0 },
200 @@ -1234,6 +1237,9 @@ void server_options(char **args,int *arg
201 args[ac++] = "--no-relative";
204 + if (fuzzy_basis && am_sender)
205 + args[ac++] = "--fuzzy";
210 --- orig/receiver.c 2005-01-17 23:11:45
211 +++ receiver.c 2005-01-15 21:21:02
212 @@ -324,6 +324,27 @@ static int receive_data(int f_in, char *
216 +static void read_gen_name(int fd, char *buf)
218 + int len = read_byte(fd);
220 +#if MAXPATHLEN > 32767
222 + read_buf(fd, (char *)lenbuf, 2);
223 + len = (len & ~0x80) * 0x10000 + lenbuf[0] * 0x100 + lenbuf[1];
225 + len = (len & ~0x80) * 0x100 + read_byte(fd);
228 + if (len >= MAXPATHLEN) {
229 + rprintf(FERROR, "bogus data on generator name pipe\n");
230 + exit_cleanup(RERR_PROTOCOL);
233 + read_sbuf(fd, buf, len);
237 static void discard_receive_data(int f_in, OFF_T length)
239 receive_data(f_in, NULL, -1, 0, NULL, -1, length);
240 @@ -454,6 +475,10 @@ int recv_files(int f_in, struct file_lis
241 case FNAMECMP_BACKUP:
242 fnamecmp = get_backup_name(fname);
244 + case FNAMECMP_FUZZY:
245 + read_gen_name(f_in_name, fnamecmpbuf);
246 + fnamecmp = fnamecmpbuf;
249 if (j >= basis_dir_cnt) {
251 --- orig/rsync.h 2005-01-17 23:11:45
252 +++ rsync.h 2005-01-15 21:24:09
254 #define FNAMECMP_FNAME 0x80
255 #define FNAMECMP_PARTIAL_DIR 0x81
256 #define FNAMECMP_BACKUP 0x82
257 +#define FNAMECMP_FUZZY 0x83
260 /* Log-message categories. FLOG is only used on the daemon side to
261 --- orig/rsync.yo 2005-01-17 23:11:46
262 +++ rsync.yo 2005-01-15 21:48:52
263 @@ -358,6 +358,7 @@ verb(
264 --compare-dest=DIR also compare received files relative to DIR
265 --copy-dest=DIR ... and include copies of unchanged files
266 --link-dest=DIR hardlink to files in DIR when unchanged
267 + --fuzzy find similar file for basis when no dest file
268 -P equivalent to --partial --progress
269 -z, --compress compress file data
270 -C, --cvs-exclude auto ignore files in the same way CVS does
271 @@ -878,6 +879,11 @@ Note that rsync versions prior to 2.6.1
272 (or implied by -a). You can work-around this bug by avoiding the -o option
273 when sending to an old rsync.
275 +dit(bf(--fuzzy)) This option tells rsync that it should look around for a
276 +basis file for any destination file that is missing. The current algorithm
277 +looks for a similarly-named file in the same directory as the destination
278 +file, and, if found, uses that to try to speed up the transfer.
280 dit(bf(-z, --compress)) With this option, rsync compresses any data from
281 the files that it sends to the destination machine. This
282 option is useful on slow connections. The compression method used is the