Added back the alternate.c file that got lost from the patch.
[rsync/rsync-patches.git] / fuzzy.diff
1 Updated for current CVS version by Wayne Davison.  Passes *MOST* of the
2 test suite, but otherwise UNTESTED.
3
4 --- Makefile.in 15 May 2004 00:48:11 -0000      1.101
5 +++ Makefile.in 29 Jun 2004 15:14:48 -0000
6 @@ -32,7 +32,7 @@ ZLIBOBJ=zlib/deflate.o zlib/infblock.o z
7         zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \
8         zlib/zutil.o zlib/adler32.o
9  OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o \
10 -       main.o checksum.o match.o syscall.o log.o backup.o
11 +       main.o checksum.o match.o syscall.o log.o backup.o alternate.o
12  OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \
13         fileio.o batch.o clientname.o
14  OBJS3=progress.o pipe.o
15 --- /dev/null   1 Jan 1970 00:00:00 -0000
16 +++ alternate.c 29 Jun 2004 15:14:48 -0000
17 @@ -0,0 +1,114 @@
18 +#include "rsync.h"
19 +
20 +extern char *compare_dest;
21 +extern int verbose;
22 +
23 +/* Alternate methods for opening files, if local doesn't exist */
24 +/* Open fname read-only, but only if do_stat() reports a regular file; otherwise return -1. */
25 +int do_open_regular(char *fname)
26 +{
27 +       STRUCT_STAT st;
28 +
29 +       if (do_stat(fname, &st) == 0 && S_ISREG(st.st_mode))
30 +               return do_open(fname, O_RDONLY, 0);
31 +
32 +       return -1;
33 +}
34 +/* Split fname in place at its last '/': *dirname gets the directory part ("." if no slash, "/" for a root-level name), *basename the rest. */
35 +static void split_names(char *fname, char **dirname, char **basename)
36 +{
37 +       char *slash = strrchr(fname, '/');
38 +       if (slash) {
39 +               *dirname = slash == fname ? "/" : fname; /* "/name": report the root, not "" */
40 +               *slash = '\0'; /* fname itself is cut down to the directory part */
41 +               *basename = slash+1;
42 +       } else {
43 +               *basename = fname;
44 +               *dirname = ".";
45 +       }
46 +}
47 +/* Score name as a stand-in for basename: 0 unless name ends in ext, otherwise the length of their common prefix. */
48 +static unsigned int measure_name(const char *name, const char *basename,
49 +                                const char *ext)
50 +{
51 +       int namelen = strlen(name);
52 +       int extlen = strlen(ext);
53 +       unsigned int score = 0;
54 +
55 +       /* Extensions must match, and name must be longer than the bare extension */
56 +       if (namelen <= extlen || strcmp(name + namelen - extlen, ext) != 0)
57 +               return 0;
58 +
59 +       /* Score is the number of leading characters name and basename share */
60 +       for (; *name == *basename && *name; name++, basename++)
61 +               score++;
62 +       return score;
63 +}
64 +/* Open the best-scoring similarly named regular file in fname's directory as a basis; returns its fd, or -1 if there is none. */
65 +int open_alternate_base_fuzzy(const char *fname)
66 +{
67 +       DIR *d;
68 +       struct dirent *di;
69 +       char *basename, *dirname;
70 +       char mangled_name[MAXPATHLEN];
71 +       char bestname[MAXPATHLEN];
72 +       unsigned int bestscore = 0;
73 +       const char *ext;
74 +
75 +       strlcpy(mangled_name, fname, sizeof mangled_name);
76 +
77 +       split_names(mangled_name, &dirname, &basename);
78 +       if (!(d = opendir(dirname))) {
79 +               rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname);
80 +               return -1;
81 +       }
82 +
83 +       /* Get final extension, e.g. ".gz"; never the full basename though. */
84 +       ext = *basename ? strrchr(basename + 1, '.') : NULL; /* don't step past an empty basename's NUL */
85 +       if (!ext)
86 +               ext = basename + strlen(basename); /* ext = "" */
87 +
88 +       while ((di = readdir(d)) != NULL) {
89 +               const char *dname = d_name(di);
90 +               unsigned int score;
91 +
92 +               if (dname[0] == '.' && (dname[1] == '\0'
93 +                   || (dname[1] == '.' && dname[2] == '\0')))
94 +                       continue;
95 +
96 +               score = measure_name(dname, basename, ext);
97 +               if (verbose > 4) {
98 +                       rprintf(FINFO,"fuzzy score for %s = %u\n",
99 +                               dname, score);
100 +               }
101 +               if (score > bestscore) {
102 +                       strlcpy(bestname, dname, sizeof bestname); /* bounded copy, as for mangled_name */
103 +                       bestscore = score;
104 +               }
105 +       }
106 +       closedir(d);
107 +
108 +       /* Found a candidate (a score of 0 means nothing shared a prefix and extension). */
109 +       if (bestscore != 0) {
110 +               char fuzzyname[MAXPATHLEN];
111 +
112 +               pathjoin(fuzzyname,sizeof fuzzyname, dirname, bestname);
113 +               if (verbose > 2) {
114 +                       rprintf(FINFO, "fuzzy match %s->%s\n",
115 +                               fname, fuzzyname);
116 +               }
117 +               return do_open_regular(fuzzyname);
118 +       }
119 +       return -1;
120 +}
121 +/* Try fname joined onto compare_dest as the basis: returns whatever do_open_regular() gives for that path (an fd, or -1). */
122 +int open_alternate_base_comparedir(const char *fname)
123 +{
124 +       char fnamebuf[MAXPATHLEN];
125 +
126 +       /* try the file at compare_dest instead */
127 +       pathjoin(fnamebuf, sizeof fnamebuf, compare_dest, fname);
128 +
129 +       /* FIXME: now follows symlinks... */
130 +       return do_open_regular(fnamebuf);
131 +}
132 --- generator.c 23 Jun 2004 21:21:19 -0000      1.90
133 +++ generator.c 29 Jun 2004 15:14:48 -0000
134 @@ -41,6 +41,7 @@ extern int ignore_times;
135  extern int size_only;
136  extern int io_timeout;
137  extern int protocol_version;
138 +extern int fuzzy;
139  extern int always_checksum;
140  extern char *compare_dest;
141  extern int link_dest;
142 @@ -256,7 +257,61 @@ static void generate_and_send_sums(struc
143         }
144  }
145  
146 +/* Open the basis file for fname. Returns its fd, -1 if there is none to open (caller then sends a null sum head), -2 to skip the file. */
147 +static int open_base_file(struct file_struct *file, char *fname, int statret,
148 +                         STRUCT_STAT *st)
149 +{
150 +       int fd = -1;
151  
152 +       if (statret == 0) { /* NOTE(review): the opt_ignore_existing check this patch deletes from recv_generator has no counterpart here -- confirm */
153 +               if (S_ISREG(st->st_mode)) {
154 +                       if (update_only
155 +                           && cmp_modtime(st->st_mtime, file->modtime) > 0) {
156 +                               if (verbose > 1)
157 +                                       rprintf(FINFO, "%s is newer\n", fname);
158 +                               return -2;
159 +                       }
160 +                       if (skip_file(fname, file, st)) {
161 +                               set_perms(fname, file, st, PERMS_REPORT);
162 +                               return -2;
163 +                       }
164 +                       fd = do_open(fname, O_RDONLY, 0);
165 +                       if (fd == -1) {
166 +                               rsyserr(FERROR, errno, "failed to open %s, continuing",
167 +                                   full_fname(fname));
168 +                               return -1;
169 +                       }
170 +                       return fd; /* existing regular file: *st is left as the caller filled it in */
171 +               } else {
172 +                       /* Try to use symlink contents */
173 +                       if (S_ISLNK(st->st_mode)) {
174 +                               fd = do_open_regular(fname);
175 +                               /* Don't delete yet; receiver will need it */
176 +                       } else {
177 +                               if (delete_file(fname) != 0) {
178 +                                       if (fd != -1) /* never true: fd is still -1 on this branch */
179 +                                               close(fd);
180 +                                       return -2;
181 +                               }
182 +                       }
183 +               }
184 +       }
185 +
186 +       if (fd == -1 && compare_dest != NULL)
187 +               fd = open_alternate_base_comparedir(fname);
188 +
189 +       if (fd == -1 && fuzzy)
190 +               fd = open_alternate_base_fuzzy(fname);
191 +
192 +       /* Re-stat through the opened fd so *st (and its size) describes the basis actually in use */
193 +       if (fd != -1) {
194 +               if (do_fstat(fd, st) != 0) {
195 +                       rsyserr(FERROR, errno, "fstat %s", full_fname(fname));
196 +               }
197 +       }
198 +
199 +       return fd;
200 +}
201  
202  /**
203   * Acts on file number @p i from @p flist, whose name is @p fname.
204 @@ -272,8 +327,6 @@ void recv_generator(char *fname, struct 
205         STRUCT_STAT st;
206         struct map_struct *mapbuf;
207         int statret;
208 -       char *fnamecmp;
209 -       char fnamecmpbuf[MAXPATHLEN];
210  
211         if (list_only)
212                 return;
213 @@ -413,109 +466,39 @@ void recv_generator(char *fname, struct 
214         }
215  #endif
216  
217 -       if (preserve_hard_links && hard_link_check(file, HL_CHECK_MASTER))
218 -               return;
219 -
220 -       if (!S_ISREG(file->mode)) {
221 -               rprintf(FINFO, "skipping non-regular file \"%s\"\n",fname);
222 -               return;
223 -       }
224 -
225 -       fnamecmp = fname;
226 -
227 -       if (statret == -1 && compare_dest != NULL) {
228 -               /* try the file at compare_dest instead */
229 -               int saveerrno = errno;
230 -               pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, compare_dest, fname);
231 -               statret = link_stat(fnamecmpbuf, &st, 0);
232 -               if (!S_ISREG(st.st_mode))
233 -                       statret = -1;
234 -               if (statret == -1)
235 -                       errno = saveerrno;
236 -#if HAVE_LINK
237 -               else if (link_dest && !dry_run) {
238 -                       if (do_link(fnamecmpbuf, fname) != 0) {
239 -                               if (verbose > 0) {
240 -                                       rsyserr(FINFO, errno, "link %s => %s",
241 -                                               fnamecmpbuf, fname);
242 -                               }
243 -                       }
244 -                       fnamecmp = fnamecmpbuf;
245 -               }
246 -#endif
247 -               else
248 -                       fnamecmp = fnamecmpbuf;
249 -       }
250 -
251 -       if (statret == -1) {
252 -               if (preserve_hard_links && hard_link_check(file, HL_SKIP))
253 -                       return;
254 -               if (errno == ENOENT) {
255 -                       write_int(f_out,i);
256 -                       if (!dry_run)
257 -                               write_sum_head(f_out, NULL);
258 -               } else if (verbose > 1) {
259 +       /* Failed to stat for some reason besides "not found". */
260 +       if (statret == -1 && errno != ENOENT) {
261 +               if (verbose > 1) {
262                         rsyserr(FERROR, errno,
263 -                               "recv_generator: failed to open %s",
264 +                               "recv_generator failed to stat %s",
265                                 full_fname(fname));
266                 }
267                 return;
268         }
269  
270 -       if (!S_ISREG(st.st_mode)) {
271 -               if (delete_file(fname) != 0) {
272 -                       return;
273 -               }
274 +       if ((fd = open_base_file(file, fname, statret, &st)) == -2)
275 +               return;
276  
277 -               /* now pretend the file didn't exist */
278 +       if ((disable_deltas_p() || dry_run) && fd != -1) {
279 +               close(fd);
280 +               fd = -1;
281 +       }
282 +
283 +       if (fd == -1) {
284 +               /* the file didn't exist, or we can pretend it doesn't */
285                 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
286                         return;
287 -               write_int(f_out,i);
288 +               write_int(f_out, i);
289                 if (!dry_run)
290                         write_sum_head(f_out, NULL);
291 -               return;
292 -       }
293 -
294 -       if (opt_ignore_existing && fnamecmp == fname) {
295 -               if (verbose > 1)
296 -                       rprintf(FINFO,"%s exists\n",fname);
297 -               return;
298 -       }
299 +               return;
300 +       }
301  
302 -       if (update_only && cmp_modtime(st.st_mtime,file->modtime)>0 && fnamecmp == fname) {
303 -               if (verbose > 1)
304 -                       rprintf(FINFO,"%s is newer\n",fname);
305 -               return;
306 -       }
307 -
308 -       if (skip_file(fname, file, &st)) {
309 -               if (fnamecmp == fname)
310 -                       set_perms(fname, file, &st, PERMS_REPORT);
311 -               return;
312 -       }
313 -
314 -       if (dry_run) {
315 -               write_int(f_out,i);
316 -               return;
317 -       }
318 -
319 -       if (disable_deltas_p()) {
320 -               write_int(f_out,i);
321 -               write_sum_head(f_out, NULL);
322 +       if (preserve_hard_links && hard_link_check(file, HL_CHECK_MASTER))
323                 return;
324 -       }
325  
326 -       /* open the file */
327 -       fd = do_open(fnamecmp, O_RDONLY, 0);
328 -
329 -       if (fd == -1) {
330 -               rsyserr(FERROR, errno, "failed to open %s, continuing",
331 -                       full_fname(fnamecmp));
332 -               /* pretend the file didn't exist */
333 -               if (preserve_hard_links && hard_link_check(file, HL_SKIP))
334 -                       return;
335 -               write_int(f_out,i);
336 -               write_sum_head(f_out, NULL);
337 +       if (!S_ISREG(file->mode)) {
338 +               rprintf(FINFO, "skipping non-regular file \"%s\"\n",fname);
339                 return;
340         }
341  
342 @@ -525,7 +508,7 @@ void recv_generator(char *fname, struct 
343                 mapbuf = NULL;
344  
345         if (verbose > 3) {
346 -               rprintf(FINFO,"gen mapped %s of size %.0f\n", fnamecmp,
347 +               rprintf(FINFO, "gen mapped %s of size %.0f\n", fname,
348                         (double)st.st_size);
349         }
350  
351 --- options.c   20 Jun 2004 19:47:05 -0000      1.157
352 +++ options.c   29 Jun 2004 15:14:48 -0000
353 @@ -94,6 +94,7 @@ int ignore_errors = 0;
354  int modify_window = 0;
355  int blocking_io = -1;
356  int checksum_seed = 0;
357 +int fuzzy = 0;
358  unsigned int block_size = 0;
359  
360  
361 @@ -270,6 +271,7 @@ void usage(enum logcode F)
362    rprintf(F," -T  --temp-dir=DIR          create temporary files in directory DIR\n");
363    rprintf(F,"     --compare-dest=DIR      also compare destination files relative to DIR\n");
364    rprintf(F,"     --link-dest=DIR         create hardlinks to DIR for unchanged files\n");
365 +  rprintf(F,"     --fuzzy                 use similar file as basis if basis doesn't exist\n");
366    rprintf(F," -P                          equivalent to --partial --progress\n");
367    rprintf(F," -z, --compress              compress file data\n");
368    rprintf(F," -C, --cvs-exclude           auto ignore files in the same way CVS does\n");
369 @@ -368,6 +370,7 @@ static struct poptOption long_options[] 
370    {"temp-dir",        'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
371    {"compare-dest",     0,  POPT_ARG_STRING, &compare_dest, 0, 0, 0 },
372    {"link-dest",        0,  POPT_ARG_STRING, &compare_dest,  OPT_LINK_DEST, 0, 0 },
373 +  {"fuzzy",            0,  POPT_ARG_NONE,   &fuzzy, 0, 0, 0 },
374    /* TODO: Should this take an optional int giving the compression level? */
375    {"compress",        'z', POPT_ARG_NONE,   &do_compression, 0, 0, 0 },
376    {"daemon",           0,  POPT_ARG_NONE,   &daemon_opt, 0, 0, 0 },
377 @@ -989,6 +992,9 @@ void server_options(char **args,int *arg
378                 }
379         }
380  
381 +       if (fuzzy && am_sender)
382 +               args[ac++] = "--fuzzy";
383 +
384         *argc = ac;
385         return;
386  
387 --- receiver.c  29 Jun 2004 15:12:01 -0000      1.83
388 +++ receiver.c  29 Jun 2004 15:14:48 -0000
389 @@ -48,6 +48,7 @@ extern int ignore_errors;
390  extern int orig_umask;
391  extern int keep_partial;
392  extern int checksum_seed;
393 +extern int fuzzy;
394  
395  static void delete_one(char *fn, int is_dir)
396  {
397 @@ -300,8 +301,6 @@ int recv_files(int f_in,struct file_list
398         char *fname, fbuf[MAXPATHLEN];
399         char template[MAXPATHLEN];
400         char fnametmp[MAXPATHLEN];
401 -       char *fnamecmp;
402 -       char fnamecmpbuf[MAXPATHLEN];
403         struct map_struct *mapbuf;
404         struct file_struct *file;
405         struct stats initial_stats;
406 @@ -364,35 +363,31 @@ int recv_files(int f_in,struct file_list
407                 if (verbose > 2)
408                         rprintf(FINFO,"recv_files(%s)\n",fname);
409  
410 -               fnamecmp = fname;
411 -
412                 /* open the file */
413 -               fd1 = do_open(fnamecmp, O_RDONLY, 0);
414 +               fd1 = do_open(fname, O_RDONLY, 0);
415  
416 -               if (fd1 == -1 && compare_dest != NULL) {
417 -                       /* try the file at compare_dest instead */
418 -                       pathjoin(fnamecmpbuf, sizeof fnamecmpbuf,
419 -                                compare_dest, fname);
420 -                       fnamecmp = fnamecmpbuf;
421 -                       fd1 = do_open(fnamecmp, O_RDONLY, 0);
422 -               }
423 +               if (fd1 == -1 && compare_dest != NULL)
424 +                       fd1 = open_alternate_base_comparedir(fname);
425 +
426 +               if (fd1 == -1 && fuzzy)
427 +                       fd1 = open_alternate_base_fuzzy(fname);
428  
429                 if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
430                         rsyserr(FERROR, errno, "fstat %s failed",
431 -                               full_fname(fnamecmp));
432 +                               full_fname(fname));
433                         receive_data(f_in,NULL,-1,NULL,file->length);
434                         close(fd1);
435                         continue;
436                 }
437  
438 -               if (fd1 != -1 && S_ISDIR(st.st_mode) && fnamecmp == fname) {
439 +               if (fd1 != -1 && S_ISDIR(st.st_mode)) {
440                         /* this special handling for directories
441                          * wouldn't be necessary if robust_rename()
442                          * and the underlying robust_unlink could cope
443                          * with directories
444                          */
445                         rprintf(FERROR,"recv_files: %s is a directory\n",
446 -                               full_fname(fnamecmp));
447 +                               full_fname(fname));
448                         receive_data(f_in, NULL, -1, NULL, file->length);
449                         close(fd1);
450                         continue;
451 @@ -415,7 +410,7 @@ int recv_files(int f_in,struct file_list
452                         mapbuf = map_file(fd1,st.st_size);
453                         if (verbose > 2) {
454                                 rprintf(FINFO, "recv mapped %s of size %.0f\n",
455 -                                       fnamecmp, (double)st.st_size);
456 +                                       fname, (double)st.st_size);
457                         }
458                 } else
459                         mapbuf = NULL;
460 --- rsync.yo    5 Jun 2004 16:16:30 -0000       1.171
461 +++ rsync.yo    29 Jun 2004 15:14:49 -0000
462 @@ -325,6 +325,7 @@ verb(
463   -T  --temp-dir=DIR          create temporary files in directory DIR
464       --compare-dest=DIR      also compare received files relative to DIR
465       --link-dest=DIR         create hardlinks to DIR for unchanged files
466 +     --fuzzy                 use similar file as basis if basis doesn't exist
467   -P                          equivalent to --partial --progress
468   -z, --compress              compress file data
469   -C, --cvs-exclude           auto ignore files in the same way CVS does