Wayne Davison wrote:

I greatly simplified the changes to generator.c, making the patch easier
to maintain and fixing the failing test in the testsuite.  Very lightly
tested (by me).

--- Makefile.in	15 May 2004 00:48:11 -0000	1.101
+++ Makefile.in	29 Jun 2004 17:46:12 -0000
@@ -32,7 +32,7 @@ ZLIBOBJ=zlib/deflate.o zlib/infblock.o z
 	zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \
 	zlib/zutil.o zlib/adler32.o
 OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o \
-	main.o checksum.o match.o syscall.o log.o backup.o
+	main.o checksum.o match.o syscall.o log.o backup.o alternate.o
 OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \
 	fileio.o batch.o clientname.o
 OBJS3=progress.o pipe.o
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ alternate.c	29 Jun 2004 17:46:12 -0000
@@ -0,0 +1,120 @@
+#include "rsync.h"
+
+extern char *compare_dest;
+extern int verbose;
+
+/* Alternate methods for opening files, if local doesn't exist */
+
+/* Sanity check that we are about to open a regular file: open fname
+ * read-only if it stats as a regular file, otherwise return -1. */
+static int do_open_regular(char *fname)
+{
+	STRUCT_STAT st;
+
+	if (do_stat(fname, &st) == 0 && S_ISREG(st.st_mode))
+		return do_open(fname, O_RDONLY, 0);
+
+	return -1;
+}
+
+/* Split fname IN PLACE at its last '/' into *dirname and *basename.
+ * With no '/' present, *dirname is "." and *basename is fname itself. */
+static void split_names(char *fname, char **dirname, char **basename)
+{
+	char *slash = strrchr(fname, '/');
+	if (slash) {
+		*dirname = fname;
+		*slash = '\0';
+		*basename = slash+1;
+	} else {
+		*basename = fname;
+		*dirname = ".";
+	}
+}
+
+/* Score how well directory entry "name" resembles "basename".  Returns 0
+ * unless name is longer than ext and ends with it; otherwise returns the
+ * length of the leading run of characters name shares with basename. */
+static unsigned int measure_name(const char *name, const char *basename,
+				 const char *ext)
+{
+	size_t namelen = strlen(name);
+	size_t extlen = strlen(ext);
+	unsigned int score = 0;
+
+	/* Extensions must match */
+	if (namelen <= extlen || strcmp(name + namelen - extlen, ext) != 0)
+		return 0;
+
+	/* Now score depends on similarity of prefix */
+	for (; *name == *basename && *name; name++, basename++)
+		score++;
+	return score;
+}
+
+/* Scan the directory part of *fname_ptr for the entry that scores highest
+ * in measure_name() against its basename and final extension.  On a match
+ * pathjoin() is used to put the directory and entry name into buf (passed
+ * as MAXPATHLEN bytes) and *fname_ptr is pointed at buf.  If st_ptr is
+ * non-NULL the result of link_stat() on buf is returned; otherwise the
+ * result of do_open_regular(buf).  Returns -1 when nothing scores above
+ * zero or the directory cannot be opened. */
+int check_alternate_base_fuzzy(char **fname_ptr, char *buf,
+			       STRUCT_STAT *st_ptr)
+{
+	DIR *d;
+	struct dirent *di;
+	char *basename, *dirname;
+	char mangled_name[MAXPATHLEN];
+	char bestname[MAXPATHLEN];
+	unsigned int bestscore = 0;
+	const char *ext;
+
+	strlcpy(mangled_name, *fname_ptr, sizeof mangled_name);
+
+	split_names(mangled_name, &dirname, &basename);
+	if (!(d = opendir(dirname))) {
+		rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname);
+		return -1;
+	}
+
+	/* Get final extension, eg. .gz; never full basename though.
+	 * An empty basename has no extension (and no basename[1]). */
+	ext = *basename ? strrchr(basename + 1, '.') : NULL;
+	if (!ext)
+		ext = basename + strlen(basename); /* ext = "" */
+
+	while ((di = readdir(d)) != NULL) {
+		const char *dname = d_name(di);
+		unsigned int score;
+
+		if (dname[0] == '.' && (dname[1] == '\0'
+		    || (dname[1] == '.' && dname[2] == '\0')))
+			continue;
+
+		score = measure_name(dname, basename, ext);
+		if (verbose > 4) {
+			rprintf(FINFO, "[%s] fuzzy score for %s = %u\n",
+				who_am_i(), dname, score);
+		}
+		if (score > bestscore) {
+			strlcpy(bestname, dname, sizeof bestname);
+			bestscore = score;
+		}
+	}
+	closedir(d);
+
+	/* Found a candidate. */
+	if (bestscore != 0) {
+		pathjoin(buf, MAXPATHLEN, dirname, bestname);
+		if (verbose > 2) {
+			rprintf(FINFO, "[%s] fuzzy match %s->%s\n",
+				who_am_i(), *fname_ptr, buf);
+		}
+		*fname_ptr = buf;
+		if (st_ptr)
+			return link_stat(buf, st_ptr, 0);
+		return do_open_regular(buf);
+	}
+	return -1;
+}
--- generator.c	29 Jun 2004 16:22:54 -0000	1.91
+++ generator.c	29 Jun 2004 17:46:12 -0000
@@ -41,6 +41,7 @@ extern int ignore_times;
 extern int size_only;
 extern int io_timeout;
 extern int protocol_version;
+extern int fuzzy;
 extern int always_checksum;
 extern char *compare_dest;
 extern int link_dest;
@@ -271,7 +272,7 @@ void recv_generator(char *fname, struct
 	int fd;
 	STRUCT_STAT st;
 	struct map_struct *mapbuf;
-	int statret;
+	int statret, fuzzy_file = 0;
 	char *fnamecmp;
 	char fnamecmpbuf[MAXPATHLEN];
 
@@ -447,6 +448,16 @@ void recv_generator(char *fname, struct
 		fnamecmp = fnamecmpbuf;
 	}
 
+	if (statret == -1 && fuzzy) {
+		statret = check_alternate_base_fuzzy(&fnamecmp, fnamecmpbuf,
+						     &st);
+		/* st is only valid when the fuzzy lookup succeeded. */
+		if (statret == 0 && S_ISREG(st.st_mode))
+			fuzzy_file = 1;
+		else
+			statret = -1;
+	}
+
 	if (statret == -1) {
 		if (preserve_hard_links && hard_link_check(file, HL_SKIP))
 			return;
@@ -489,7 +500,7 @@ void recv_generator(char *fname, struct
 		return;
 	}
 
-	if (skip_file(fname, file, &st)) {
+	if (!fuzzy_file && skip_file(fname, file, &st)) {
 		if (fnamecmp == fname)
 			set_perms(fname, file, &st, PERMS_REPORT);
 		return;
--- options.c	20 Jun 2004 19:47:05 -0000	1.157
+++ options.c	29 Jun 2004 17:46:13 -0000
@@ -94,6 +94,7 @@ int ignore_errors = 0;
 int modify_window = 0;
 int blocking_io = -1;
 int checksum_seed = 0;
+int fuzzy = 0;
 unsigned int block_size = 0;
 
 
@@ -270,6 +271,7 @@ void usage(enum logcode F)
   rprintf(F," -T --temp-dir=DIR create temporary files in directory DIR\n");
   rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
   rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n");
+  rprintf(F," --fuzzy use similar file as basis if basis doesn't exist\n");
   rprintf(F," -P equivalent to --partial --progress\n");
   rprintf(F," -z, --compress compress file data\n");
   rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n");
@@ -368,6 +370,7 @@ static struct poptOption long_options[]
   {"temp-dir",        'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
   {"compare-dest",     0,  POPT_ARG_STRING, &compare_dest, 0, 0, 0 },
   {"link-dest",        0,  POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 },
+  {"fuzzy",            0,  POPT_ARG_NONE,   &fuzzy, 0, 0, 0 },
   /* TODO: Should this take an optional int giving the compression level? */
   {"compress",        'z', POPT_ARG_NONE,   &do_compression, 0, 0, 0 },
   {"daemon",           0,  POPT_ARG_NONE,   &daemon_opt, 0, 0, 0 },
@@ -989,6 +992,9 @@ void server_options(char **args,int *arg
 		}
 	}
 
+	if (fuzzy && am_sender)
+		args[ac++] = "--fuzzy";
+
 	*argc = ac;
 	return;
 
--- receiver.c	29 Jun 2004 15:12:01 -0000	1.83
+++ receiver.c	29 Jun 2004 17:46:13 -0000
@@ -48,6 +48,7 @@ extern int ignore_errors;
 extern int orig_umask;
 extern int keep_partial;
 extern int checksum_seed;
+extern int fuzzy;
 
 static void delete_one(char *fn, int is_dir)
 {
@@ -377,6 +378,11 @@ int recv_files(int f_in,struct file_list
 			fd1 = do_open(fnamecmp, O_RDONLY, 0);
 		}
 
+		if (fd1 == -1 && fuzzy) {
+			fd1 = check_alternate_base_fuzzy(&fnamecmp, fnamecmpbuf,
+							 NULL);
+		}
+
 		if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
 			rsyserr(FERROR, errno, "fstat %s failed",
 				full_fname(fnamecmp));
--- rsync.yo	5 Jun 2004 16:16:30 -0000	1.171
+++ rsync.yo	29 Jun 2004 17:46:14 -0000
@@ -325,6 +325,7 @@ verb(
  -T --temp-dir=DIR create temporary files in directory DIR
  --compare-dest=DIR also compare received files relative to DIR
  --link-dest=DIR create hardlinks to DIR for unchanged files
+ --fuzzy use similar file as basis if basis is gone
  -P equivalent to --partial --progress
  -z, --compress compress file data
  -C, --cvs-exclude auto ignore files in the same way CVS does