Depends-On-Patch: g2r-basis-filename.diff

The changes to generator.c were greatly simplified, making the patch
easier to maintain and fixing the failing test in the testsuite.
Very lightly tested.

Be sure to run "make proto" before "make".

--- orig/generator.c	2004-07-17 15:50:09
+++ generator.c	2004-07-20 21:49:24
@@ -41,6 +41,7 @@ extern int ignore_times;
 extern int size_only;
 extern int io_timeout;
 extern int protocol_version;
+extern int fuzzy;
 extern int always_checksum;
 extern char *compare_dest;
 extern int link_dest;
@@ -248,6 +249,102 @@ static void generate_and_send_sums(int f
 }
 
 
+/* Split @p fname in place at its last '/' into directory and base name.
+ * With no '/', the directory is "."; @p fname itself is modified. */
+static void split_names(char *fname, char **dirname, char **basename)
+{
+	char *slash = strrchr(fname, '/');
+	if (slash) {
+		*slash = '\0';
+		*basename = slash+1;
+		*dirname = slash == fname ? "/" : fname; /* keep root as "/" */
+	} else {
+		*basename = fname;
+		*dirname = ".";
+	}
+}
+
+
+/* Score how well directory entry @p name resembles @p basename: 0 if
+ * @p name does not end in @p ext (or is no longer than @p ext), otherwise
+ * the length of the leading prefix the two names have in common. */
+static unsigned int measure_name(const char *name, const char *basename,
+				 const char *ext)
+{
+	int namelen = strlen(name);
+	int extlen = strlen(ext);
+	unsigned int score = 0;
+
+	/* Extensions must match */
+	if (namelen <= extlen || strcmp(name + namelen - extlen, ext) != 0)
+		return 0;
+
+	/* Now score depends on similarity of prefix */
+	for (; *name == *basename && *name; name++, basename++)
+		score++;
+	return score;
+}
+
+
+/* Look in the directory of *fname_ptr for the best-scoring similar name.
+ * On a match, write its path to @p buf, point *fname_ptr at @p buf, and
+ * return the result of link_stat() on it; otherwise return -1. */
+static int find_fuzzy(char **fname_ptr, char *buf, STRUCT_STAT *st_ptr)
+{
+	DIR *d;
+	struct dirent *di;
+	char *basename, *dirname;
+	char mangled_name[MAXPATHLEN];
+	char bestname[MAXPATHLEN];
+	unsigned int bestscore = 0;
+	const char *ext;
+
+	strlcpy(mangled_name, *fname_ptr, sizeof mangled_name);
+
+	split_names(mangled_name, &dirname, &basename);
+	if (!(d = opendir(dirname))) {
+		rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname);
+		return -1;
+	}
+
+	/* Get final extension, eg. .gz; never full basename though. */
+	ext = strrchr(basename + 1, '.');
+	if (!ext)
+		ext = basename + strlen(basename); /* ext = "" */
+
+	while ((di = readdir(d)) != NULL) {
+		const char *dname = d_name(di);
+		unsigned int score;
+
+		if (dname[0] == '.' && (dname[1] == '\0'
+		    || (dname[1] == '.' && dname[2] == '\0')))
+			continue;
+
+		score = measure_name(dname, basename, ext);
+		if (verbose > 4) {
+			rprintf(FINFO, "[%s] fuzzy score for %s = %u\n",
+				who_am_i(), dname, score);
+		}
+		if (score > bestscore) {
+			strlcpy(bestname, dname, sizeof bestname);
+			bestscore = score;
+		}
+	}
+	closedir(d);
+
+	/* Found a candidate. */
+	if (bestscore != 0) {
+		pathjoin(buf, MAXPATHLEN, dirname, bestname);
+		if (verbose > 2) {
+			rprintf(FINFO, "[%s] fuzzy match %s->%s\n",
+				who_am_i(), *fname_ptr, buf);
+		}
+		*fname_ptr = buf;
+		return link_stat(buf, st_ptr, 0);
+	}
+	return -1;
+}
+
 
 /*
  * Acts on file number @p i from @p flist, whose name is @p fname.
@@ -262,7 +359,7 @@ static void recv_generator(char *fname, 
 {
 	int fd = -1;
 	STRUCT_STAT st;
-	int statret;
+	int statret, fuzzy_file = 0;
 	char *fnamecmp;
 	char fnamecmpbuf[MAXPATHLEN];
 
@@ -442,6 +539,15 @@ static void recv_generator(char *fname, 
 	} else
 		*fnamecmpbuf = '\0';
 
+	if (statret == -1 && fuzzy) {
+		statret = find_fuzzy(&fnamecmp, fnamecmpbuf, &st);
+		/* st is only meaningful when find_fuzzy() returns 0. */
+		if (statret == 0 && S_ISREG(st.st_mode))
+			fuzzy_file = 1;
+		else
+			statret = -1;
+	}
+
 	if (statret == -1) {
 		if (preserve_hard_links && hard_link_check(file, HL_SKIP))
 			return;
@@ -479,7 +585,7 @@ static void recv_generator(char *fname, 
 		return;
 	}
 
-	if (skip_file(fname, file, &st)) {
+	if (!fuzzy_file && skip_file(fname, file, &st)) {
 		if (!*fnamecmpbuf)
 			set_perms(fname, file, &st, PERMS_REPORT);
 		return;
--- orig/main.c	2004-07-17 15:58:11
+++ main.c	2004-07-17 16:32:39
@@ -47,6 +47,7 @@ extern int keep_dirlinks;
 extern int preserve_hard_links;
 extern int protocol_version;
 extern int recurse;
+extern int fuzzy;
 extern int relative_paths;
 extern int rsync_port;
 extern int whole_file;
@@ -446,7 +447,7 @@ static int do_recv(int f_in,int f_out,st
 	int pid;
 	int status = 0;
 	int error_pipe[2], name_pipe[2];
-	int need_name_pipe = compare_dest || read_batch;
+	int need_name_pipe = compare_dest || fuzzy || read_batch;
 
 	if (preserve_hard_links)
 		init_hard_links(flist);
--- orig/options.c	2004-07-20 21:36:07
+++ options.c	2004-07-16 20:14:12
@@ -85,6 +85,7 @@ int safe_symlinks = 0;
 int copy_unsafe_links = 0;
 int size_only = 0;
 int bwlimit = 0;
+int fuzzy = 0;
 size_t bwlimit_writemax = 0;
 int delete_after = 0;
 int only_existing = 0;
@@ -276,6 +277,7 @@ void usage(enum logcode F)
   rprintf(F," -T  --temp-dir=DIR          create temporary files in directory DIR\n");
   rprintf(F,"     --compare-dest=DIR      also compare destination files relative to DIR\n");
   rprintf(F,"     --link-dest=DIR         create hardlinks to DIR for unchanged files\n");
+  rprintf(F,"     --fuzzy                 use similar file as basis if basis doesn't exist\n");
   rprintf(F," -P                          equivalent to --partial --progress\n");
   rprintf(F," -z, --compress              compress file data\n");
   rprintf(F," -C, --cvs-exclude           auto ignore files in the same way CVS does\n");
@@ -375,6 +377,7 @@ static struct poptOption long_options[] 
   {"temp-dir",        'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
   {"compare-dest",     0,  POPT_ARG_STRING, &compare_dest, 0, 0, 0 },
   {"link-dest",        0,  POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 },
+  {"fuzzy",            0,  POPT_ARG_NONE,   &fuzzy, 0, 0, 0 },
   /* TODO: Should this take an optional int giving the compression level? */
   {"compress",        'z', POPT_ARG_NONE,   &do_compression, 0, 0, 0 },
   {"daemon",           0,  POPT_ARG_NONE,   &daemon_opt, 0, 0, 0 },
@@ -1019,6 +1022,9 @@ void server_options(char **args,int *arg
 		}
 	}
 
+	if (fuzzy && am_sender)
+		args[ac++] = "--fuzzy";
+
 	*argc = ac;
 	return;
 
--- orig/receiver.c	2004-07-19 16:44:39
+++ receiver.c	2004-07-03 20:09:05
@@ -37,7 +37,6 @@ extern int preserve_perms;
 extern int cvs_exclude;
 extern int io_error;
 extern char *tmpdir;
-extern char *compare_dest;
 extern int make_backups;
 extern int do_progress;
 extern char *backup_dir;
--- orig/rsync.yo	2004-07-20 21:36:08
+++ rsync.yo	2004-07-03 19:27:25
@@ -326,6 +326,7 @@ verb(
  -T  --temp-dir=DIR          create temporary files in directory DIR
      --compare-dest=DIR      also compare received files relative to DIR
      --link-dest=DIR         create hardlinks to DIR for unchanged files
+     --fuzzy                 use similar file as basis if basis doesn't exist
  -P                          equivalent to --partial --progress
  -z, --compress              compress file data
  -C, --cvs-exclude           auto ignore files in the same way CVS does