X-Git-Url: https://mattmccutchen.net/rsync/rsync-patches.git/blobdiff_plain/ec8473c68113a0605f71ace3a21662578319f07c..0f6260347bf6c29859ed4808f04222f84c2a8817:/fuzzy.diff diff --git a/fuzzy.diff b/fuzzy.diff index 5b3b7da..73d6b74 100644 --- a/fuzzy.diff +++ b/fuzzy.diff @@ -1,39 +1,23 @@ -Depends-On-Patch: g2r-basis-filename.diff - The changes to generator.c were greatly simplified, making the patch easier to maintain and fixing the failing test in the testsuite. -Very lightly tested. +Lightly tested. Be sure to run "make proto" before "make". ---- orig/generator.c 2004-07-28 10:14:15 -+++ generator.c 2004-07-28 10:23:12 -@@ -41,6 +41,7 @@ extern int ignore_times; - extern int size_only; +--- orig/generator.c 2005-01-17 23:11:45 ++++ generator.c 2005-01-16 02:16:38 +@@ -44,6 +44,7 @@ extern int size_only; + extern OFF_T max_size; extern int io_timeout; extern int protocol_version; -+extern int fuzzy; ++extern int fuzzy_basis; extern int always_checksum; extern char *partial_dir; - extern char *compare_dest; -@@ -240,6 +241,94 @@ static void generate_and_send_sums(int f + extern char *basis_dir[]; +@@ -242,6 +243,83 @@ static void generate_and_send_sums(int f } -+static void split_names(char *fname, char **dirname, char **basename) -+{ -+ char *slash = strrchr(fname, '/'); -+ if (slash) { -+ *dirname = fname; -+ *slash = '\0'; -+ *basename = slash+1; -+ } else { -+ *basename = fname; -+ *dirname = "."; -+ } -+} -+ -+ +static unsigned int measure_name(const char *name, const char *basename, + const char *ext) +{ @@ -52,27 +36,33 @@ Be sure to run "make proto" before "make". +} + + -+static int find_fuzzy(char **fname_ptr, char *buf, STRUCT_STAT *st_ptr) ++static int find_fuzzy(const char *fname, char *buf, STRUCT_STAT *st_ptr) +{ + DIR *d; + struct dirent *di; -+ char *basename, *dirname; -+ char mangled_name[MAXPATHLEN]; ++ char *basename, *dirname, *slash; + char bestname[MAXPATHLEN]; + unsigned int bestscore = 0; + const char *ext; + -+ strlcpy(mangled_name, *fname_ptr, sizeof mangled_name); ++ strlcpy(buf, fname, MAXPATHLEN); ++ if ((slash = strrchr(buf, '/')) != NULL) { ++ dirname = buf; ++ *slash = '\0'; ++ basename = slash + 1; ++ } else { ++ basename = buf; ++ dirname = "."; ++ } + -+ split_names(mangled_name, &dirname, &basename); + if (!(d = opendir(dirname))) { + rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname); + return -1; + } + + /* Get final extension, eg. .gz; never full basename though. */ -+ ext = strrchr(basename + 1, '.'); -+ if (!ext) ++ for (ext = basename; *ext == '.'; ext++) {} ++ if (!(ext = strrchr(ext, '.'))) + ext = basename + strlen(basename); /* ext = "" */ + + while ((di = readdir(d)) != NULL) { @@ -85,8 +75,8 @@ Be sure to run "make proto" before "make". + + score = measure_name(dname, basename, ext); + if (verbose > 4) { -+ rprintf(FINFO, "[%s] fuzzy score for %s = %u\n", -+ who_am_i(), dname, score); ++ rprintf(FINFO, "fuzzy score for %s = %u\n", ++ dname, score); + } + if (score > bestscore) { + strlcpy(bestname, dname, sizeof bestname); @@ -97,12 +87,9 @@ Be sure to run "make proto" before "make". + + /* Found a candidate. */ + if (bestscore != 0) { -+ pathjoin(buf, MAXPATHLEN, dirname, bestname); -+ if (verbose > 2) { -+ rprintf(FINFO, "[%s] fuzzy match %s->%s\n", -+ who_am_i(), *fname_ptr, buf); -+ } -+ *fname_ptr = buf; ++ strlcpy(basename, MAXPATHLEN - (basename - buf), bestname); ++ if (verbose > 2) ++ rprintf(FINFO, "fuzzy match %s->%s\n", fname, buf); + return link_stat(buf, st_ptr, 0); + } + return -1; @@ -111,111 +98,183 @@ Be sure to run "make proto" before "make". /* * Acts on file number @p i from @p flist, whose name is @p fname. -@@ -254,7 +343,7 @@ static void recv_generator(char *fname, - { - int fd = -1; - STRUCT_STAT st; -- int statret, stat_errno; -+ int statret, stat_errno, fuzzy_file = 0; - char *fnamecmp; - char fnamecmpbuf[MAXPATHLEN]; - -@@ -439,6 +528,14 @@ static void recv_generator(char *fname, +@@ -496,6 +574,15 @@ static void recv_generator(char *fname, } else - *fnamecmpbuf = '\0'; + partialptr = NULL; -+ if (statret == -1 && fuzzy) { -+ statret = find_fuzzy(&fnamecmp, fnamecmpbuf, &st); -+ if (!S_ISREG(st.st_mode)) -+ statret = -1; -+ else -+ fuzzy_file = 1; ++ if (statret == -1 && fuzzy_basis) { ++ if (find_fuzzy(fname, fnamecmpbuf, &st) == 0 ++ && S_ISREG(st.st_mode)) { ++ statret = 0; ++ fnamecmp = fnamecmpbuf; ++ fnamecmp_type = FNAMECMP_FUZZY; ++ } + } + - if (statret == 0 && !S_ISREG(st.st_mode)) { - if (delete_file(fname) != 0) + if (statret == -1) { + if (preserve_hard_links && hard_link_check(file, HL_SKIP)) return; -@@ -472,7 +569,7 @@ static void recv_generator(char *fname, - return; - } +@@ -524,6 +611,8 @@ static void recv_generator(char *fname, -- if (skip_file(fnamecmp, file, &st)) { -+ if (!fuzzy_file && skip_file(fnamecmp, file, &st)) { - if (!*fnamecmpbuf) + if (!compare_dest && fnamecmp_type <= FNAMECMP_BASIS_DIR_HIGH) + ; ++ else if (fnamecmp_type == FNAMECMP_FUZZY) ++ ; + else if (unchanged_file(fnamecmp, file, &st)) { + if (fnamecmp_type == FNAMECMP_FNAME) set_perms(fname, file, &st, PERMS_REPORT); +@@ -598,8 +687,24 @@ notify_others: + write_int(f_out, i); + if (protocol_version >= 29 && inplace && !read_batch) + write_byte(f_out, fnamecmp_type); +- if (f_out_name >= 0) ++ if (f_out_name >= 0) { + write_byte(f_out_name, fnamecmp_type); ++ if (fnamecmp_type == FNAMECMP_FUZZY) { ++ uchar lenbuf[3], *lb = lenbuf; ++ int len = strlen(fnamecmpbuf); ++ if (len > 0x7F) { ++#if MAXPATHLEN > 0x7FFF ++ *lb++ = len / 0x10000 + 0x80; ++ *lb++ = len / 0x100; ++#else ++ *lb++ = len / 0x100 + 0x80; ++#endif ++ } ++ *lb = len; ++ write_buf(f_out_name, lenbuf, lb - lenbuf + 1); ++ write_buf(f_out_name, fnamecmpbuf, len); ++ } ++ } + + if (dry_run || read_batch) return; ---- orig/main.c 2004-07-22 00:10:43 -+++ main.c 2004-07-22 00:32:31 -@@ -47,6 +47,7 @@ extern int keep_dirlinks; +--- orig/main.c 2005-01-17 23:11:45 ++++ main.c 2005-01-14 18:33:15 +@@ -48,6 +48,7 @@ extern int keep_dirlinks; extern int preserve_hard_links; extern int protocol_version; extern int recurse; -+extern int fuzzy; ++extern int fuzzy_basis; extern int relative_paths; extern int rsync_port; extern int whole_file; -@@ -458,7 +459,7 @@ static int do_recv(int f_in,int f_out,st +@@ -464,7 +465,8 @@ static int do_recv(int f_in,int f_out,st int pid; int status = 0; int error_pipe[2], name_pipe[2]; -- BOOL need_name_pipe = compare_dest && !dry_run; -+ BOOL need_name_pipe = (compare_dest || fuzzy) && !dry_run; +- BOOL need_name_pipe = (basis_dir[0] || partial_dir) && !dry_run; ++ BOOL need_name_pipe = (basis_dir[0] || partial_dir || fuzzy_basis) ++ && !dry_run; - if (preserve_hard_links) - init_hard_links(flist); ---- orig/options.c 2004-07-29 16:08:03 -+++ options.c 2004-07-16 20:14:12 -@@ -85,6 +85,7 @@ int safe_symlinks = 0; - int copy_unsafe_links = 0; + /* The receiving side mustn't obey this, or an existing symlink that + * points to an identical file won't be replaced by the referent. */ +--- orig/options.c 2005-01-17 23:11:45 ++++ options.c 2005-01-15 21:08:13 +@@ -86,6 +86,7 @@ int copy_unsafe_links = 0; int size_only = 0; + int daemon_bwlimit = 0; int bwlimit = 0; -+int fuzzy = 0; ++int fuzzy_basis = 0; size_t bwlimit_writemax = 0; int delete_after = 0; int only_existing = 0; -@@ -279,6 +280,7 @@ void usage(enum logcode F) - rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n"); +@@ -288,6 +289,7 @@ void usage(enum logcode F) rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n"); - rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n"); -+ rprintf(F," --fuzzy use similar file as basis if basis doesn't exist\n"); + rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n"); + rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n"); ++ rprintf(F," --fuzzy find similar file for basis when no dest file\n"); rprintf(F," -P equivalent to --partial --progress\n"); rprintf(F," -z, --compress compress file data\n"); rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n"); -@@ -378,6 +380,7 @@ static struct poptOption long_options[] - {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 }, - {"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 }, - {"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 }, -+ {"fuzzy", 0, POPT_ARG_NONE, &fuzzy, 0, 0, 0 }, +@@ -384,6 +386,7 @@ static struct poptOption long_options[] + {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 }, + {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 }, + {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 }, ++ {"fuzzy", 0, POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 }, /* TODO: Should this take an optional int giving the compression level? */ {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 }, - {"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 }, -@@ -1042,6 +1045,9 @@ void server_options(char **args,int *arg - } + {"stats", 0, POPT_ARG_NONE, &do_stats, 0, 0, 0 }, +@@ -1234,6 +1237,9 @@ void server_options(char **args,int *arg + args[ac++] = "--no-relative"; } -+ if (fuzzy && am_sender) ++ if (fuzzy_basis && am_sender) + args[ac++] = "--fuzzy"; + *argc = ac; return; ---- orig/receiver.c 2004-07-23 21:59:07 -+++ receiver.c 2004-07-23 22:08:03 -@@ -39,7 +39,6 @@ extern int cvs_exclude; - extern int io_error; - extern char *tmpdir; - extern char *partial_dir; --extern char *compare_dest; - extern int make_backups; - extern int do_progress; - extern char *backup_dir; ---- orig/rsync.yo 2004-07-29 16:08:04 -+++ rsync.yo 2004-07-03 19:27:25 -@@ -327,6 +327,7 @@ verb( - -T --temp-dir=DIR create temporary files in directory DIR +--- orig/receiver.c 2005-01-17 23:11:45 ++++ receiver.c 2005-01-15 21:21:02 +@@ -324,6 +324,27 @@ static int receive_data(int f_in, char * + } + + ++static void read_gen_name(int fd, char *buf) ++{ ++ int len = read_byte(fd); ++ if (len & 0x80) { ++#if MAXPATHLEN > 32767 ++ uchar lenbuf[2]; ++ read_buf(fd, (char *)lenbuf, 2); ++ len = (len & ~0x80) * 0x10000 + lenbuf[0] * 0x100 + lenbuf[1]; ++#else ++ len = (len & ~0x80) * 0x100 + read_byte(fd); ++#endif ++ } ++ if (len >= MAXPATHLEN) { ++ rprintf(FERROR, "bogus data on generator name pipe\n"); ++ exit_cleanup(RERR_PROTOCOL); ++ } ++ ++ read_sbuf(fd, buf, len); ++} ++ ++ + static void discard_receive_data(int f_in, OFF_T length) + { + receive_data(f_in, NULL, -1, 0, NULL, -1, length); +@@ -454,6 +475,10 @@ int recv_files(int f_in, struct file_lis + case FNAMECMP_BACKUP: + fnamecmp = get_backup_name(fname); + break; ++ case FNAMECMP_FUZZY: ++ read_gen_name(f_in_name, fnamecmpbuf); ++ fnamecmp = fnamecmpbuf; ++ break; + default: + if (j >= basis_dir_cnt) { + rprintf(FERROR, +--- orig/rsync.h 2005-01-17 23:11:45 ++++ rsync.h 2005-01-15 21:24:09 +@@ -128,6 +128,7 @@ + #define FNAMECMP_FNAME 0x80 + #define FNAMECMP_PARTIAL_DIR 0x81 + #define FNAMECMP_BACKUP 0x82 ++#define FNAMECMP_FUZZY 0x83 + + + /* Log-message categories. FLOG is only used on the daemon side to +--- orig/rsync.yo 2005-01-17 23:11:46 ++++ rsync.yo 2005-01-15 21:48:52 +@@ -358,6 +358,7 @@ verb( --compare-dest=DIR also compare received files relative to DIR - --link-dest=DIR create hardlinks to DIR for unchanged files -+ --fuzzy use similar file as basis if basis is gone + --copy-dest=DIR ... and include copies of unchanged files + --link-dest=DIR hardlink to files in DIR when unchanged ++ --fuzzy find similar file for basis when no dest -P equivalent to --partial --progress -z, --compress compress file data -C, --cvs-exclude auto ignore files in the same way CVS does +@@ -878,6 +879,11 @@ Note that rsync versions prior to 2.6.1 + (or implied by -a). You can work-around this bug by avoiding the -o option + when sending to an old rsync. + ++dit(bf(--fuzzy)) This option tells rsync that it should look around for a ++basis file for any destination file that is missing. The current algorithm ++looks for a similarly-named file in the same directory as the destination ++file, and, if found, uses that to try to speed up the transfer. ++ + dit(bf(-z, --compress)) With this option, rsync compresses any data from + the files that it sends to the destination machine. This + option is useful on slow connections. The compression method used is the