From d7d8b8226cd72600fa05c34b6c4aa6110ebd5685 Mon Sep 17 00:00:00 2001 From: Wayne Davison Date: Sun, 13 Feb 2005 21:53:26 +0000 Subject: [PATCH] - If we find a file with an identical size & mod-time to the file on the server, prefer that over any similarly named file. - Added the short-option -y for specifying --fuzzy. --- fuzzy.diff | 59 +++++++++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/fuzzy.diff b/fuzzy.diff index 657180b..b16206a 100644 --- a/fuzzy.diff +++ b/fuzzy.diff @@ -60,7 +60,7 @@ Be sure to run "make proto" before "make". /* This function is used to implement per-directory deletion, and * is used by all the --delete-WHEN options. Note that the fbuf --- orig/generator.c 2005-02-13 05:50:28 -+++ generator.c 2005-02-13 21:22:28 ++++ generator.c 2005-02-13 21:47:28 @@ -47,6 +47,7 @@ extern int size_only; extern OFF_T max_size; extern int io_timeout; @@ -69,7 +69,7 @@ Be sure to run "make proto" before "make". extern int always_checksum; extern char *partial_dir; extern char *basis_dir[]; -@@ -227,6 +228,48 @@ static void generate_and_send_sums(int f +@@ -227,6 +228,59 @@ static void generate_and_send_sums(int f unmap_file(mapbuf); } @@ -95,6 +95,17 @@ Be sure to run "make proto" before "make". + continue; + + name = fp->basename; ++ ++ if (fp->length == file->length ++ && fp->modtime == file->modtime) { ++ if (verbose > 4) { ++ rprintf(FINFO, ++ "fuzzy size/modtime match for %s\n", ++ name); ++ } ++ return j; ++ } ++ + len = strlen(name); + suf = find_filename_suffix(name, len, &suf_len); + @@ -118,7 +129,7 @@ Be sure to run "make proto" before "make". /* Acts on flist->file's ndx'th item, whose name is fname. If a directory, * make sure it exists, and has the right permissions/timestamp info. For -@@ -241,6 +284,8 @@ static void recv_generator(char *fname, +@@ -241,6 +295,8 @@ static void recv_generator(char *fname, int f_out, int f_out_name) { static int missing_below = -1; @@ -127,7 +138,7 @@ Be sure to run "make proto" before "make". int fd = -1, f_copy = -1; STRUCT_STAT st, partial_st; struct file_struct *back_file = NULL; -@@ -275,6 +320,16 @@ static void recv_generator(char *fname, +@@ -275,6 +331,16 @@ static void recv_generator(char *fname, statret = -1; stat_errno = ENOENT; } else { @@ -144,7 +155,7 @@ Be sure to run "make proto" before "make". statret = link_stat(fname, &st, keep_dirlinks && S_ISDIR(file->mode)); stat_errno = errno; -@@ -492,6 +547,24 @@ static void recv_generator(char *fname, +@@ -492,6 +558,24 @@ static void recv_generator(char *fname, } else partialptr = NULL; @@ -154,7 +165,7 @@ Be sure to run "make proto" before "make". + struct file_struct *fp = fuzzy_dirlist->files[j]; + f_name_to(fp, fnamecmpbuf); + if (verbose > 2) { -+ rprintf(FINFO, "fuzzy match for %s: %s\n", ++ rprintf(FINFO, "fuzzy basis selected for %s: %s\n", + safe_fname(fname), safe_fname(fnamecmpbuf)); + } + st.st_mode = fp->mode; @@ -169,7 +180,7 @@ Be sure to run "make proto" before "make". if (statret == -1) { if (preserve_hard_links && hard_link_check(file, HL_SKIP)) return; -@@ -520,6 +593,8 @@ static void recv_generator(char *fname, +@@ -520,6 +604,8 @@ static void recv_generator(char *fname, if (!compare_dest && fnamecmp_type <= FNAMECMP_BASIS_DIR_HIGH) ; @@ -178,7 +189,7 @@ Be sure to run "make proto" before "make". else if (unchanged_file(fnamecmp, file, &st)) { if (fnamecmp_type == FNAMECMP_FNAME) set_perms(fname, file, &st, PERMS_REPORT); -@@ -540,6 +615,11 @@ prepare_to_open: +@@ -540,6 +626,11 @@ prepare_to_open: statret = -1; goto notify_others; } @@ -190,7 +201,7 @@ Be sure to run "make proto" before "make". /* open the file */ fd = do_open(fnamecmp, O_RDONLY, 0); -@@ -594,8 +674,24 @@ notify_others: +@@ -594,8 +685,24 @@ notify_others: write_int(f_out, ndx); if (protocol_version >= 29 && inplace && !read_batch) write_byte(f_out, fnamecmp_type); @@ -237,7 +248,7 @@ Be sure to run "make proto" before "make". /* The receiving side mustn't obey this, or an existing symlink that * points to an identical file won't be replaced by the referent. */ --- orig/options.c 2005-02-13 05:50:28 -+++ options.c 2005-02-13 06:56:06 ++++ options.c 2005-02-13 21:41:41 @@ -89,6 +89,7 @@ int copy_unsafe_links = 0; int size_only = 0; int daemon_bwlimit = 0; @@ -250,7 +261,7 @@ Be sure to run "make proto" before "make". rprintf(F," --size-only skip files that match in size\n"); rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n"); rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n"); -+ rprintf(F," --fuzzy find similar file for basis when no dest file\n"); ++ rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n"); rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n"); rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n"); rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n"); @@ -258,7 +269,7 @@ Be sure to run "make proto" before "make". {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 }, {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 }, {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 }, -+ {"fuzzy", 0, POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 }, ++ {"fuzzy", 'y', POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 }, /* TODO: Should this take an optional int giving the compression level? */ {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 }, {"stats", 0, POPT_ARG_NONE, &do_stats, 0, 0, 0 }, @@ -331,27 +342,29 @@ Be sure to run "make proto" before "make". /* For calling delete_file() */ #define DEL_DIR (1<<0) ---- orig/rsync.yo 2005-02-13 05:50:28 -+++ rsync.yo 2005-02-13 06:56:46 +--- orig/rsync.yo 2005-02-13 21:51:10 ++++ rsync.yo 2005-02-13 21:41:52 @@ -351,6 +351,7 @@ to the detailed description below for a --size-only skip files that match in size --modify-window=NUM compare mod-times with reduced accuracy - -T --temp-dir=DIR create temporary files in directory DIR -+ --fuzzy find similar file for basis when no dest + -T, --temp-dir=DIR create temporary files in directory DIR ++ -y, --fuzzy find similar file for basis if no dest file --compare-dest=DIR also compare received files relative to DIR --copy-dest=DIR ... and include copies of unchanged files --link-dest=DIR hardlink to files in DIR when unchanged -@@ -909,6 +910,14 @@ scratch directory when creating temporar +@@ -909,6 +910,16 @@ scratch directory when creating temporar transferred on the receiving side. The default behavior is to create the temporary files in the receiving directory. -+dit(bf(--fuzzy)) This option tells rsync that it should look around for a ++dit(bf(-y, --fuzzy)) This option tells rsync that it should look for a +basis file for any destination file that is missing. The current algorithm -+looks for a similarly-named file in the same directory as the destination -+file, and, if found, uses that to try to speed up the transfer. Note that -+the use of the --delete option might get rid of any potential fuzzy-match -+files, so either use --delete-after or filename exclusions if you need to -+prevent this. ++looks in the same directory as the destination file for either a file that ++has an identical size and modified-time, or a similarly-named file. If ++found, rsync uses the fuzzy basis file to try to speed up the transfer. ++ ++Note that the use of the bf(--delete) option might get rid of any potential ++fuzzy-match files, so either use bf(--delete-after) or specify some ++filename exclusions if you need to prevent this. + dit(bf(--compare-dest=DIR)) This option instructs rsync to use em(DIR) on the destination machine as an additional hierarchy to compare destination -- 2.34.1