From: Martin Pool Date: Wed, 3 Apr 2002 09:56:50 +0000 (+0000) Subject: Patch from Rusty Russell to support --fuzzy, thereby handling related X-Git-Url: https://mattmccutchen.net/rsync/rsync-patches.git/commitdiff_plain/241013b4fc9bf65aea632cda139241017c7db0d6 Patch from Rusty Russell to support --fuzzy, thereby handling related files with similar names. --- diff --git a/fuzzy.diff b/fuzzy.diff new file mode 100644 index 0000000..2d7b88e --- /dev/null +++ b/fuzzy.diff @@ -0,0 +1,485 @@ +From rusty@rustcorp.com.au Wed Apr 3 17:18:42 2002 +Return-Path: +Delivered-To: mbp@samba.org +Received: from wagner.rustcorp.com.au (sydney1.au.ibm.com [202.135.142.193]) + by lists.samba.org (Postfix) with ESMTP id EA7B849DC + for ; Tue, 2 Apr 2002 23:06:29 -0800 (PST) +Received: from wagner.rustcorp.com.au ([127.0.0.1] helo=rustcorp.com.au) + by wagner.rustcorp.com.au with esmtp (Exim 3.35 #1 (Debian)) + id 16set7-0000pL-00 + for ; Wed, 03 Apr 2002 17:08:57 +1000 +From: Rusty Russell +To: Martin Pool +Subject: Re: gzip patch +In-reply-to: Your message of "Wed, 03 Apr 2002 12:04:59 +1000." + <20020403020455.GC18851@samba.org> +Date: Wed, 03 Apr 2002 17:08:57 +1000 +Sender: rusty@rustcorp.com.au +Message-Id: +Status: RO +X-Status: A +Content-Length: 12810 +Lines: 461 + +In message <20020403020455.GC18851@samba.org> you write: +> Hi, +> +> I think you said the other day that you had a working --rsyncable +> patch for gzip. Could I have it please? + +Hi Martin, + + Just got your mail, sorry for the delay. Found old patch on +google, and updated it for 2.5.4 (I know, but that's what apt-get +source gave me). + +Compiles, otherwise untested. +Rusty. +-- + Anyone who quotes me in their sig is an idiot. -- Rusty Russell. 
+ +diff -urN rsync-2.5.4/Makefile.in rsync-2.5.4-fuzzy/Makefile.in +--- rsync-2.5.4/Makefile.in Tue Feb 26 05:48:25 2002 ++++ rsync-2.5.4-fuzzy/Makefile.in Wed Apr 3 16:35:55 2002 +@@ -28,7 +28,7 @@ + ZLIBOBJ=zlib/deflate.o zlib/infblock.o zlib/infcodes.o zlib/inffast.o \ + zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \ + zlib/zutil.o zlib/adler32.o +-OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o main.o checksum.o match.o syscall.o log.o backup.o ++OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o main.o checksum.o match.o syscall.o log.o backup.o alternate.o + OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o fileio.o batch.o \ + clientname.o + DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o +diff -urN rsync-2.5.4/alternate.c rsync-2.5.4-fuzzy/alternate.c +--- rsync-2.5.4/alternate.c Thu Jan 1 10:00:00 1970 ++++ rsync-2.5.4-fuzzy/alternate.c Wed Apr 3 17:04:15 2002 +@@ -0,0 +1,117 @@ ++#include "rsync.h" ++ ++extern char *compare_dest; ++extern int verbose; ++ ++/* Alternate methods for opening files, if local doesn't exist */ ++/* Sanity check that we are about to open regular file */ ++int do_open_regular(char *fname) ++{ ++ STRUCT_STAT st; ++ ++ if (do_stat(fname, &st) == 0 && S_ISREG(st.st_mode)) ++ return do_open(fname, O_RDONLY, 0); ++ ++ return -1; ++} ++ ++static void split_names(char *fname, char **dirname, char **basename) ++{ ++ char *slash; ++ ++ slash = strrchr(fname, '/'); ++ if (slash) { ++ *dirname = fname; ++ *slash = '\0'; ++ *basename = slash+1; ++ } else { ++ *basename = fname; ++ *dirname = "."; ++ } ++} ++ ++static unsigned int measure_name(const char *name, ++ const char *basename, ++ const char *ext) ++{ ++ int namelen = strlen(name); ++ int extlen = strlen(ext); ++ unsigned int score = 0; ++ ++ /* Extensions must match */ ++ if (namelen <= extlen || strcmp(name+namelen-extlen, ext) != 0) ++ return 0; ++ ++ /* Now 
score depends on similarity of prefix */ ++ for (; *name==*basename && *name; name++, basename++) ++ score++; ++ return score; ++} ++ ++int open_alternate_base_fuzzy(const char *fname) ++{ ++ DIR *d; ++ struct dirent *di; ++ char *basename, *dirname; ++ char mangled_name[MAXPATHLEN]; ++ char bestname[MAXPATHLEN]; ++ unsigned int bestscore = 0; ++ const char *ext; ++ ++ /* FIXME: can we assume fname fits here? */ ++ strcpy(mangled_name, fname); ++ ++ split_names(mangled_name, &dirname, &basename); ++ d = opendir(dirname); ++ if (!d) { ++ rprintf(FERROR,"recv_generator opendir(%s): %s\n", ++ dirname,strerror(errno)); ++ return -1; ++ } ++ ++ /* Get final extension, eg. .gz; never full basename though. */ ++ ext = strrchr(basename + 1, '.'); ++ if (!ext) ++ ext = basename + strlen(basename); /* ext = "" */ ++ ++ while ((di = readdir(d)) != NULL) { ++ const char *dname = d_name(di); ++ unsigned int score; ++ ++ if (strcmp(dname,".")==0 || ++ strcmp(dname,"..")==0) ++ continue; ++ ++ score = measure_name(dname, basename, ext); ++ if (verbose > 4) ++ rprintf(FINFO,"fuzzy score for %s = %u\n", ++ dname, score); ++ if (score > bestscore) { ++ strcpy(bestname, dname); ++ bestscore = score; ++ } ++ } ++ closedir(d); ++ ++ /* Found a candidate. */ ++ if (bestscore != 0) { ++ char fuzzyname[MAXPATHLEN]; ++ ++ snprintf(fuzzyname,MAXPATHLEN,"%s/%s", dirname, bestname); ++ if (verbose > 2) ++ rprintf(FINFO,"fuzzy match %s->%s\n", ++ fname, fuzzyname); ++ return do_open_regular(fuzzyname); ++ } ++ return -1; ++} ++ ++int open_alternate_base_comparedir(const char *fname) ++{ ++ char fnamebuf[MAXPATHLEN]; ++ /* try the file at compare_dest instead */ ++ snprintf(fnamebuf,MAXPATHLEN,"%s/%s",compare_dest,fname); ++ ++ /* FIXME: now follows symlinks... 
*/ ++ return do_open_regular(fnamebuf); ++} +diff -urN rsync-2.5.4/generator.c rsync-2.5.4-fuzzy/generator.c +--- rsync-2.5.4/generator.c Fri Feb 8 03:36:12 2002 ++++ rsync-2.5.4-fuzzy/generator.c Wed Apr 3 17:00:06 2002 +@@ -42,11 +42,12 @@ + extern int always_checksum; + extern int modify_window; + extern char *compare_dest; ++extern int fuzzy; + + + /* choose whether to skip a particular file */ + static int skip_file(char *fname, +- struct file_struct *file, STRUCT_STAT *st) ++ struct file_struct *file, const STRUCT_STAT *st) + { + if (st->st_size != file->length) { + return 0; +@@ -185,7 +186,61 @@ + return s; + } + ++/* Returns -1 for can't open (null file), -2 for skip */ ++static int open_base_file(struct file_struct *file, ++ char *fname, ++ int statret, ++ STRUCT_STAT *st) ++{ ++ int fd = -1; ++ ++ if (statret == 0) { ++ if (S_ISREG(st->st_mode)) { ++ if (update_only ++ && cmp_modtime(st->st_mtime, file->modtime) > 0) { ++ if (verbose > 1) ++ rprintf(FINFO,"%s is newer\n",fname); ++ return -2; ++ } ++ if (skip_file(fname, file, st)) { ++ set_perms(fname, file, st, 1); ++ return -2; ++ } ++ fd = do_open(fname, O_RDONLY, 0); ++ if (fd == -1) { ++ rprintf(FERROR,"failed to open %s, continuing : %s\n",fname,strerror(errno)); ++ return -1; ++ } else ++ return fd; ++ } else { ++ /* Try to use symlink contents */ ++ if (S_ISLNK(st->st_mode)) { ++ fd = do_open_regular(fname); ++ /* Don't delete yet; receiver will need it */ ++ } else { ++ if (delete_file(fname) != 0) { ++ if (fd != -1) ++ close(fd); ++ return -2; ++ } ++ } ++ } ++ } ++ ++ if (fd == -1 && compare_dest != NULL) ++ fd = open_alternate_base_comparedir(fname); + ++ if (fd == -1 && fuzzy) ++ fd = open_alternate_base_fuzzy(fname); ++ ++ /* Update stat to understand size */ ++ if (fd != -1) { ++ if (do_fstat(fd, st) != 0) ++ rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno)); ++ } ++ ++ return fd; ++} + + /* + * Acts on file number I from FLIST, whose name is fname. 
+@@ -203,9 +258,6 @@ + struct sum_struct *s; + int statret; + struct file_struct *file = flist->files[i]; +- char *fnamecmp; +- char fnamecmpbuf[MAXPATHLEN]; +- extern char *compare_dest; + extern int list_only; + extern int preserve_perms; + extern int only_existing; +@@ -341,82 +393,29 @@ + return; + } + +- fnamecmp = fname; +- +- if ((statret == -1) && (compare_dest != NULL)) { +- /* try the file at compare_dest instead */ +- int saveerrno = errno; +- snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s",compare_dest,fname); +- statret = link_stat(fnamecmpbuf,&st); +- if (!S_ISREG(st.st_mode)) +- statret = -1; +- if (statret == -1) +- errno = saveerrno; +- else +- fnamecmp = fnamecmpbuf; +- } +- +- if (statret == -1) { +- if (errno == ENOENT) { +- write_int(f_out,i); +- if (!dry_run) send_sums(NULL,f_out); +- } else { +- if (verbose > 1) +- rprintf(FERROR, RSYNC_NAME +- ": recv_generator failed to open \"%s\": %s\n", +- fname, strerror(errno)); +- } +- return; +- } +- +- if (!S_ISREG(st.st_mode)) { +- if (delete_file(fname) != 0) { +- return; +- } +- +- /* now pretend the file didn't exist */ +- write_int(f_out,i); +- if (!dry_run) send_sums(NULL,f_out); +- return; +- } +- +- if (opt_ignore_existing && fnamecmp == fname) { +- if (verbose > 1) +- rprintf(FINFO,"%s exists\n",fname); +- return; +- } +- +- if (update_only && cmp_modtime(st.st_mtime,file->modtime)>0 && fnamecmp == fname) { ++ /* Failed to stat for some other reason. 
*/ ++ if (statret == -1 && errno != ENOENT) { + if (verbose > 1) +- rprintf(FINFO,"%s is newer\n",fname); ++ rprintf(FERROR, RSYNC_NAME ++ ": recv_generator failed to open \"%s\": %s\n", ++ fname, strerror(errno)); + return; + } + +- if (skip_file(fname, file, &st)) { +- if (fnamecmp == fname) +- set_perms(fname,file,&st,1); +- return; +- } +- +- if (dry_run) { +- write_int(f_out,i); ++ fd = open_base_file(file, fname, statret, &st); ++ if (fd == -2) + return; +- } +- +- if (whole_file) { +- write_int(f_out,i); +- send_sums(NULL,f_out); +- return; +- } +- +- /* open the file */ +- fd = do_open(fnamecmp, O_RDONLY, 0); + +- if (fd == -1) { +- rprintf(FERROR,RSYNC_NAME": failed to open \"%s\", continuing : %s\n",fnamecmp,strerror(errno)); +- /* pretend the file didn't exist */ ++ if ((whole_file || dry_run) && fd != -1) { ++ close(fd); ++ fd = -1; ++ } ++ ++ if (fd == -1) { ++ /* the file didn't exist, or we can pretend it doesn't */ + write_int(f_out,i); +- send_sums(NULL,f_out); ++ if (!dry_run) ++ send_sums(NULL,f_out); + return; + } + +@@ -427,7 +426,7 @@ + } + + if (verbose > 3) +- rprintf(FINFO,"gen mapped %s of size %.0f\n",fnamecmp,(double)st.st_size); ++ rprintf(FINFO,"gen mapped %s of size %.0f\n",fname,(double)st.st_size); + + s = generate_sums(buf,st.st_size,adapt_block_size(file, block_size)); + +diff -urN rsync-2.5.4/options.c rsync-2.5.4-fuzzy/options.c +--- rsync-2.5.4/options.c Thu Feb 28 09:49:57 2002 ++++ rsync-2.5.4-fuzzy/options.c Wed Apr 3 16:43:54 2002 +@@ -73,6 +73,7 @@ + #else + int modify_window=0; + #endif ++int fuzzy=0; + int blocking_io=-1; + + /** Network address family. 
**/ +@@ -245,6 +246,7 @@ + rprintf(F," --bwlimit=KBPS limit I/O bandwidth, KBytes per second\n"); + rprintf(F," --write-batch=PREFIX write batch fileset starting with PREFIX\n"); + rprintf(F," --read-batch=PREFIX read batch fileset starting with PREFIX\n"); ++ rprintf(F," --fuzzy use similar file as basis if it doesn't exist\n"); + rprintf(F," -h, --help show this help screen\n"); + #ifdef INET6 + rprintf(F," -4 prefer IPv4\n"); +@@ -340,6 +342,7 @@ + {"hard-links", 'H', POPT_ARG_NONE, &preserve_hard_links}, + {"read-batch", 0, POPT_ARG_STRING, &batch_prefix, OPT_READ_BATCH}, + {"write-batch", 0, POPT_ARG_STRING, &batch_prefix, OPT_WRITE_BATCH}, ++ {"fuzzy", 0, POPT_ARG_NONE, &fuzzy}, + #ifdef INET6 + {0, '4', POPT_ARG_VAL, &default_af_hint, AF_INET }, + {0, '6', POPT_ARG_VAL, &default_af_hint, AF_INET6 }, +@@ -757,7 +760,9 @@ + args[ac++] = "--compare-dest"; + args[ac++] = compare_dest; + } +- ++ ++ if (fuzzy && am_sender) ++ args[ac++] = "--fuzzy"; + + *argc = ac; + } +diff -urN rsync-2.5.4/proto.h rsync-2.5.4-fuzzy/proto.h +--- rsync-2.5.4/proto.h Sat Feb 23 11:05:06 2002 ++++ rsync-2.5.4-fuzzy/proto.h Wed Apr 3 16:35:25 2002 +@@ -256,3 +256,6 @@ + int cmp_modtime(time_t file1, time_t file2); + int _Insure_trap_error(int a1, int a2, int a3, int a4, int a5, int a6); + int sys_gettimeofday(struct timeval *tv); ++int do_open_regular(char *fname); ++int open_alternate_base_fuzzy(const char *fname); ++int open_alternate_base_comparedir(const char *fname); +diff -urN rsync-2.5.4/receiver.c rsync-2.5.4-fuzzy/receiver.c +--- rsync-2.5.4/receiver.c Thu Feb 14 05:42:20 2002 ++++ rsync-2.5.4-fuzzy/receiver.c Wed Apr 3 16:46:46 2002 +@@ -36,6 +36,7 @@ + extern char *compare_dest; + extern int make_backups; + extern char *backup_suffix; ++extern int fuzzy; + + static struct delete_list { + DEV64_T dev; +@@ -307,8 +308,6 @@ + char *fname; + char template[MAXPATHLEN]; + char fnametmp[MAXPATHLEN]; +- char *fnamecmp; +- char fnamecmpbuf[MAXPATHLEN]; + struct map_struct *buf; + 
int i; + struct file_struct *file; +@@ -366,28 +365,24 @@ + if (verbose > 2) + rprintf(FINFO,"recv_files(%s)\n",fname); + +- fnamecmp = fname; +- + /* open the file */ +- fd1 = do_open(fnamecmp, O_RDONLY, 0); ++ fd1 = do_open(fname, O_RDONLY, 0); + +- if ((fd1 == -1) && (compare_dest != NULL)) { +- /* try the file at compare_dest instead */ +- snprintf(fnamecmpbuf,MAXPATHLEN,"%s/%s", +- compare_dest,fname); +- fnamecmp = fnamecmpbuf; +- fd1 = do_open(fnamecmp, O_RDONLY, 0); +- } ++ if (fd1 == -1 && compare_dest != NULL) ++ fd1 = open_alternate_base_comparedir(fname); ++ ++ if (fd1 == -1 && fuzzy) ++ fd1 = open_alternate_base_fuzzy(fname); + + if (fd1 != -1 && do_fstat(fd1,&st) != 0) { +- rprintf(FERROR,"fstat %s : %s\n",fnamecmp,strerror(errno)); ++ rprintf(FERROR,"fstat %s : %s\n",fname,strerror(errno)); + receive_data(f_in,NULL,-1,NULL,file->length); + close(fd1); + continue; + } + + if (fd1 != -1 && !S_ISREG(st.st_mode)) { +- rprintf(FERROR,"%s : not a regular file (recv_files)\n",fnamecmp); ++ rprintf(FERROR,"%s : not a regular file (recv_files)\n",fname); + receive_data(f_in,NULL,-1,NULL,file->length); + close(fd1); + continue; +@@ -403,7 +398,7 @@ + if (fd1 != -1 && st.st_size > 0) { + buf = map_file(fd1,st.st_size); + if (verbose > 2) +- rprintf(FINFO,"recv mapped %s of size %.0f\n",fnamecmp,(double)st.st_size); ++ rprintf(FINFO,"recv mapped %s of size %.0f\n",fname,(double)st.st_size); + } else { + buf = NULL; + } +