Updated for current CVS version by Wayne Davison. Passes *MOST* of the
test suite, but otherwise UNTESTED.

Review notes for alternate.c: the best match is now copied with strlcpy()
rather than an unbounded strcpy(); an empty basename is no longer indexed
past its terminator when looking for the extension; a name directly under
"/" is scanned in "/" rather than in ""; string lengths are kept in size_t.

NOTE(review): this copy of the patch arrived with its line breaks and runs
of whitespace collapsed.  Indentation and blank context lines below were
reconstructed from the hunk counts and should be checked against the tree;
"patch -l" tolerates the spacing differences that remain.

--- Makefile.in	15 May 2004 00:48:11 -0000	1.101
+++ Makefile.in	29 Jun 2004 15:14:48 -0000
@@ -32,7 +32,7 @@ ZLIBOBJ=zlib/deflate.o zlib/infblock.o z
 	zlib/inflate.o zlib/inftrees.o zlib/infutil.o zlib/trees.o \
 	zlib/zutil.o zlib/adler32.o
 OBJS1=rsync.o generator.o receiver.o cleanup.o sender.o exclude.o util.o \
-	main.o checksum.o match.o syscall.o log.o backup.o
+	main.o checksum.o match.o syscall.o log.o backup.o alternate.o
 OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \
 	fileio.o batch.o clientname.o
 OBJS3=progress.o pipe.o
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ alternate.c	29 Jun 2004 15:14:48 -0000
@@ -0,0 +1,132 @@
+#include "rsync.h"
+
+extern char *compare_dest;
+extern int verbose;
+
+/* Alternate methods for opening files, if local doesn't exist */
+
+/* Open fname read-only, but only if it stats as a regular file.
+ * Returns the descriptor from do_open(), or -1 if the stat fails or
+ * the name is not a regular file. */
+int do_open_regular(char *fname)
+{
+	STRUCT_STAT st;
+
+	if (do_stat(fname, &st) == 0 && S_ISREG(st.st_mode))
+		return do_open(fname, O_RDONLY, 0);
+
+	return -1;
+}
+
+/* Split fname IN PLACE at its last slash: *dirname gets the directory
+ * part and *basename the final component.  With no slash the directory
+ * is "."; a name directly under the root gets "/" (not ""). */
+static void split_names(char *fname, char **dirname, char **basename)
+{
+	char *slash = strrchr(fname, '/');
+	if (slash) {
+		*slash = '\0';
+		*basename = slash+1;
+		*dirname = slash == fname ? "/" : fname;
+	} else {
+		*basename = fname;
+		*dirname = ".";
+	}
+}
+
+/* Score how well directory entry "name" resembles "basename".  A name
+ * that does not end in ext, or that is no longer than ext, scores 0;
+ * otherwise the score is the length of the common leading prefix. */
+static unsigned int measure_name(const char *name, const char *basename,
+				 const char *ext)
+{
+	size_t namelen = strlen(name);
+	size_t extlen = strlen(ext);
+	unsigned int score = 0;
+
+	/* Extensions must match */
+	if (namelen <= extlen || strcmp(name + namelen - extlen, ext) != 0)
+		return 0;
+
+	/* Now score depends on similarity of prefix */
+	for (; *name == *basename && *name; name++, basename++)
+		score++;
+	return score;
+}
+
+/* Scan fname's directory for the best-scoring neighbour (see
+ * measure_name) and open it read-only as a substitute basis file.
+ * Returns a descriptor, or -1 if the directory cannot be opened, nothing
+ * scores above 0, or the winner is not a regular file. */
+int open_alternate_base_fuzzy(const char *fname)
+{
+	DIR *d;
+	struct dirent *di;
+	char *basename, *dirname;
+	char mangled_name[MAXPATHLEN];
+	char bestname[MAXPATHLEN];
+	unsigned int bestscore = 0;
+	const char *ext;
+
+	/* split_names() writes into its argument, so work on a copy. */
+	strlcpy(mangled_name, fname, sizeof mangled_name);
+
+	split_names(mangled_name, &dirname, &basename);
+	if (!(d = opendir(dirname))) {
+		rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname);
+		return -1;
+	}
+
+	/* Get final extension, eg. .gz; never full basename though.
+	 * An empty basename must not be indexed past its terminator. */
+	ext = *basename ? strrchr(basename + 1, '.') : NULL;
+	if (!ext)
+		ext = basename + strlen(basename); /* ext = "" */
+
+	while ((di = readdir(d)) != NULL) {
+		const char *dname = d_name(di);
+		unsigned int score;
+
+		if (dname[0] == '.' && (dname[1] == '\0'
+		    || (dname[1] == '.' && dname[2] == '\0')))
+			continue;
+
+		score = measure_name(dname, basename, ext);
+		if (verbose > 4) {
+			rprintf(FINFO,"fuzzy score for %s = %u\n",
+				dname, score);
+		}
+		if (score > bestscore) {
+			/* Bounded copy: bestname is a fixed-size buffer. */
+			strlcpy(bestname, dname, sizeof bestname);
+			bestscore = score;
+		}
+	}
+	closedir(d);
+
+	/* Found a candidate. */
+	if (bestscore != 0) {
+		char fuzzyname[MAXPATHLEN];
+
+		pathjoin(fuzzyname,sizeof fuzzyname, dirname, bestname);
+		if (verbose > 2) {
+			rprintf(FINFO, "fuzzy match %s->%s\n",
+				fname, fuzzyname);
+		}
+		return do_open_regular(fuzzyname);
+	}
+	return -1;
+}
+
+/* Open the file of the same relative name under compare_dest.  Returns
+ * a descriptor, or -1 if it is missing or not a regular file. */
+int open_alternate_base_comparedir(const char *fname)
+{
+	char fnamebuf[MAXPATHLEN];
+
+	/* try the file at compare_dest instead */
+	pathjoin(fnamebuf, sizeof fnamebuf, compare_dest, fname);
+
+	/* FIXME: now follows symlinks... */
+	return do_open_regular(fnamebuf);
+}

NOTE(review): in open_base_file() below, the close(fd) on the delete_file()
failure path can never run, because fd is still -1 on that branch.  It is
left as submitted so the line counts of the later generator.c hunks hold.

--- generator.c	23 Jun 2004 21:21:19 -0000	1.90
+++ generator.c	29 Jun 2004 15:14:48 -0000
@@ -41,6 +41,7 @@ extern int ignore_times;
 extern int size_only;
 extern int io_timeout;
 extern int protocol_version;
+extern int fuzzy;
 extern int always_checksum;
 extern char *compare_dest;
 extern int link_dest;
@@ -256,7 +257,61 @@ static void generate_and_send_sums(struc
 	}
 }
 
+/* Returns -1 for can't open (null file), -2 for skip */
+static int open_base_file(struct file_struct *file, char *fname, int statret,
+			  STRUCT_STAT *st)
+{
+	int fd = -1;
+	if (statret == 0) {
+		if (S_ISREG(st->st_mode)) {
+			if (update_only
+			    && cmp_modtime(st->st_mtime, file->modtime) > 0) {
+				if (verbose > 1)
+					rprintf(FINFO, "%s is newer\n", fname);
+				return -2;
+			}
+			if (skip_file(fname, file, st)) {
+				set_perms(fname, file, st, PERMS_REPORT);
+				return -2;
+			}
+			fd = do_open(fname, O_RDONLY, 0);
+			if (fd == -1) {
+				rsyserr(FERROR, errno, "failed to open %s, continuing",
+					full_fname(fname));
+				return -1;
+			}
+			return fd;
+		} else {
+			/* Try to use symlink contents */
+			if (S_ISLNK(st->st_mode)) {
+				fd = do_open_regular(fname);
+				/* Don't delete yet; receiver will need it */
+			} else {
+				if (delete_file(fname) != 0) {
+					if (fd != -1)
+						close(fd);
+					return -2;
+				}
+			}
+		}
+	}
+
+	if (fd == -1 && compare_dest != NULL)
+		fd = open_alternate_base_comparedir(fname);
+
+	if (fd == -1 && fuzzy)
+		fd = open_alternate_base_fuzzy(fname);
+
+	/* Update stat to understand size */
+	if (fd != -1) {
+		if (do_fstat(fd, st) != 0) {
+			rsyserr(FERROR, errno, "fstat %s", full_fname(fname));
+		}
+	}
+
+	return fd;
+}
 
 
 /**
  * Acts on file number @p i from @p flist, whose name is @p fname.
@@ -272,8 +327,6 @@ void recv_generator(char *fname, struct
 	STRUCT_STAT st;
 	struct map_struct *mapbuf;
 	int statret;
-	char *fnamecmp;
-	char fnamecmpbuf[MAXPATHLEN];
 
 	if (list_only)
 		return;
@@ -421,101 +474,39 @@ void recv_generator(char *fname, struct
 		return;
 	}
 
-	fnamecmp = fname;
-
-	if (statret == -1 && compare_dest != NULL) {
-		/* try the file at compare_dest instead */
-		int saveerrno = errno;
-		pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, compare_dest, fname);
-		statret = link_stat(fnamecmpbuf, &st, 0);
-		if (!S_ISREG(st.st_mode))
-			statret = -1;
-		if (statret == -1)
-			errno = saveerrno;
-#if HAVE_LINK
-		else if (link_dest && !dry_run) {
-			if (do_link(fnamecmpbuf, fname) != 0) {
-				if (verbose > 0) {
-					rsyserr(FINFO, errno, "link %s => %s",
-						fnamecmpbuf, fname);
-				}
-			}
-			fnamecmp = fnamecmpbuf;
-		}
-#endif
-		else
-			fnamecmp = fnamecmpbuf;
-	}
-
-	if (statret == -1) {
-		if (preserve_hard_links && hard_link_check(file, HL_SKIP))
-			return;
-		if (errno == ENOENT) {
-			write_int(f_out,i);
-			if (!dry_run)
-				write_sum_head(f_out, NULL);
-		} else if (verbose > 1) {
+	/* Failed to stat for some reason besides "not found". */
+	if (statret == -1 && errno != ENOENT) {
+		if (verbose > 1) {
 			rsyserr(FERROR, errno,
-				"recv_generator: failed to open %s",
+				"recv_generator failed to stat %s",
 				full_fname(fname));
 		}
 		return;
 	}
 
-	if (!S_ISREG(st.st_mode)) {
-		if (delete_file(fname) != 0) {
-			return;
-		}
+	/* --ignore-existing: leave a regular file that is already there. */
+	if (opt_ignore_existing && statret == 0 && S_ISREG(st.st_mode)) {
+		if (verbose > 1)
+			rprintf(FINFO,"%s exists\n",fname);
+		return;
+	}
+
+	/* -2 from open_base_file() means skip this file entirely. */
+	if ((fd = open_base_file(file, fname, statret, &st)) == -2)
+		return;
 
-		/* now pretend the file didn't exist */
+	if ((disable_deltas_p() || dry_run) && fd != -1) {
+		close(fd);
+		fd = -1;
+	}
+
+	if (fd == -1) {
+		/* the file didn't exist, or we can pretend it doesn't */
 		if (preserve_hard_links && hard_link_check(file, HL_SKIP))
 			return;
-		write_int(f_out,i);
+		write_int(f_out, i);
 		if (!dry_run)
 			write_sum_head(f_out, NULL);
-		return;
-	}
-
-	if (opt_ignore_existing && fnamecmp == fname) {
-		if (verbose > 1)
-			rprintf(FINFO,"%s exists\n",fname);
-		return;
-	}
-
-	if (update_only && cmp_modtime(st.st_mtime,file->modtime)>0 && fnamecmp == fname) {
-		if (verbose > 1)
-			rprintf(FINFO,"%s is newer\n",fname);
-		return;
-	}
-
-	if (skip_file(fname, file, &st)) {
-		if (fnamecmp == fname)
-			set_perms(fname, file, &st, PERMS_REPORT);
-		return;
-	}
-
-	if (dry_run) {
-		write_int(f_out,i);
-		return;
-	}
-
-	if (disable_deltas_p()) {
-		write_int(f_out,i);
-		write_sum_head(f_out, NULL);
-		return;
-	}
-
-	/* open the file */
-	fd = do_open(fnamecmp, O_RDONLY, 0);
-
-	if (fd == -1) {
-		rsyserr(FERROR, errno, "failed to open %s, continuing",
-			full_fname(fnamecmp));
-		/* pretend the file didn't exist */
-		if (preserve_hard_links && hard_link_check(file, HL_SKIP))
-			return;
-		write_int(f_out,i);
-		write_sum_head(f_out, NULL);
 		return;
 	}
 
@@ -525,7 +516,7 @@ void recv_generator(char *fname, struct
 		mapbuf = NULL;
 
 	if (verbose > 3) {
-		rprintf(FINFO,"gen mapped %s of size %.0f\n", fnamecmp,
+		rprintf(FINFO, "gen mapped %s of size %.0f\n", fname,
 			(double)st.st_size);
 	}
 

Review notes for the recv_generator() hunks above: the hard-link-master and
non-regular-file checks are left where they already were, ahead of the call
to open_base_file(), so neither early return can leak the descriptor that
call opens.  The --ignore-existing test, which the rewrite removed without
a replacement, is re-added before the base file is opened.

NOTE(review): the removed code also hard-linked from the compare directory
when link_dest was set and ran skip_file() on the compare-directory copy;
nothing added here replaces either.  Confirm that is intended.

NOTE(review): generator.c and receiver.c each call
open_alternate_base_fuzzy() on their own; presumably both must settle on
the same basis file -- verify what happens if the directory changes in
between.

NOTE(review): this copy of the patch arrived with its line breaks and runs
of whitespace collapsed.  Indentation and blank context lines were
reconstructed from the hunk counts; the spacing inside the usage strings
and option tables below is as received and is probably not the original
column alignment.  "patch -l" tolerates the spacing differences in context
lines.

--- options.c	20 Jun 2004 19:47:05 -0000	1.157
+++ options.c	29 Jun 2004 15:14:48 -0000
@@ -94,6 +94,7 @@ int ignore_errors = 0;
 int modify_window = 0;
 int blocking_io = -1;
 int checksum_seed = 0;
+int fuzzy = 0;
 unsigned int block_size = 0;
 
 
@@ -270,6 +271,7 @@ void usage(enum logcode F)
   rprintf(F," -T --temp-dir=DIR create temporary files in directory DIR\n");
   rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
   rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n");
+  rprintf(F," --fuzzy use similar file as basis if basis doesn't exist\n");
   rprintf(F," -P equivalent to --partial --progress\n");
   rprintf(F," -z, --compress compress file data\n");
   rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n");
@@ -368,6 +370,7 @@ static struct poptOption long_options[]
   {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
   {"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 },
   {"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 },
+  {"fuzzy", 0, POPT_ARG_NONE, &fuzzy, 0, 0, 0 },
   /* TODO: Should this take an optional int giving the compression level? */
   {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 },
   {"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 },
@@ -989,6 +992,9 @@ void server_options(char **args,int *arg
 		}
 	}
 
+	if (fuzzy && am_sender)
+		args[ac++] = "--fuzzy";
+
 	*argc = ac;
 	return;
 
--- receiver.c	29 Jun 2004 15:12:01 -0000	1.83
+++ receiver.c	29 Jun 2004 15:14:48 -0000
@@ -48,6 +48,7 @@ extern int ignore_errors;
 extern int orig_umask;
 extern int keep_partial;
 extern int checksum_seed;
+extern int fuzzy;
 
 static void delete_one(char *fn, int is_dir)
 {
@@ -300,8 +301,6 @@ int recv_files(int f_in,struct file_list
 	char *fname, fbuf[MAXPATHLEN];
 	char template[MAXPATHLEN];
 	char fnametmp[MAXPATHLEN];
-	char *fnamecmp;
-	char fnamecmpbuf[MAXPATHLEN];
 	struct map_struct *mapbuf;
 	struct file_struct *file;
 	struct stats initial_stats;
@@ -364,35 +363,31 @@ int recv_files(int f_in,struct file_list
 		if (verbose > 2)
 			rprintf(FINFO,"recv_files(%s)\n",fname);
 
-		fnamecmp = fname;
-
 		/* open the file */
-		fd1 = do_open(fnamecmp, O_RDONLY, 0);
+		fd1 = do_open(fname, O_RDONLY, 0);
 
-		if (fd1 == -1 && compare_dest != NULL) {
-			/* try the file at compare_dest instead */
-			pathjoin(fnamecmpbuf, sizeof fnamecmpbuf,
-				 compare_dest, fname);
-			fnamecmp = fnamecmpbuf;
-			fd1 = do_open(fnamecmp, O_RDONLY, 0);
-		}
+		if (fd1 == -1 && compare_dest != NULL)
+			fd1 = open_alternate_base_comparedir(fname);
+
+		if (fd1 == -1 && fuzzy)
+			fd1 = open_alternate_base_fuzzy(fname);
 
 		if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
 			rsyserr(FERROR, errno, "fstat %s failed",
-				full_fname(fnamecmp));
+				full_fname(fname));
 			receive_data(f_in,NULL,-1,NULL,file->length);
 			close(fd1);
 			continue;
 		}
 
-		if (fd1 != -1 && S_ISDIR(st.st_mode) && fnamecmp == fname) {
+		if (fd1 != -1 && S_ISDIR(st.st_mode)) {
 			/* this special handling for directories
 			 * wouldn't be necessary if robust_rename()
 			 * and the underlying robust_unlink could cope
 			 * with directories
 			 */
 			rprintf(FERROR,"recv_files: %s is a directory\n",
-				full_fname(fnamecmp));
+				full_fname(fname));
 			receive_data(f_in, NULL, -1, NULL, file->length);
 			close(fd1);
 			continue;
@@ -415,7 +410,7 @@ int recv_files(int f_in,struct file_list
 			mapbuf = map_file(fd1,st.st_size);
 			if (verbose > 2) {
 				rprintf(FINFO, "recv mapped %s of size %.0f\n",
-					fnamecmp, (double)st.st_size);
+					fname, (double)st.st_size);
 			}
 		} else
 			mapbuf = NULL;
--- rsync.yo	5 Jun 2004 16:16:30 -0000	1.171
+++ rsync.yo	29 Jun 2004 15:14:49 -0000
@@ -325,6 +325,7 @@ verb(
  -T --temp-dir=DIR create temporary files in directory DIR
  --compare-dest=DIR also compare received files relative to DIR
  --link-dest=DIR create hardlinks to DIR for unchanged files
+ --fuzzy use similar file as basis if basis is gone
  -P equivalent to --partial --progress
  -z, --compress compress file data
  -C, --cvs-exclude auto ignore files in the same way CVS does