-The changes to generator.c were greatly simplified, making the patch
-easier to maintain and fixing the failing test in the testsuite.
-Lightly tested.
+This latest version resolves most of the TODO-list items. The one
+remaining issue is that we really need to handle all the files in a dir
+before we move on to its sub-directories, so this patch needs the sorting
+algorithm changed to put all the subdirs at the end of the list of a
+dir's contents.
Be sure to run "make proto" before "make".
+--- orig/flist.c 2005-02-12 19:54:27
++++ flist.c 2005-02-13 09:49:22
+@@ -330,7 +330,7 @@ void send_file_entry(struct file_struct
+ char fname[MAXPATHLEN];
+ int l1, l2;
+
+- if (f == -1)
++ if (f < 0)
+ return;
+
+ if (!file) {
+@@ -975,7 +975,8 @@ void send_file_name(int f, struct file_l
+ struct file_struct *file;
+ char fbuf[MAXPATHLEN];
+
+- if (!(file = make_file(fname, flist, ALL_FILTERS)))
++ file = make_file(fname, flist, f == -2 ? SERVER_FILTERS : ALL_FILTERS);
++ if (!file)
+ return;
+
+ maybe_emit_filelist_progress(flist);
+@@ -1311,7 +1312,7 @@ struct file_list *recv_file_list(int f)
+
+ clean_flist(flist, relative_paths, 1);
+
+- if (f != -1) {
++ if (f >= 0) {
+ /* Now send the uid/gid list. This was introduced in
+ * protocol version 15 */
+ recv_uid_list(f, flist);
+@@ -1650,6 +1651,25 @@ static int is_backup_file(char *fn)
+ return k > 0 && strcmp(fn+k, backup_suffix) == 0;
+ }
+
++struct file_list *get_dirlist(const char *dirname, int ignore_excludes)
++{
++ struct file_list *dirlist;
++ char dirbuf[MAXPATHLEN];
++ int dlen;
++ int save_recurse = recurse;
++
++ dlen = strlcpy(dirbuf, dirname, MAXPATHLEN);
++ if (dlen >= MAXPATHLEN)
++ return NULL;
++
++ dirlist = flist_new(WITHOUT_HLINK, "get_dirlist");
++ recurse = 0;
++ send_directory(ignore_excludes ? -2 : -1, dirlist, dirbuf, dlen);
++ recurse = save_recurse;
++
++ return dirlist;
++}
++
+
+ /* This function is used to implement per-directory deletion, and
+ * is used by all the --delete-WHEN options. Note that the fbuf
--- orig/generator.c 2005-02-13 05:50:28
-+++ generator.c 2005-02-03 02:11:10
++++ generator.c 2005-02-13 10:01:48
@@ -47,6 +47,7 @@ extern int size_only;
extern OFF_T max_size;
extern int io_timeout;
extern int always_checksum;
extern char *partial_dir;
extern char *basis_dir[];
-@@ -227,6 +228,88 @@ static void generate_and_send_sums(int f
+@@ -227,6 +228,47 @@ static void generate_and_send_sums(int f
unmap_file(mapbuf);
}
-+/* Try to find a filename in the same dir as "fname" with a similar name.
-+ *
-+ * TODO:
-+ * - We should be using a cache of names for the current dir, not
-+ * re-reading the destination directory for every file.
-+ * - We must not return an rsync tempfile from the current transfer.
-+ * - If the highest-rated name is not a normal file, we should fall-
-+ * back to the next highest-rated file.
-+ * - We must not return a destination file that is being updated
-+ * during the current transfer, even if we already processed it
-+ * (since the receiver may not be done with it yet).
-+ * - We must weed out any names that a daemon's config has excluded.
-+ */
-+static int find_fuzzy(const char *fname, char *buf, STRUCT_STAT *st_ptr)
++/* Try to find a filename in the same dir as "fname" with a similar name. */
++static int find_fuzzy(struct file_struct *file, struct file_list *dirlist)
+{
-+ DIR *d;
-+ struct dirent *di;
-+ char *basename, *dirname, *slash;
-+ char bestname[MAXPATHLEN];
-+ int suf_len, basename_len;
++ int fname_len, fname_suf_len;
++ const char *fname_suf, *fname = file->basename;
+ uint32 lowest_dist = 0x7FFFFFFF;
-+ const char *suf;
-+
-+ strlcpy(buf, fname, MAXPATHLEN);
-+ if ((slash = strrchr(buf, '/')) != NULL) {
-+ dirname = buf;
-+ *slash = '\0';
-+ basename = slash + 1;
-+ } else {
-+ basename = buf;
-+ dirname = ".";
-+ }
-+ basename_len = strlen(basename);
-+
-+ if (!(d = opendir(dirname))) {
-+ rsyserr(FERROR, errno, "recv_generator opendir(%s)", dirname);
-+ return -1;
-+ }
-+ if (slash)
-+ *slash = '/';
++ int j, lowest_j = -1;
+
-+ suf = find_filename_suffix(basename, basename_len, &suf_len);
++ fname_len = strlen(fname);
++ fname_suf = find_filename_suffix(fname, fname_len, &fname_suf_len);
+
-+ bestname[0] = '\0';
-+ while ((di = readdir(d)) != NULL) {
-+ const char *dname_suf, *dname = d_name(di);
++ for (j = 0; j < dirlist->count; j++) {
++ struct file_struct *fp = dirlist->files[j];
++ const char *suf, *name;
++ int len, suf_len;
+ uint32 dist;
-+ int dname_len, dname_suf_len;
+
-+ if (dname[0] == '.' && (dname[1] == '\0'
-+ || (dname[1] == '.' && dname[2] == '\0')))
++ if (!S_ISREG(fp->mode) || !fp->length)
+ continue;
+
-+ dname_len = strlen(dname);
-+ dname_suf = find_filename_suffix(dname, dname_len, &dname_suf_len);
++ name = fp->basename;
++ len = strlen(name);
++ suf = find_filename_suffix(name, len, &suf_len);
+
-+ dist = fuzzy_distance(dname, dname_len, basename, basename_len);
++ dist = fuzzy_distance(name, len, fname, fname_len);
+ /* Add some extra weight to how well the suffixes match. */
-+ dist += fuzzy_distance(dname_suf, dname_suf_len, suf, suf_len) * 10;
++ dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len)
++ * 10;
+ if (verbose > 4) {
+ rprintf(FINFO, "fuzzy distance for %s = %d (%d)\n",
-+ dname, (int)(dist>>16), (int)(dist&0xFFFF));
++ name, (int)(dist>>16), (int)(dist&0xFFFF));
+ }
+ if (dist <= lowest_dist) {
-+ strlcpy(bestname, dname, sizeof bestname);
+ lowest_dist = dist;
++ lowest_j = j;
+ }
+ }
-+ closedir(d);
+
-+ /* Found a candidate. */
-+ if (bestname[0] != '\0') {
-+ strlcpy(basename, bestname, MAXPATHLEN - (basename - buf));
-+ if (verbose > 2) {
-+ rprintf(FINFO, "fuzzy match %s->%s\n",
-+ safe_fname(fname), buf);
-+ }
-+ return link_stat(buf, st_ptr, 0);
-+ }
-+ return -1;
++ return lowest_j;
+}
+
/* Acts on flist->file's ndx'th item, whose name is fname. If a directory,
* make sure it exists, and has the right permissions/timestamp info. For
-@@ -492,6 +575,15 @@ static void recv_generator(char *fname,
+@@ -241,6 +283,8 @@ static void recv_generator(char *fname,
+ int f_out, int f_out_name)
+ {
+ static int missing_below = -1;
++ static char *fuzzy_dirname = NULL;
++ static struct file_list *fuzzy_dirlist = NULL;
+ int fd = -1, f_copy = -1;
+ STRUCT_STAT st, partial_st;
+ struct file_struct *back_file = NULL;
+@@ -275,6 +319,16 @@ static void recv_generator(char *fname,
+ statret = -1;
+ stat_errno = ENOENT;
+ } else {
++ if (fuzzy_basis && S_ISREG(file->mode)) {
++ char *dn = file->dirname ? file->dirname : ".";
++ if (fuzzy_dirname != dn) {
++ if (fuzzy_dirlist)
++ flist_free(fuzzy_dirlist);
++ fuzzy_dirname = dn;
++ fuzzy_dirlist = get_dirlist(fuzzy_dirname, 1);
++ }
++ }
++
+ statret = link_stat(fname, &st,
+ keep_dirlinks && S_ISDIR(file->mode));
+ stat_errno = errno;
+@@ -492,6 +546,24 @@ static void recv_generator(char *fname,
} else
partialptr = NULL;
-+ if (statret == -1 && fuzzy_basis) {
-+ if (find_fuzzy(fname, fnamecmpbuf, &st) == 0
-+ && S_ISREG(st.st_mode)) {
++ if (statret == -1 && fuzzy_basis && dry_run <= 1) {
++ int j = find_fuzzy(file, fuzzy_dirlist);
++ if (j >= 0) {
++ struct file_struct *fp = fuzzy_dirlist->files[j];
++ f_name_to(fp, fnamecmpbuf);
++ if (verbose > 2) {
++ rprintf(FINFO, "fuzzy match for %s: %s\n",
++ safe_fname(fname), safe_fname(fnamecmpbuf));
++ }
++ st.st_mode = fp->mode;
++ st.st_size = fp->length;
++ st.st_mtime = fp->modtime;
+ statret = 0;
+ fnamecmp = fnamecmpbuf;
+ fnamecmp_type = FNAMECMP_FUZZY;
if (statret == -1) {
if (preserve_hard_links && hard_link_check(file, HL_SKIP))
return;
-@@ -520,6 +612,8 @@ static void recv_generator(char *fname,
+@@ -520,6 +592,8 @@ static void recv_generator(char *fname,
if (!compare_dest && fnamecmp_type <= FNAMECMP_BASIS_DIR_HIGH)
;
else if (unchanged_file(fnamecmp, file, &st)) {
if (fnamecmp_type == FNAMECMP_FNAME)
set_perms(fname, file, &st, PERMS_REPORT);
-@@ -594,8 +688,24 @@ notify_others:
+@@ -540,6 +614,11 @@ prepare_to_open:
+ statret = -1;
+ goto notify_others;
+ }
++ if (fuzzy_basis && fnamecmp_type == FNAMECMP_FNAME) {
++ int j = flist_find(fuzzy_dirlist, file);
++ if (j >= 0) /* don't use an updating file as fuzzy basis */
++ fuzzy_dirlist->files[j]->length = 0;
++ }
+
+ /* open the file */
+ fd = do_open(fnamecmp, O_RDONLY, 0);
+@@ -594,8 +673,24 @@ notify_others:
write_int(f_out, ndx);
if (protocol_version >= 29 && inplace && !read_batch)
write_byte(f_out, fnamecmp_type);
the destination machine as an additional hierarchy to compare destination
files against doing transfers (if the files are missing in the destination
--- orig/util.c 2005-02-11 10:53:15
-+++ util.c 2005-01-19 17:30:51
-@@ -1224,3 +1224,108 @@ void *_realloc_array(void *ptr, unsigned
++++ util.c 2005-02-13 09:44:25
+@@ -1224,3 +1224,110 @@ void *_realloc_array(void *ptr, unsigned
return malloc(size * num);
return realloc(ptr, size * num);
}
+ *len_ptr = 0;
+
+ /* Find the last significant suffix. */
-+ for (s = fn + fn_len - 1; fn_len > 1; ) {
-+ while (*s != '.' && s != fn) s--;
++ for (s = fn + fn_len; fn_len > 1; ) {
++ while (*--s != '.' && s != fn) {}
+ if (s == fn)
+ break;
+ s_len = fn_len - (s - fn);
+ continue;
+ *len_ptr = s_len;
+ suf = s;
++ if (s_len == 1)
++ break;
+ /* Determine if the suffix is all digits. */
+ for (s++, s_len--; s_len > 0; s++, s_len--) {
+ if (!isdigit(*s))
+ return suf;
+ }
+ /* An all-digit suffix may not be that significant. */
-+ continue;
++ s = suf;
+ }
+
+ return suf;