The '%n' escape needs to append a trailing slash onto a directory name.
[rsync/rsync.git] / generator.c
index bf50791..188f51f 100644 (file)
@@ -47,6 +47,7 @@ extern int size_only;
 extern OFF_T max_size;
 extern int io_timeout;
 extern int protocol_version;
+extern int fuzzy_basis;
 extern int always_checksum;
 extern char *partial_dir;
 extern char *basis_dir[];
@@ -61,7 +62,7 @@ extern int orig_umask;
 extern int safe_symlinks;
 extern long block_size; /* "long" because popt can't set an int32. */
 
-extern struct exclude_list_struct server_exclude_list;
+extern struct filter_list_struct server_filter_list;
 
 static int unchanged_attrs(struct file_struct *file, STRUCT_STAT *st)
 {
@@ -78,6 +79,45 @@ static int unchanged_attrs(struct file_struct *file, STRUCT_STAT *st)
        return 1;
 }
 
+
+#define SID_UPDATING             ITEM_UPDATING
+#define SID_REPORT_CHECKSUM      ITEM_REPORT_CHECKSUM
+#define SID_NO_DEST_AND_NO_UPDATE (1<<16)
+
+static void itemize(struct file_struct *file, int statret, STRUCT_STAT *st,
+                   int32 sflags, int f_out, int ndx)
+{ /* Compare "file" against the stat info in "st", build a set of ITEM_* flags describing the differences, and hand them to f_out. */
+       int iflags = sflags & (SID_UPDATING | SID_REPORT_CHECKSUM); /* only these two caller flags are copied into the result; SID_NO_DEST_AND_NO_UPDATE is consulted below but never sent */
+
+       if (statret >= 0) { /* a non-negative statret is treated as "st is usable" */
+               if (S_ISREG(file->mode) && file->length != st->st_size)
+                       iflags |= ITEM_REPORT_SIZE;
+       } else
+               iflags |= ITEM_IS_NEW; /* negative statret: report the item as new */
+       if (statret >= 0 && !(sflags & SID_NO_DEST_AND_NO_UPDATE)) { /* attribute comparison is skipped entirely when the caller sets SID_NO_DEST_AND_NO_UPDATE */
+               int keep_time = !preserve_times ? 0
+                   : S_ISDIR(file->mode) ? !omit_dir_times : !S_ISLNK(file->mode); /* 0 without preserve_times; dirs follow !omit_dir_times; otherwise 1 for anything that is not a symlink */
+
+               if ((iflags & ITEM_UPDATING && !keep_time) /* parses as (iflags & ITEM_UPDATING) && !keep_time: item is being updated and its time is not kept */
+                   || (keep_time && file->modtime != st->st_mtime)) /* or: time is kept and currently differs */
+                       iflags |= ITEM_REPORT_TIME;
+               if (preserve_perms && file->mode != st->st_mode)
+                       iflags |= ITEM_REPORT_PERMS;
+               if (preserve_uid && am_root && file->uid != st->st_uid) /* owner differences are only reported when am_root is set */
+                       iflags |= ITEM_REPORT_OWNER;
+               if (preserve_gid && file->gid != GID_NONE && st->st_gid != file->gid) /* a gid of GID_NONE is never reported as a group change */
+                       iflags |= ITEM_REPORT_GROUP;
+       }
+
+       if (iflags && !read_batch) { /* nothing is written when no flag is set or when read_batch is set */
+               if (ndx >= 0) /* a negative ndx suppresses the index; the caller in this diff passes -1 after writing ndx itself */
+                       write_int(f_out, ndx);
+               write_byte(f_out, iflags); /* iflags goes out as two write_byte calls: this one gets the unshifted value */
+               write_byte(f_out, iflags >> 8); /* ...and this one gets iflags shifted right by 8 */
+       }
+}
+
+
 /* Perform our quick-check heuristic for determining if a file is unchanged. */
 static int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
 {
@@ -186,7 +226,7 @@ static void sum_sizes_sqroot(struct sum_struct *sum, int64 len)
  */
 static void generate_and_send_sums(int fd, OFF_T len, int f_out, int f_copy)
 {
-       size_t i;
+       int32 i;
        struct map_struct *mapbuf;
        struct sum_struct sum;
        OFF_T offset = 0;
@@ -228,18 +268,76 @@ static void generate_and_send_sums(int fd, OFF_T len, int f_out, int f_copy)
 }
 
 
-/*
- * Acts on file number @p i from @p flist, whose name is @p fname.
- *
- * First fixes up permissions, then generates checksums for the file.
+/* Try to find a filename in the same dir as "fname" with a similar name.  Returns the chosen entry's index in dirlist->files, or -1 if no entry qualifies. */
+static int find_fuzzy(struct file_struct *file, struct file_list *dirlist)
+{
+       int fname_len, fname_suf_len;
+       const char *fname_suf, *fname = file->basename;
+       uint32 lowest_dist = 0x7FFFFFFF; /* best distance so far; an entry whose distance exceeds this initial value is never selected */
+       int j, lowest_j = -1; /* lowest_j stays -1 until some entry is accepted */
+
+       fname_len = strlen(fname);
+       fname_suf = find_filename_suffix(fname, fname_len, &fname_suf_len); /* target's suffix is computed once, outside the loop */
+
+       for (j = 0; j < dirlist->count; j++) { /* NOTE(review): dirlist is dereferenced unchecked -- confirm callers never pass NULL */
+               struct file_struct *fp = dirlist->files[j];
+               const char *suf, *name;
+               int len, suf_len;
+               uint32 dist;
+
+               if (!S_ISREG(fp->mode) || !fp->length
+                   || fp->flags & FLAG_NO_FUZZY)
+                       continue; /* candidates must be regular files, non-empty, and not flagged FLAG_NO_FUZZY */
+
+               name = fp->basename;
+
+               if (fp->length == file->length
+                   && fp->modtime == file->modtime) { /* identical size and modtime: accept at once, without computing any name distance */
+                       if (verbose > 4) {
+                               rprintf(FINFO,
+                                       "fuzzy size/modtime match for %s\n",
+                                       name);
+                       }
+                       return j;
+               }
+
+               len = strlen(name);
+               suf = find_filename_suffix(name, len, &suf_len);
+
+               dist = fuzzy_distance(name, len, fname, fname_len); /* distance between the two full basenames */
+               /* Add some extra weight to how well the suffixes match. */
+               dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len)
+                     * 10;
+               if (verbose > 4) {
+                       rprintf(FINFO, "fuzzy distance for %s = %d.%05d\n",
+                               name, (int)(dist>>16), (int)(dist&0xFFFF)); /* prints the upper and lower 16 bits separately; presumably dist is 16.16 fixed-point -- TODO confirm against fuzzy_distance */
+               }
+               if (dist <= lowest_dist) { /* "<=" means that on a tie the later entry in dirlist wins */
+                       lowest_dist = dist;
+                       lowest_j = j;
+               }
+       }
+
+       return lowest_j;
+}
+
+
+/* Acts on flist->file's ndx'th item, whose name is fname.  If a directory,
+ * make sure it exists, and has the right permissions/timestamp info.  For
+ * all other non-regular files (symlinks, etc.) we create them here.  For
+ * regular files that have changed, we try to find a basis file and then
+ * start sending checksums.
  *
- * @note This comment was added later by mbp who was trying to work it
- * out.  It might be wrong.
- */
+ * Note that f_out is set to -1 when doing final directory-permission and
+ * modification-time repair. */
 static void recv_generator(char *fname, struct file_list *flist,
-                          struct file_struct *file, int i,
+                          struct file_struct *file, int ndx,
                           int f_out, int f_out_name)
 {
+       static int missing_below = -1;
+       static char *fuzzy_dirname = NULL;
+       static struct file_list *fuzzy_dirlist = NULL;
+       struct file_struct *fuzzy_file = NULL;
        int fd = -1, f_copy = -1;
        STRUCT_STAT st, partial_st;
        struct file_struct *back_file = NULL;
@@ -251,12 +349,27 @@ static void recv_generator(char *fname, struct file_list *flist,
        if (list_only)
                return;
 
-       if (verbose > 2)
-               rprintf(FINFO, "recv_generator(%s,%d)\n", safe_fname(fname), i);
+       if (!fname) {
+               if (fuzzy_dirlist) {
+                       flist_free(fuzzy_dirlist);
+                       fuzzy_dirlist = NULL;
+                       fuzzy_dirname = NULL;
+               }
+               if (missing_below >= 0) {
+                       dry_run--;
+                       missing_below = -1;
+               }
+               return;
+       }
+
+       if (verbose > 2) {
+               rprintf(FINFO, "recv_generator(%s,%d)\n",
+                       safe_fname(fname), ndx);
+       }
 
-       if (server_exclude_list.head
-           && check_exclude(&server_exclude_list, fname,
-                            S_ISDIR(file->mode)) < 0) {
+       if (server_filter_list.head
+           && check_filter(&server_filter_list, fname,
+                           S_ISDIR(file->mode)) < 0) {
                if (verbose) {
                        rprintf(FINFO, "skipping server-excluded file \"%s\"\n",
                                safe_fname(fname));
@@ -264,10 +377,26 @@ static void recv_generator(char *fname, struct file_list *flist,
                return;
        }
 
+       if (missing_below >= 0 && file->dir.depth <= missing_below) {
+               dry_run--;
+               missing_below = -1;
+       }
        if (dry_run > 1) {
                statret = -1;
                stat_errno = ENOENT;
        } else {
+               if (fuzzy_basis && S_ISREG(file->mode)) {
+                       char *dn = file->dirname ? file->dirname : ".";
+                       /* Yes, identical dirnames are guaranteed to have
+                        * identical pointers at this point. */
+                       if (fuzzy_dirname != dn) {
+                               if (fuzzy_dirlist)
+                                       flist_free(fuzzy_dirlist);
+                               fuzzy_dirname = dn;
+                               fuzzy_dirlist = get_dirlist(fuzzy_dirname, 1);
+                       }
+               }
+
                statret = link_stat(fname, &st,
                                    keep_dirlinks && S_ISDIR(file->mode));
                stat_errno = errno;
@@ -301,6 +430,12 @@ static void recv_generator(char *fname, struct file_list *flist,
                        delete_file(fname, DEL_TERSE);
                        statret = -1;
                }
+               if (dry_run && statret != 0 && missing_below < 0) {
+                       missing_below = file->dir.depth;
+                       dry_run++;
+               }
+               if (protocol_version >= 29 && f_out != -1)
+                       itemize(file, statret, &st, 0, f_out, ndx);
                if (statret != 0 && do_mkdir(fname,file->mode) != 0 && errno != EEXIST) {
                        if (!relative_paths || errno != ENOENT
                            || create_directory_path(fname, orig_umask) < 0
@@ -310,27 +445,31 @@ static void recv_generator(char *fname, struct file_list *flist,
                                        full_fname(fname));
                        }
                }
-               /* f_out is set to -1 when doing final directory-permission
-                * and modification-time repair. */
                if (set_perms(fname, file, statret ? NULL : &st, 0)
-                   && verbose && f_out != -1)
+                   && verbose && protocol_version < 29 && f_out != -1)
                        rprintf(FINFO, "%s/\n", safe_fname(fname));
-               if (delete_during && f_out != -1
-                   && (file->flags & FLAG_DEL_START))
-                       delete_in_dir(flist, fname);
+               if (delete_during && f_out != -1 && csum_length != SUM_LENGTH
+                   && (file->flags & FLAG_DEL_HERE))
+                       delete_in_dir(flist, fname, file);
                return;
-       } else if (max_size && file->length > max_size) {
-               if (verbose > 1)
-                       rprintf(FINFO, "%s is over max-size\n", fname);
+       }
+       
+       if (max_size && file->length > max_size) {
+               if (verbose > 1) {
+                       rprintf(FINFO, "%s is over max-size\n",
+                               safe_fname(fname));
+               }
                return;
        }
 
        if (preserve_links && S_ISLNK(file->mode)) {
-#if SUPPORT_LINKS
+#ifdef SUPPORT_LINKS
                if (safe_symlinks && unsafe_symlink(file->u.link, fname)) {
                        if (verbose) {
-                               rprintf(FINFO, "ignoring unsafe symlink %s -> \"%s\"\n",
-                                       full_fname(fname), file->u.link);
+                               rprintf(FINFO,
+                                       "ignoring unsafe symlink %s -> \"%s\"\n",
+                                       full_fname(fname),
+                                       safe_fname(file->u.link));
                        }
                        return;
                }
@@ -346,6 +485,10 @@ static void recv_generator(char *fname, struct file_list *flist,
                                 * right place -- no further action
                                 * required. */
                                if (strcmp(lnk, file->u.link) == 0) {
+                                       if (protocol_version >= 29) {
+                                               itemize(file, 0, &st, 0,
+                                                       f_out, ndx);
+                                       }
                                        set_perms(fname, file, &st,
                                                  PERMS_REPORT);
                                        return;
@@ -360,7 +503,10 @@ static void recv_generator(char *fname, struct file_list *flist,
                                full_fname(fname), safe_fname(file->u.link));
                } else {
                        set_perms(fname,file,NULL,0);
-                       if (verbose) {
+                       if (protocol_version >= 29) {
+                               itemize(file, statret, &st, SID_UPDATING,
+                                       f_out, ndx);
+                       } else if (verbose) {
                                rprintf(FINFO, "%s -> %s\n", safe_fname(fname),
                                        safe_fname(file->u.link));
                        }
@@ -374,6 +520,10 @@ static void recv_generator(char *fname, struct file_list *flist,
                    st.st_mode != file->mode ||
                    st.st_rdev != file->u.rdev) {
                        int dflag = S_ISDIR(st.st_mode) ? DEL_DIR : 0;
+                       if (protocol_version >= 29) {
+                               itemize(file, statret, &st, SID_UPDATING,
+                                       f_out, ndx);
+                       }
                        delete_file(fname, dflag | DEL_TERSE);
                        if (verbose > 2) {
                                rprintf(FINFO,"mknod(%s,0%o,0x%x)\n",
@@ -385,12 +535,16 @@ static void recv_generator(char *fname, struct file_list *flist,
                                        full_fname(fname));
                        } else {
                                set_perms(fname,file,NULL,0);
-                               if (verbose) {
+                               if (verbose && protocol_version < 29) {
                                        rprintf(FINFO, "%s\n",
                                                safe_fname(fname));
                                }
                        }
                } else {
+                       if (protocol_version >= 29) {
+                               itemize(file, statret, &st, 0,
+                                       f_out, ndx);
+                       }
                        set_perms(fname, file, &st, PERMS_REPORT);
                }
                return;
@@ -408,7 +562,7 @@ static void recv_generator(char *fname, struct file_list *flist,
        fnamecmp = fname;
        fnamecmp_type = FNAMECMP_FNAME;
 
-       if (statret == -1 && basis_dir[0] != NULL) {
+       if (statret != 0 && basis_dir[0] != NULL) {
                int fallback_match = -1;
                int match_level = 0;
                int i = 0;
@@ -442,13 +596,13 @@ static void recv_generator(char *fname, struct file_list *flist,
                                pathjoin(fnamecmpbuf, sizeof fnamecmpbuf,
                                         basis_dir[i], fname);
                        }
-#if HAVE_LINK
+#ifdef HAVE_LINK
                        if (link_dest && match_level == 3 && !dry_run) {
                                if (do_link(fnamecmpbuf, fname) < 0) {
                                        if (verbose) {
                                                rsyserr(FINFO, errno,
                                                        "link %s => %s",
-                                                       fnamecmpbuf,
+                                                       full_fname(fnamecmpbuf),
                                                        safe_fname(fname));
                                        }
                                        fnamecmp = fnamecmpbuf;
@@ -474,12 +628,30 @@ static void recv_generator(char *fname, struct file_list *flist,
        if (partial_dir && (partialptr = partial_dir_fname(fname)) != NULL
            && link_stat(partialptr, &partial_st, 0) == 0
            && S_ISREG(partial_st.st_mode)) {
-               if (statret == -1)
+               if (statret != 0)
                        goto prepare_to_open;
        } else
                partialptr = NULL;
 
-       if (statret == -1) {
+       if (statret != 0 && fuzzy_basis && dry_run <= 1) {
+               int j = find_fuzzy(file, fuzzy_dirlist);
+               if (j >= 0) {
+                       fuzzy_file = fuzzy_dirlist->files[j];
+                       f_name_to(fuzzy_file, fnamecmpbuf);
+                       if (verbose > 2) {
+                               rprintf(FINFO, "fuzzy basis selected for %s: %s\n",
+                                       safe_fname(fname), safe_fname(fnamecmpbuf));
+                       }
+                       st.st_mode = fuzzy_file->mode;
+                       st.st_size = fuzzy_file->length;
+                       st.st_mtime = fuzzy_file->modtime;
+                       statret = 0;
+                       fnamecmp = fnamecmpbuf;
+                       fnamecmp_type = FNAMECMP_FUZZY;
+               }
+       }
+
+       if (statret != 0) {
                if (preserve_hard_links && hard_link_check(file, HL_SKIP))
                        return;
                if (stat_errno == ENOENT)
@@ -507,7 +679,15 @@ static void recv_generator(char *fname, struct file_list *flist,
 
        if (!compare_dest && fnamecmp_type <= FNAMECMP_BASIS_DIR_HIGH)
                ;
+       else if (fnamecmp_type == FNAMECMP_FUZZY)
+               ;
        else if (unchanged_file(fnamecmp, file, &st)) {
+               if (protocol_version >= 29) {
+                       itemize(file, statret, &st,
+                               fnamecmp_type == FNAMECMP_FNAME
+                                              ? 0 : SID_NO_DEST_AND_NO_UPDATE,
+                               f_out, ndx);
+               }
                if (fnamecmp_type == FNAMECMP_FNAME)
                        set_perms(fname, file, &st, PERMS_REPORT);
                return;
@@ -521,12 +701,19 @@ prepare_to_open:
                statret = 0;
        }
 
-       if (dry_run || whole_file > 0) {
-               statret = -1;
+       if (dry_run || read_batch)
                goto notify_others;
-       }
-       if (read_batch)
+       if (whole_file > 0) {
+               if (statret == 0)
+                       statret = 1;
                goto notify_others;
+       }
+
+       if (fuzzy_basis) {
+               int j = flist_find(fuzzy_dirlist, file);
+               if (j >= 0) /* don't use changing file as future fuzzy basis */
+                       fuzzy_dirlist->files[j]->flags |= FLAG_NO_FUZZY;
+       }
 
        /* open the file */
        fd = do_open(fnamecmp, O_RDONLY, 0);
@@ -547,7 +734,7 @@ prepare_to_open:
                        close(fd);
                        return;
                }
-               if (!(back_file = make_file(fname, NULL, NO_EXCLUDES))) {
+               if (!(back_file = make_file(fname, NULL, NO_FILTERS))) {
                        close(fd);
                        goto pretend_missing;
                }
@@ -575,14 +762,35 @@ prepare_to_open:
        }
 
        if (verbose > 2)
-               rprintf(FINFO, "generating and sending sums for %d\n", i);
+               rprintf(FINFO, "generating and sending sums for %d\n", ndx);
 
 notify_others:
-       write_int(f_out, i);
-       if (protocol_version >= 29 && inplace && !read_batch)
-               write_byte(f_out, fnamecmp_type);
-       if (f_out_name >= 0)
+       write_int(f_out, ndx);
+       if (protocol_version >= 29) {
+               itemize(file, statret, &st, SID_UPDATING
+                       | (always_checksum ? SID_REPORT_CHECKSUM : 0),
+                       f_out, -1);
+               if (inplace && !read_batch)
+                       write_byte(f_out, fnamecmp_type);
+       }
+       if (f_out_name >= 0) {
                write_byte(f_out_name, fnamecmp_type);
+               if (fnamecmp_type == FNAMECMP_FUZZY) {
+                       uchar lenbuf[3], *lb = lenbuf;
+                       int len = strlen(fuzzy_file->basename);
+                       if (len > 0x7F) {
+#if MAXPATHLEN > 0x7FFF
+                               *lb++ = len / 0x10000 + 0x80;
+                               *lb++ = len / 0x100;
+#else
+                               *lb++ = len / 0x100 + 0x80;
+#endif
+                       }
+                       *lb = len;
+                       write_buf(f_out_name, lenbuf, lb - lenbuf + 1);
+                       write_buf(f_out_name, fuzzy_file->basename, len);
+               }
+       }
 
        if (dry_run || read_batch)
                return;
@@ -595,7 +803,7 @@ notify_others:
                        set_perms(backupptr, back_file, NULL, 0);
                        if (verbose > 1) {
                                rprintf(FINFO, "backed up %s to %s\n",
-                                       fname, backupptr);
+                                       safe_fname(fname), safe_fname(backupptr));
                        }
                        free(back_file);
                }
@@ -614,6 +822,8 @@ void generate_files(int f_out, struct file_list *flist, char *local_name,
        char fbuf[MAXPATHLEN];
        int need_retouch_dir_times = preserve_times && !omit_dir_times;
        int need_retouch_dir_perms = 0;
+       int save_only_existing = only_existing;
+       int save_opt_ignore_existing = opt_ignore_existing;
 
        if (verbose > 2) {
                rprintf(FINFO, "generator starting pid=%ld count=%d\n",
@@ -627,9 +837,8 @@ void generate_files(int f_out, struct file_list *flist, char *local_name,
                        : "delta transmission enabled\n");
        }
 
-       /* we expect to just sit around now, so don't exit on a
-          timeout. If we really get a timeout then the other process should
-          exit */
+       /* We expect to just sit around now, so don't exit on a timeout.
+        * If we really get a timeout then the other process should exit. */
        io_timeout = 0;
 
        for (i = 0; i < flist->count; i++) {
@@ -638,9 +847,10 @@ void generate_files(int f_out, struct file_list *flist, char *local_name,
 
                if (!file->basename)
                        continue;
-               /* we need to ensure that any directories we create have writeable
-                  permissions initially so that we can create the files within
-                  them. This is then fixed after the files are transferred */
+
+               /* We need to ensure that any dirs we create have writeable
+                * permissions during the time we are putting files within
+                * them.  This is then fixed after the transfer is done. */
                if (!am_root && S_ISDIR(file->mode) && !(file->mode & S_IWUSR)) {
                        copy = *file;
                        copy.mode |= S_IWUSR; /* user write */
@@ -651,9 +861,14 @@ void generate_files(int f_out, struct file_list *flist, char *local_name,
                recv_generator(local_name ? local_name : f_name_to(file, fbuf),
                               flist, file, i, f_out, f_out_name);
        }
+       recv_generator(NULL, NULL, NULL, 0, -1, -1);
+       if (delete_during)
+               delete_in_dir(NULL, NULL, NULL);
 
        phase++;
        csum_length = SUM_LENGTH;
+       only_existing = max_size = opt_ignore_existing = 0;
+       update_only = always_checksum = size_only = 0;
        ignore_times = 1;
 
        if (verbose > 2)
@@ -670,6 +885,9 @@ void generate_files(int f_out, struct file_list *flist, char *local_name,
        }
 
        phase++;
+       only_existing = save_only_existing;
+       opt_ignore_existing = save_opt_ignore_existing;
+
        if (verbose > 2)
                rprintf(FINFO,"generate_files phase=%d\n",phase);
 
@@ -681,7 +899,8 @@ void generate_files(int f_out, struct file_list *flist, char *local_name,
        if (preserve_hard_links)
                do_hard_links();
 
-       if (need_retouch_dir_perms || need_retouch_dir_times) {
+       if ((need_retouch_dir_perms || need_retouch_dir_times)
+           && !list_only && !local_name && !dry_run) {
                /* Now we need to fix any directory permissions that were
                 * modified during the transfer and/or re-set any tweaked
                 * modified-time values. */
@@ -695,6 +914,7 @@ void generate_files(int f_out, struct file_list *flist, char *local_name,
                                       flist, file, i, -1, -1);
                }
        }
+       recv_generator(NULL, NULL, NULL, 0, -1, -1);
 
        if (verbose > 2)
                rprintf(FINFO,"generate_files finished\n");