X-Git-Url: https://mattmccutchen.net/rsync/rsync.git/blobdiff_plain/31937d363b3e622f045781d410a5d718e87fa867..8e3ead09a3768d1b38f65c8e296903af5fbbbc42:/generator.c diff --git a/generator.c b/generator.c index bbe70fb0..f59a5128 100644 --- a/generator.c +++ b/generator.c @@ -25,10 +25,15 @@ extern int verbose; extern int dry_run; +extern int log_format_has_i; +extern int log_format_has_o_or_i; +extern int daemon_log_format_has_i; +extern int am_root; +extern int am_server; +extern int am_daemon; extern int relative_paths; extern int keep_dirlinks; extern int preserve_links; -extern int am_root; extern int preserve_devices; extern int preserve_hard_links; extern int preserve_perms; @@ -47,6 +52,7 @@ extern int size_only; extern OFF_T max_size; extern int io_timeout; extern int protocol_version; +extern int fuzzy_basis; extern int always_checksum; extern char *partial_dir; extern char *basis_dir[]; @@ -60,6 +66,7 @@ extern int only_existing; extern int orig_umask; extern int safe_symlinks; extern long block_size; /* "long" because popt can't set an int32. */ +extern struct stats stats; extern struct filter_list_struct server_filter_list; @@ -78,6 +85,49 @@ static int unchanged_attrs(struct file_struct *file, STRUCT_STAT *st) return 1; } + +#define SID_UPDATING ITEM_UPDATING +#define SID_REPORT_CHECKSUM ITEM_REPORT_CHECKSUM +#define SID_NO_DEST_AND_NO_UPDATE (1<<16) + +static void itemize(struct file_struct *file, int statret, STRUCT_STAT *st, + int32 sflags, int f_out, int ndx) +{ + int iflags = sflags & (SID_UPDATING | SID_REPORT_CHECKSUM); + + if (statret >= 0) { + if (S_ISREG(file->mode) && file->length != st->st_size) + iflags |= ITEM_REPORT_SIZE; + if (!(sflags & SID_NO_DEST_AND_NO_UPDATE)) { + int keep_time = !preserve_times ? 0 + : S_ISDIR(file->mode) ? !omit_dir_times + : !S_ISLNK(file->mode); + + if ((iflags & ITEM_UPDATING && !keep_time) + || (keep_time && file->modtime != st->st_mtime)) + iflags |= ITEM_REPORT_TIME; + if (preserve_perms && file->mode != st->st_mode) + iflags |= ITEM_REPORT_PERMS; + if (preserve_uid && am_root && file->uid != st->st_uid) + iflags |= ITEM_REPORT_OWNER; + if (preserve_gid && file->gid != GID_NONE + && st->st_gid != file->gid) + iflags |= ITEM_REPORT_GROUP; + } + } else + iflags |= ITEM_IS_NEW | ITEM_UPDATING; + + if ((iflags || verbose > 1) && !read_batch) { + if (protocol_version >= 29) { + if (ndx >= 0) + write_int(f_out, ndx); + write_shortint(f_out, iflags); + } else if (ndx >= 0) + log_recv(file, &stats, iflags); + } +} + + /* Perform our quick-check heuristic for determining if a file is unchanged. */ static int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st) { @@ -186,7 +236,7 @@ static void sum_sizes_sqroot(struct sum_struct *sum, int64 len) */ static void generate_and_send_sums(int fd, OFF_T len, int f_out, int f_copy) { - size_t i; + int32 i; struct map_struct *mapbuf; struct sum_struct sum; OFF_T offset = 0; @@ -228,6 +278,60 @@ static void generate_and_send_sums(int fd, OFF_T len, int f_out, int f_copy) } +/* Try to find a filename in the same dir as "fname" with a similar name. */ +static int find_fuzzy(struct file_struct *file, struct file_list *dirlist) +{ + int fname_len, fname_suf_len; + const char *fname_suf, *fname = file->basename; + uint32 lowest_dist = 0x7FFFFFFF; + int j, lowest_j = -1; + + fname_len = strlen(fname); + fname_suf = find_filename_suffix(fname, fname_len, &fname_suf_len); + + for (j = 0; j < dirlist->count; j++) { + struct file_struct *fp = dirlist->files[j]; + const char *suf, *name; + int len, suf_len; + uint32 dist; + + if (!S_ISREG(fp->mode) || !fp->length + || fp->flags & FLAG_NO_FUZZY) + continue; + + name = fp->basename; + + if (fp->length == file->length + && fp->modtime == file->modtime) { + if (verbose > 4) { + rprintf(FINFO, + "fuzzy size/modtime match for %s\n", + name); + } + return j; + } + + len = strlen(name); + suf = find_filename_suffix(name, len, &suf_len); + + dist = fuzzy_distance(name, len, fname, fname_len); + /* Add some extra weight to how well the suffixes match. */ + dist += fuzzy_distance(suf, suf_len, fname_suf, fname_suf_len) + * 10; + if (verbose > 4) { + rprintf(FINFO, "fuzzy distance for %s = %d.%05d\n", + name, (int)(dist>>16), (int)(dist&0xFFFF)); + } + if (dist <= lowest_dist) { + lowest_dist = dist; + lowest_j = j; + } + } + + return lowest_j; +} + + /* Acts on flist->file's ndx'th item, whose name is fname. If a directory, * make sure it exists, and has the right permissions/timestamp info. For * all other non-regular files (symlinks, etc.) we create them here. For @@ -240,34 +344,99 @@ static void recv_generator(char *fname, struct file_list *flist, struct file_struct *file, int ndx, int f_out, int f_out_name) { + static int missing_below = -1, excluded_below = -1; + static char *fuzzy_dirname = NULL; + static struct file_list *fuzzy_dirlist = NULL; + struct file_struct *fuzzy_file = NULL; int fd = -1, f_copy = -1; STRUCT_STAT st, partial_st; struct file_struct *back_file = NULL; int statret, stat_errno; char *fnamecmp, *partialptr, *backupptr = NULL; char fnamecmpbuf[MAXPATHLEN]; + int itemizing, maybe_PERMS_REPORT; uchar fnamecmp_type; + enum logcode code; if (list_only) return; - if (verbose > 2) - rprintf(FINFO, "recv_generator(%s,%d)\n", safe_fname(fname), ndx); + if (protocol_version >= 29) { + itemizing = 1; + code = daemon_log_format_has_i ? 0 : FLOG; + maybe_PERMS_REPORT = log_format_has_i ? 0 : PERMS_REPORT; + } else if (am_daemon) { + itemizing = daemon_log_format_has_i && !dry_run; + code = itemizing || dry_run ? FCLIENT : FINFO; + maybe_PERMS_REPORT = PERMS_REPORT; + } else if (!am_server) { + itemizing = log_format_has_i; + code = itemizing ? 0 : FINFO; + maybe_PERMS_REPORT = log_format_has_i ? 0 : PERMS_REPORT; + } else { + itemizing = 0; + code = FINFO; + maybe_PERMS_REPORT = PERMS_REPORT; + } - if (server_filter_list.head - && check_filter(&server_filter_list, fname, - S_ISDIR(file->mode)) < 0) { - if (verbose) { - rprintf(FINFO, "skipping server-excluded file \"%s\"\n", - safe_fname(fname)); + if (!fname) { + if (fuzzy_dirlist) { + flist_free(fuzzy_dirlist); + fuzzy_dirlist = NULL; + fuzzy_dirname = NULL; + } + if (missing_below >= 0) { + dry_run--; + missing_below = -1; } return; } + if (verbose > 2) { + rprintf(FINFO, "recv_generator(%s,%d)\n", + safe_fname(fname), ndx); + } + + if (server_filter_list.head) { + if (excluded_below >= 0) { + if (file->dir.depth > excluded_below) + goto skipping; + excluded_below = -1; + } + if (check_filter(&server_filter_list, fname, + S_ISDIR(file->mode)) < 0) { + if (S_ISDIR(file->mode)) + excluded_below = file->dir.depth; + skipping: + if (verbose) { + rprintf(FINFO, + "skipping server-excluded file \"%s\"\n", + safe_fname(fname)); + } + return; + } + } + + if (missing_below >= 0 && file->dir.depth <= missing_below) { + dry_run--; + missing_below = -1; + } if (dry_run > 1) { statret = -1; stat_errno = ENOENT; } else { + if (fuzzy_basis && S_ISREG(file->mode)) { + char *dn = file->dirname ? file->dirname : "."; + /* Yes, identical dirnames are guaranteed to have + * identical pointers at this point. */ + if (fuzzy_dirname != dn) { + if (fuzzy_dirlist) + flist_free(fuzzy_dirlist); + fuzzy_dirname = dn; + fuzzy_dirlist = get_dirlist(fuzzy_dirname, 1); + } + } + statret = link_stat(fname, &st, keep_dirlinks && S_ISDIR(file->mode)); stat_errno = errno; @@ -298,9 +467,15 @@ static void recv_generator(char *fname, struct file_list *flist, * we need to delete it. If it doesn't exist, then * (perhaps recursively) create it. */ if (statret == 0 && !S_ISDIR(st.st_mode)) { - delete_file(fname, DEL_TERSE); + delete_file(fname, st.st_mode, DEL_TERSE); statret = -1; } + if (dry_run && statret != 0 && missing_below < 0) { + missing_below = file->dir.depth; + dry_run++; + } + if (itemizing && f_out != -1) + itemize(file, statret, &st, 0, f_out, ndx); if (statret != 0 && do_mkdir(fname,file->mode) != 0 && errno != EEXIST) { if (!relative_paths || errno != ENOENT || create_directory_path(fname, orig_umask) < 0 @@ -311,55 +486,73 @@ static void recv_generator(char *fname, struct file_list *flist, } } if (set_perms(fname, file, statret ? NULL : &st, 0) - && verbose && f_out != -1) - rprintf(FINFO, "%s/\n", safe_fname(fname)); + && verbose && code && f_out != -1) + rprintf(code, "%s/\n", safe_fname(fname)); if (delete_during && f_out != -1 && csum_length != SUM_LENGTH && (file->flags & FLAG_DEL_HERE)) delete_in_dir(flist, fname, file); return; - } else if (max_size && file->length > max_size) { - if (verbose > 1) - rprintf(FINFO, "%s is over max-size\n", fname); + } + + if (max_size && file->length > max_size) { + if (verbose > 1) { + rprintf(FINFO, "%s is over max-size\n", + safe_fname(fname)); + } return; } if (preserve_links && S_ISLNK(file->mode)) { -#if SUPPORT_LINKS +#ifdef SUPPORT_LINKS if (safe_symlinks && unsafe_symlink(file->u.link, fname)) { if (verbose) { - rprintf(FINFO, "ignoring unsafe symlink %s -> \"%s\"\n", - full_fname(fname), file->u.link); + rprintf(FINFO, + "ignoring unsafe symlink %s -> \"%s\"\n", + full_fname(fname), + safe_fname(file->u.link)); } return; } if (statret == 0) { - int dflag = S_ISDIR(st.st_mode) ? DEL_DIR : 0; char lnk[MAXPATHLEN]; int len; - if (!dflag + if (!S_ISDIR(st.st_mode) && (len = readlink(fname, lnk, MAXPATHLEN-1)) > 0) { lnk[len] = 0; /* A link already pointing to the * right place -- no further action * required. */ if (strcmp(lnk, file->u.link) == 0) { + if (itemizing) { + itemize(file, 0, &st, 0, + f_out, ndx); + } set_perms(fname, file, &st, - PERMS_REPORT); + maybe_PERMS_REPORT); return; } } /* Not the right symlink (or not a symlink), so * delete it. */ - delete_file(fname, dflag | DEL_TERSE); + if (S_ISLNK(st.st_mode)) + delete_file(fname, st.st_mode, DEL_TERSE); + else { + delete_file(fname, st.st_mode, DEL_TERSE); + statret = -1; + } } if (do_symlink(file->u.link,fname) != 0) { rsyserr(FERROR, errno, "symlink %s -> \"%s\" failed", full_fname(fname), safe_fname(file->u.link)); } else { set_perms(fname,file,NULL,0); - if (verbose) { - rprintf(FINFO, "%s -> %s\n", safe_fname(fname), + if (itemizing) { + itemize(file, statret, &st, SID_UPDATING, + f_out, ndx); + } + if (code && verbose) { + rprintf(code, "%s -> %s\n", safe_fname(fname), safe_fname(file->u.link)); } } @@ -371,8 +564,12 @@ static void recv_generator(char *fname, struct file_list *flist, if (statret != 0 || st.st_mode != file->mode || st.st_rdev != file->u.rdev) { - int dflag = S_ISDIR(st.st_mode) ? DEL_DIR : 0; - delete_file(fname, dflag | DEL_TERSE); + if (IS_DEVICE(st.st_mode)) + delete_file(fname, st.st_mode, DEL_TERSE); + else { + delete_file(fname, st.st_mode, DEL_TERSE); + statret = -1; + } if (verbose > 2) { rprintf(FINFO,"mknod(%s,0%o,0x%x)\n", safe_fname(fname), @@ -383,13 +580,21 @@ static void recv_generator(char *fname, struct file_list *flist, full_fname(fname)); } else { set_perms(fname,file,NULL,0); - if (verbose) { - rprintf(FINFO, "%s\n", + if (itemizing) { + itemize(file, statret, &st, SID_UPDATING, + f_out, ndx); + } + if (code && verbose) { + rprintf(code, "%s\n", safe_fname(fname)); } } } else { - set_perms(fname, file, &st, PERMS_REPORT); + if (itemizing) { + itemize(file, statret, &st, 0, + f_out, ndx); + } + set_perms(fname, file, &st, maybe_PERMS_REPORT); } return; } @@ -406,7 +611,7 @@ static void recv_generator(char *fname, struct file_list *flist, fnamecmp = fname; fnamecmp_type = FNAMECMP_FNAME; - if (statret == -1 && basis_dir[0] != NULL) { + if (statret != 0 && basis_dir[0] != NULL) { int fallback_match = -1; int match_level = 0; int i = 0; @@ -440,13 +645,13 @@ static void recv_generator(char *fname, struct file_list *flist, pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, basis_dir[i], fname); } -#if HAVE_LINK +#ifdef HAVE_LINK if (link_dest && match_level == 3 && !dry_run) { if (do_link(fnamecmpbuf, fname) < 0) { if (verbose) { rsyserr(FINFO, errno, "link %s => %s", - fnamecmpbuf, + full_fname(fnamecmpbuf), safe_fname(fname)); } fnamecmp = fnamecmpbuf; @@ -462,8 +667,7 @@ static void recv_generator(char *fname, struct file_list *flist, } if (statret == 0 && !S_ISREG(st.st_mode)) { - int dflag = S_ISDIR(st.st_mode) ? DEL_DIR : 0; - if (delete_file(fname, dflag | DEL_TERSE) != 0) + if (delete_file(fname, st.st_mode, DEL_TERSE) != 0) return; statret = -1; stat_errno = ENOENT; @@ -472,12 +676,30 @@ static void recv_generator(char *fname, struct file_list *flist, if (partial_dir && (partialptr = partial_dir_fname(fname)) != NULL && link_stat(partialptr, &partial_st, 0) == 0 && S_ISREG(partial_st.st_mode)) { - if (statret == -1) + if (statret != 0) goto prepare_to_open; } else partialptr = NULL; - if (statret == -1) { + if (statret != 0 && fuzzy_basis && dry_run <= 1) { + int j = find_fuzzy(file, fuzzy_dirlist); + if (j >= 0) { + fuzzy_file = fuzzy_dirlist->files[j]; + f_name_to(fuzzy_file, fnamecmpbuf); + if (verbose > 2) { + rprintf(FINFO, "fuzzy basis selected for %s: %s\n", + safe_fname(fname), safe_fname(fnamecmpbuf)); + } + st.st_mode = fuzzy_file->mode; + st.st_size = fuzzy_file->length; + st.st_mtime = fuzzy_file->modtime; + statret = 0; + fnamecmp = fnamecmpbuf; + fnamecmp_type = FNAMECMP_FUZZY; + } + } + + if (statret != 0) { if (preserve_hard_links && hard_link_check(file, HL_SKIP)) return; if (stat_errno == ENOENT) @@ -505,9 +727,17 @@ static void recv_generator(char *fname, struct file_list *flist, if (!compare_dest && fnamecmp_type <= FNAMECMP_BASIS_DIR_HIGH) ; + else if (fnamecmp_type == FNAMECMP_FUZZY) + ; else if (unchanged_file(fnamecmp, file, &st)) { + if (itemizing) { + itemize(file, statret, &st, + fnamecmp_type == FNAMECMP_FNAME + ? 0 : SID_NO_DEST_AND_NO_UPDATE, + f_out, ndx); + } if (fnamecmp_type == FNAMECMP_FNAME) - set_perms(fname, file, &st, PERMS_REPORT); + set_perms(fname, file, &st, maybe_PERMS_REPORT); return; } @@ -519,12 +749,19 @@ prepare_to_open: statret = 0; } - if (dry_run || whole_file > 0) { - statret = -1; + if (dry_run || read_batch) goto notify_others; - } - if (read_batch) + if (whole_file > 0) { + if (statret == 0) + statret = 1; goto notify_others; + } + + if (fuzzy_basis) { + int j = flist_find(fuzzy_dirlist, file); + if (j >= 0) /* don't use changing file as future fuzzy basis */ + fuzzy_dirlist->files[j]->flags |= FLAG_NO_FUZZY; + } /* open the file */ fd = do_open(fnamecmp, O_RDONLY, 0); @@ -577,10 +814,31 @@ prepare_to_open: notify_others: write_int(f_out, ndx); - if (protocol_version >= 29 && inplace && !read_batch) - write_byte(f_out, fnamecmp_type); - if (f_out_name >= 0) + if (itemizing) { + itemize(file, statret, &st, SID_UPDATING + | (always_checksum ? SID_REPORT_CHECKSUM : 0), + f_out, -1); + if (inplace && !read_batch) + write_byte(f_out, fnamecmp_type); + } + if (f_out_name >= 0) { write_byte(f_out_name, fnamecmp_type); + if (fnamecmp_type == FNAMECMP_FUZZY) { + uchar lenbuf[3], *lb = lenbuf; + int len = strlen(fuzzy_file->basename); + if (len > 0x7F) { +#if MAXPATHLEN > 0x7FFF + *lb++ = len / 0x10000 + 0x80; + *lb++ = len / 0x100; +#else + *lb++ = len / 0x100 + 0x80; +#endif + } + *lb = len; + write_buf(f_out_name, lenbuf, lb - lenbuf + 1); + write_buf(f_out_name, fuzzy_file->basename, len); + } + } if (dry_run || read_batch) return; @@ -593,7 +851,7 @@ notify_others: set_perms(backupptr, back_file, NULL, 0); if (verbose > 1) { rprintf(FINFO, "backed up %s to %s\n", - fname, backupptr); + safe_fname(fname), safe_fname(backupptr)); } free(back_file); } @@ -651,6 +909,7 @@ void generate_files(int f_out, struct file_list *flist, char *local_name, recv_generator(local_name ? local_name : f_name_to(file, fbuf), flist, file, i, f_out, f_out_name); } + recv_generator(NULL, NULL, NULL, 0, -1, -1); if (delete_during) delete_in_dir(NULL, NULL, NULL); @@ -703,6 +962,7 @@ void generate_files(int f_out, struct file_list *flist, char *local_name, flist, file, i, -1, -1); } } + recv_generator(NULL, NULL, NULL, 0, -1, -1); if (verbose > 2) rprintf(FINFO,"generate_files finished\n");