X-Git-Url: https://mattmccutchen.net/rsync/rsync.git/blobdiff_plain/2d41264e9ef645be0337ea64a398de2e563a4287..87a57a3072c0fe742b154bd62869cc08c65625bb:/util.c diff --git a/util.c b/util.c index 5243c746..6cbe7e1c 100644 --- a/util.c +++ b/util.c @@ -32,7 +32,7 @@ extern int dry_run; extern int module_id; extern int modify_window; extern char *partial_dir; -extern struct exclude_list_struct server_exclude_list; +extern struct filter_list_struct server_filter_list; int sanitize_paths = 0; @@ -79,7 +79,7 @@ int fd_pair(int fd[2]) { int ret; -#if HAVE_SOCKETPAIR +#ifdef HAVE_SOCKETPAIR ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fd); #else ret = pipe(fd); @@ -105,9 +105,9 @@ void print_child_argv(char **cmd) "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789" ",.-_=+@/") != strlen(*cmd)) { - rprintf(FINFO, "\"%s\" ", *cmd); + rprintf(FINFO, "\"%s\" ", safe_fname(*cmd)); } else { - rprintf(FINFO, "%s ", *cmd); + rprintf(FINFO, "%s ", safe_fname(*cmd)); } } rprintf(FINFO, "\n"); @@ -130,22 +130,22 @@ void overflow(char *str) int set_modtime(char *fname, time_t modtime) { - if (dry_run) - return 0; - if (verbose > 2) { rprintf(FINFO, "set modtime of %s to (%ld) %s", - fname, (long)modtime, + safe_fname(fname), (long)modtime, asctime(localtime(&modtime))); } + if (dry_run) + return 0; + { #ifdef HAVE_UTIMBUF struct utimbuf tbuf; tbuf.actime = time(NULL); tbuf.modtime = modtime; return utime(fname,&tbuf); -#elif defined(HAVE_UTIME) +#elif defined HAVE_UTIME time_t t[2]; t[0] = time(NULL); t[1] = modtime; @@ -197,7 +197,7 @@ int create_directory_path(char *fname, int base_umask) * * Derived from GNU C's cccp.c. */ -static int full_write(int desc, char *ptr, size_t len) +int full_write(int desc, char *ptr, size_t len) { int total_written; @@ -245,7 +245,8 @@ static int safe_read(int desc, char *ptr, size_t len) /** Copy a file. * - * This is used in conjunction with the --temp-dir option */ + * This is used in conjunction with the --temp-dir, --backup, and + * --copy-dest options. 
*/ int copy_file(char *source, char *dest, mode_t mode) { int ifd; @@ -350,7 +351,7 @@ int robust_unlink(char *fname) if (verbose > 0) { rprintf(FINFO,"renaming %s to %s because of text busy\n", - fname, path); + safe_fname(fname), safe_fname(path)); } /* maybe we should return rename()'s exit status? Nah. */ @@ -479,14 +480,14 @@ int lock_range(int fd, int offset, int len) return fcntl(fd,F_SETLK,&lock) == 0; } -static int exclude_server_path(char *arg) +static int filter_server_path(char *arg) { char *s; - if (server_exclude_list.head) { + if (server_filter_list.head) { for (s = arg; (s = strchr(s, '/')) != NULL; ) { *s = '\0'; - if (check_exclude(&server_exclude_list, arg, 1) < 0) { + if (check_filter(&server_filter_list, arg, 1) < 0) { /* We must leave arg truncated! */ return 1; } @@ -502,7 +503,7 @@ static void glob_expand_one(char *s, char ***argv_ptr, int *argc_ptr, char **argv = *argv_ptr; int argc = *argc_ptr; int maxargs = *maxargs_ptr; -#if !(defined(HAVE_GLOB) && defined(HAVE_GLOB_H)) +#if !defined HAVE_GLOB || !defined HAVE_GLOB_H if (argc == maxargs) { maxargs += MAX_ARGS; if (!(argv = realloc_array(argv, char *, maxargs))) @@ -513,7 +514,7 @@ static void glob_expand_one(char *s, char ***argv_ptr, int *argc_ptr, if (!*s) s = "."; s = argv[argc++] = strdup(s); - exclude_server_path(s); + filter_server_path(s); #else glob_t globbuf; int i; @@ -529,7 +530,7 @@ static void glob_expand_one(char *s, char ***argv_ptr, int *argc_ptr, s = strdup(s); memset(&globbuf, 0, sizeof globbuf); - if (!exclude_server_path(s)) + if (!filter_server_path(s)) glob(s, 0, NULL, &globbuf); if (MAX((int)globbuf.gl_pathc, 1) > maxargs - argc) { maxargs += globbuf.gl_pathc + MAX_ARGS; @@ -664,11 +665,11 @@ int count_dir_elements(const char *p) return cnt; } -/* Turns multiple adjacent slashes into a single slash; gets rid of "./" - * elements; collapses ".." elements except for those at the start of the - * string; removes a trailing slash. 
If the resulting name would be empty, - * change it into a ".". */ -unsigned int clean_fname(char *name) +/* Turns multiple adjacent slashes into a single slash, gets rid of "./" + * elements (but not a trailing dot dir), removes a trailing slash, and + * optionally collapses ".." elements (except for those at the start of the + * string). If the resulting name would be empty, change it into a ".". */ +unsigned int clean_fname(char *name, BOOL collapse_dot_dot) { char *limit = name - 1, *t = name, *f = name; int anchored; @@ -691,7 +692,8 @@ unsigned int clean_fname(char *name) continue; } /* collapse ".." dirs */ - if (f[1] == '.' && (f[2] == '/' || !f[2])) { + if (collapse_dot_dot + && f[1] == '.' && (f[2] == '/' || !f[2])) { char *s = t - 1; if (s == name && anchored) { f += 2; @@ -703,9 +705,7 @@ unsigned int clean_fname(char *name) f += 2; continue; } - *t++ = *f++; - *t++ = *f++; - limit = t; + limit = t + 2; } } while (*f && (*t++ = *f++) != '/') {} @@ -731,12 +731,17 @@ unsigned int clean_fname(char *name) * The rootdir string contains a value to use in place of a leading slash. * Specify NULL to get the default of lp_path(module_id). * - * If depth is > 0, it is a count of how many '..'s to allow at the start - * of the path. + * If depth is >= 0, it is a count of how many '..'s to allow at the start + * of the path. Use -1 to allow unlimited depth. * - * We call clean_fname() to clean up the path, but we preserve a trailing - * slash because that is sometimes significant on command-line arguments. - */ + * We also clean the path in a manner similar to clean_fname() but with a + * few differences: + * + * Turns multiple adjacent slashes into a single slash, gets rid of "." dir + * elements (INCLUDING a trailing dot dir), PRESERVES a trailing slash, and + * ALWAYS collapses ".." elements (except for those at the start of the + * string up to "depth" deep). If the resulting name would be empty, + * change it into a ".". 
*/ char *sanitize_path(char *dest, const char *p, const char *rootdir, int depth) { char *start, *sanp; @@ -781,25 +786,22 @@ char *sanitize_path(char *dest, const char *p, const char *rootdir, int depth) } if (*p == '.' && p[1] == '.' && (p[2] == '/' || p[2] == '\0')) { /* ".." component followed by slash or end */ - if (depth > 0 && sanp == start) { - /* allow depth levels of .. at the beginning */ - --depth; - *sanp++ = *p++; - *sanp++ = *p++; - /* move virtual beginning to leave .. alone */ - start = sanp; - continue; - } - p += 2; - if (sanp != start) { - /* back up sanp one level */ - --sanp; /* now pointing at slash */ - while (sanp > start && sanp[-1] != '/') { - /* skip back up to slash */ - sanp--; + if (depth <= 0 || sanp != start) { + p += 2; + if (sanp != start) { + /* back up sanp one level */ + --sanp; /* now pointing at slash */ + while (sanp > start && sanp[-1] != '/') { + /* skip back up to slash */ + sanp--; + } } + continue; } - continue; + /* allow depth levels of .. at the beginning */ + depth--; + /* move the virtual beginning to leave the .. alone */ + start = sanp + 3; } /* copy one component through next slash */ while (*p && (*sanp++ = *p++) != '/') {} @@ -854,7 +856,7 @@ int push_dir(char *dir) curr_dir_len += len; } - curr_dir_len = clean_fname(curr_dir); + curr_dir_len = clean_fname(curr_dir, 1); return 1; } @@ -875,28 +877,41 @@ int pop_dir(char *dir) return 1; } -/** - * Return the filename, turning any newlines into '?'s. This ensures that - * outputting it on a line of its own cannot generate an empty line. This - * function can handle only 2 names at a time! - **/ -const char *safe_fname(const char *fname) +/* Return the filename, turning any non-printable characters into escaped + * characters (e.g. \n -> \012, \ -> \\). This ensures that outputting it + * cannot generate an empty line nor corrupt the screen. This function can + * return only MAX_SAFE_NAMES values at a time! 
The returned value can be + * longer than MAXPATHLEN (because we may be trying to output an error about + * a too-long filename)! */ +char *safe_fname(const char *fname) { - static char fbuf1[MAXPATHLEN], fbuf2[MAXPATHLEN]; - static char *fbuf = fbuf2; - char *nl = strchr(fname, '\n'); - - if (!nl) - return fname; - - fbuf = fbuf == fbuf1 ? fbuf2 : fbuf1; - strlcpy(fbuf, fname, MAXPATHLEN); - nl = fbuf + (nl - (char *)fname); - do { - *nl = '?'; - } while ((nl = strchr(nl+1, '\n')) != NULL); +#define MAX_SAFE_NAMES 4 + static char fbuf[MAX_SAFE_NAMES][MAXPATHLEN*2]; + static int ndx = 0; + int limit = sizeof fbuf / MAX_SAFE_NAMES - 1; + char *t; + + ndx = (ndx + 1) % MAX_SAFE_NAMES; + for (t = fbuf[ndx]; *fname; fname++) { + if (*fname == '\\') { + if ((limit -= 2) < 0) + break; + *t++ = '\\'; + *t++ = '\\'; + } else if (!isprint(*(uchar*)fname)) { + if ((limit -= 4) < 0) + break; + sprintf(t, "\\%03o", *(uchar*)fname); + t += 4; + } else { + if (--limit < 0) + break; + *t++ = *fname; + } + } + *t = '\0'; - return fbuf; + return fbuf[ndx]; } /** @@ -918,25 +933,21 @@ char *full_fname(const char *fn) p1 = p2 = ""; else { p1 = curr_dir; - p2 = "/"; + for (p2 = p1; *p2 == '/'; p2++) {} + if (*p2) + p2 = "/"; } if (module_id >= 0) { m1 = " (in "; m2 = lp_name(module_id); m3 = ")"; - if (*p1) { + if (p1 == curr_dir) { if (!lp_use_chroot(module_id)) { char *p = lp_path(module_id); if (*p != '/' || p[1]) p1 += strlen(p); } - if (!*p1) - p2++; - else - p1++; } - else - fn++; } else m1 = m2 = m3 = ""; @@ -965,9 +976,17 @@ char *partial_dir_fname(const char *fname) fn = fname; if ((int)pathjoin(t, sz, partial_dir, fn) >= sz) return NULL; - if (server_exclude_list.head - && check_exclude(&server_exclude_list, partial_fname, 0) < 0) - return NULL; + if (server_filter_list.head) { + static int len; + if (!len) + len = strlen(partial_dir); + t[len] = '\0'; + if (check_filter(&server_filter_list, partial_fname, 1) < 0) + return NULL; + t[len] = '/'; + if 
(check_filter(&server_filter_list, partial_fname, 0) < 0) + return NULL; + } return partial_fname; } @@ -989,11 +1008,7 @@ int handle_partial_dir(const char *fname, int create) dir = partial_fname; if (create) { STRUCT_STAT st; -#if SUPPORT_LINKS int statret = do_lstat(dir, &st); -#else - int statret = do_stat(dir, &st); -#endif if (statret == 0 && !S_ISDIR(st.st_mode)) { if (do_unlink(dir) < 0) return 0; @@ -1216,3 +1231,110 @@ void *_realloc_array(void *ptr, unsigned int size, unsigned long num) return malloc(size * num); return realloc(ptr, size * num); } + +/* Take a filename and filename length and return the most significant + * filename suffix we can find. This ignores suffixes such as "~", + * ".bak", ".orig", ".~1~", etc. */ +const char *find_filename_suffix(const char *fn, int fn_len, int *len_ptr) +{ + const char *suf, *s; + BOOL had_tilde; + int s_len; + + /* One or more dots at the start aren't a suffix. */ + while (fn_len && *fn == '.') fn++, fn_len--; + + /* Ignore the ~ in a "foo~" filename. */ + if (fn_len > 1 && fn[fn_len-1] == '~') + fn_len--, had_tilde = True; + else + had_tilde = False; + + /* Assume we don't find a suffix. */ + suf = ""; + *len_ptr = 0; + + /* Find the last significant suffix. */ + for (s = fn + fn_len; fn_len > 1; ) { + while (*--s != '.' && s != fn) {} + if (s == fn) + break; + s_len = fn_len - (s - fn); + fn_len = s - fn; + if (s_len == 4) { + if (strcmp(s+1, "bak") == 0 + || strcmp(s+1, "old") == 0) + continue; + } else if (s_len == 5) { + if (strcmp(s+1, "orig") == 0) + continue; + } else if (s_len > 2 && had_tilde + && s[1] == '~' && isdigit(*(uchar*)(s+2))) + continue; + *len_ptr = s_len; + suf = s; + if (s_len == 1) + break; + /* Determine if the suffix is all digits. */ + for (s++, s_len--; s_len > 0; s++, s_len--) { + if (!isdigit(*(uchar*)s)) + return suf; + } + /* An all-digit suffix may not be that significant. 
*/ + s = suf; + } + + return suf; +} + +/* This is an implementation of the Levenshtein distance algorithm. It + * was implemented to avoid needing a two-dimensional matrix (to save + * memory). It was also tweaked to try to factor in the ASCII distance + * between changed characters as a minor distance quantity. The normal + * Levenshtein units of distance (each signifying a single change between + * the two strings) are defined as a "UNIT". */ + +#define UNIT (1 << 16) + +uint32 fuzzy_distance(const char *s1, int len1, const char *s2, int len2) +{ + uint32 a[MAXPATHLEN], diag, above, left, diag_inc, above_inc, left_inc; + int32 cost; + int i1, i2; + + if (!len1 || !len2) { + if (!len1) { + s1 = s2; + len1 = len2; + } + for (i1 = 0, cost = 0; i1 < len1; i1++) + cost += s1[i1]; + return (int32)len1 * UNIT + cost; + } + + for (i2 = 0; i2 < len2; i2++) + a[i2] = (i2+1) * UNIT; + + for (i1 = 0; i1 < len1; i1++) { + diag = i1 * UNIT; + above = (i1+1) * UNIT; + for (i2 = 0; i2 < len2; i2++) { + left = a[i2]; + if ((cost = *((uchar*)s1+i1) - *((uchar*)s2+i2)) != 0) { + if (cost < 0) + cost = UNIT - cost; + else + cost = UNIT + cost; + } + diag_inc = diag + cost; + left_inc = left + UNIT + *((uchar*)s1+i1); + above_inc = above + UNIT + *((uchar*)s2+i2); + a[i2] = above = left < above + ? (left_inc < diag_inc ? left_inc : diag_inc) + : (above_inc < diag_inc ? above_inc : diag_inc); + diag = left; + } + } + + return a[len2-1]; +}