X-Git-Url: https://mattmccutchen.net/rsync/rsync.git/blobdiff_plain/4f5b0756df0dfc925b9576db47ecce949c378e18..bdedced84b7f446f818efd4d25b8985285a50009:/util.c diff --git a/util.c b/util.c index eb557134..6cbe7e1c 100644 --- a/util.c +++ b/util.c @@ -245,7 +245,8 @@ static int safe_read(int desc, char *ptr, size_t len) /** Copy a file. * - * This is used in conjunction with the --temp-dir and --backup options */ + * This is used in conjunction with the --temp-dir, --backup, and + * --copy-dest options. */ int copy_file(char *source, char *dest, mode_t mode) { int ifd; @@ -876,12 +877,13 @@ int pop_dir(char *dir) return 1; } -/** - * Return the filename, turning any newlines into '?'s. This ensures that - * outputting it on a line of its own cannot generate an empty line. This - * function can handle only 2 names at a time! - **/ -const char *safe_fname(const char *fname) +/* Return the filename, turning any non-printable characters into escaped + * characters (e.g. \n -> \012, \ -> \\). This ensures that outputting it + * cannot generate an empty line nor corrupt the screen. This function can + * return only MAX_SAFE_NAMES values at a time! The returned value can be + * longer than MAXPATHLEN (because we may be trying to output an error about + * a too-long filename)! */ +char *safe_fname(const char *fname) { #define MAX_SAFE_NAMES 4 static char fbuf[MAX_SAFE_NAMES][MAXPATHLEN*2]; @@ -891,12 +893,21 @@ const char *safe_fname(const char *fname) ndx = (ndx + 1) % MAX_SAFE_NAMES; for (t = fbuf[ndx]; *fname; fname++) { - if (!isprint(*fname)) - *t++ = '?'; - else + if (*fname == '\\') { + if ((limit -= 2) < 0) + break; + *t++ = '\\'; + *t++ = '\\'; + } else if (!isprint(*(uchar*)fname)) { + if ((limit -= 4) < 0) + break; + sprintf(t, "\\%03o", *(uchar*)fname); + t += 4; + } else { + if (--limit < 0) + break; *t++ = *fname; - if (--limit == 0) - break; + } } *t = '\0'; @@ -922,25 +933,21 @@ char *full_fname(const char *fn) p1 = p2 = ""; else { p1 = curr_dir; - p2 = "/"; + for (p2 = p1; *p2 == '/'; p2++) {} + if (*p2) + p2 = "/"; } if (module_id >= 0) { m1 = " (in "; m2 = lp_name(module_id); m3 = ")"; - if (*p1) { + if (p1 == curr_dir) { if (!lp_use_chroot(module_id)) { char *p = lp_path(module_id); if (*p != '/' || p[1]) p1 += strlen(p); } - if (!*p1) - p2++; - else - p1++; } - else - fn++; } else m1 = m2 = m3 = ""; @@ -1224,3 +1231,110 @@ void *_realloc_array(void *ptr, unsigned int size, unsigned long num) return malloc(size * num); return realloc(ptr, size * num); } + +/* Take a filename and filename length and return the most significant + * filename suffix we can find. This ignores suffixes such as "~", + * ".bak", ".orig", ".~1~", etc. */ +const char *find_filename_suffix(const char *fn, int fn_len, int *len_ptr) +{ + const char *suf, *s; + BOOL had_tilde; + int s_len; + + /* One or more dots at the start aren't a suffix. */ + while (fn_len && *fn == '.') fn++, fn_len--; + + /* Ignore the ~ in a "foo~" filename. */ + if (fn_len > 1 && fn[fn_len-1] == '~') + fn_len--, had_tilde = True; + else + had_tilde = False; + + /* Assume we don't find an suffix. */ + suf = ""; + *len_ptr = 0; + + /* Find the last significant suffix. */ + for (s = fn + fn_len; fn_len > 1; ) { + while (*--s != '.' && s != fn) {} + if (s == fn) + break; + s_len = fn_len - (s - fn); + fn_len = s - fn; + if (s_len == 4) { + if (strcmp(s+1, "bak") == 0 + || strcmp(s+1, "old") == 0) + continue; + } else if (s_len == 5) { + if (strcmp(s+1, "orig") == 0) + continue; + } else if (s_len > 2 && had_tilde + && s[1] == '~' && isdigit(*(uchar*)(s+2))) + continue; + *len_ptr = s_len; + suf = s; + if (s_len == 1) + break; + /* Determine if the suffix is all digits. */ + for (s++, s_len--; s_len > 0; s++, s_len--) { + if (!isdigit(*(uchar*)s)) + return suf; + } + /* An all-digit suffix may not be that signficant. */ + s = suf; + } + + return suf; +} + +/* This is an implementation of the Levenshtein distance algorithm. It + * was implemented to avoid needing a two-dimensional matrix (to save + * memory). It was also tweaked to try to factor in the ASCII distance + * between changed characters as a minor distance quantity. The normal + * Levenshtein units of distance (each signifying a single change between + * the two strings) are defined as a "UNIT". */ + +#define UNIT (1 << 16) + +uint32 fuzzy_distance(const char *s1, int len1, const char *s2, int len2) +{ + uint32 a[MAXPATHLEN], diag, above, left, diag_inc, above_inc, left_inc; + int32 cost; + int i1, i2; + + if (!len1 || !len2) { + if (!len1) { + s1 = s2; + len1 = len2; + } + for (i1 = 0, cost = 0; i1 < len1; i1++) + cost += s1[i1]; + return (int32)len1 * UNIT + cost; + } + + for (i2 = 0; i2 < len2; i2++) + a[i2] = (i2+1) * UNIT; + + for (i1 = 0; i1 < len1; i1++) { + diag = i1 * UNIT; + above = (i1+1) * UNIT; + for (i2 = 0; i2 < len2; i2++) { + left = a[i2]; + if ((cost = *((uchar*)s1+i1) - *((uchar*)s2+i2)) != 0) { + if (cost < 0) + cost = UNIT - cost; + else + cost = UNIT + cost; + } + diag_inc = diag + cost; + left_inc = left + UNIT + *((uchar*)s1+i1); + above_inc = above + UNIT + *((uchar*)s2+i2); + a[i2] = above = left < above + ? (left_inc < diag_inc ? left_inc : diag_inc) + : (above_inc < diag_inc ? above_inc : diag_inc); + diag = left; + } + } + + return a[len2-1]; +}