X-Git-Url: https://mattmccutchen.net/rsync/rsync.git/blobdiff_plain/4f5b0756df0dfc925b9576db47ecce949c378e18..bdedced84b7f446f818efd4d25b8985285a50009:/util.c

diff --git a/util.c b/util.c
index eb557134..6cbe7e1c 100644
--- a/util.c
+++ b/util.c
@@ -245,7 +245,8 @@ static int safe_read(int desc, char *ptr, size_t len)
 
 /** Copy a file.
  *
- * This is used in conjunction with the --temp-dir and --backup options */
+ * This is used in conjunction with the --temp-dir, --backup, and
+ * --copy-dest options. */
 int copy_file(char *source, char *dest, mode_t mode)
 {
 	int ifd;
@@ -876,12 +877,13 @@ int pop_dir(char *dir)
 	return 1;
 }
 
-/**
- * Return the filename, turning any newlines into '?'s.  This ensures that
- * outputting it on a line of its own cannot generate an empty line.  This
- * function can handle only 2 names at a time!
- **/
-const char *safe_fname(const char *fname)
+/* Return the filename, turning any non-printable characters into escaped
+ * characters (e.g. \n -> \012, \ -> \\).  This ensures that outputting it
+ * cannot generate an empty line nor corrupt the screen.  This function can
+ * return only MAX_SAFE_NAMES values at a time!  The returned value can be
+ * longer than MAXPATHLEN (because we may be trying to output an error about
+ * a too-long filename)! */
+char *safe_fname(const char *fname)
 {
 #define MAX_SAFE_NAMES 4
 	static char fbuf[MAX_SAFE_NAMES][MAXPATHLEN*2];
@@ -891,12 +893,21 @@ const char *safe_fname(const char *fname)
 
 	ndx = (ndx + 1) % MAX_SAFE_NAMES;
 	for (t = fbuf[ndx]; *fname; fname++) {
-		if (!isprint(*fname))
-			*t++ = '?';
-		else
+		if (*fname == '\\') {
+			if ((limit -= 2) < 0)
+				break;
+			*t++ = '\\';
+			*t++ = '\\';
+		} else if (!isprint(*(uchar*)fname)) {
+			if ((limit -= 4) < 0)
+				break;
+			sprintf(t, "\\%03o", *(uchar*)fname);
+			t += 4;
+		} else {
+			if (--limit < 0)
+				break;
 			*t++ = *fname;
-		if (--limit == 0)
-			break;
+		}
 	}
 	*t = '\0';
 
@@ -922,25 +933,21 @@ char *full_fname(const char *fn)
 		p1 = p2 = "";
 	else {
 		p1 = curr_dir;
-		p2 = "/";
+		for (p2 = p1; *p2 == '/'; p2++) {}
+		if (*p2)
+			p2 = "/";
 	}
 	if (module_id >= 0) {
 		m1 = " (in ";
 		m2 = lp_name(module_id);
 		m3 = ")";
-		if (*p1) {
+		if (p1 == curr_dir) {
 			if (!lp_use_chroot(module_id)) {
 				char *p = lp_path(module_id);
 				if (*p != '/' || p[1])
 					p1 += strlen(p);
 			}
-			if (!*p1)
-				p2++;
-			else
-				p1++;
 		}
-		else
-			fn++;
 	} else
 		m1 = m2 = m3 = "";
 
@@ -1224,3 +1231,110 @@ void *_realloc_array(void *ptr, unsigned int size, unsigned long num)
 		return malloc(size * num);
 	return realloc(ptr, size * num);
 }
+
+/* Take a filename and filename length and return the most significant
+ * filename suffix we can find ("" if none), setting *len_ptr to its length
+ * (including the '.', excluding any ignored text that follows it).  This
+ * ignores suffixes such as "~", ".bak", ".old", ".orig", ".~1~", etc. */
+const char *find_filename_suffix(const char *fn, int fn_len, int *len_ptr)
+{
+	const char *suf, *s;
+	BOOL had_tilde;
+	int s_len;
+
+	/* One or more dots at the start aren't a suffix. */
+	while (fn_len && *fn == '.') fn++, fn_len--;
+
+	/* Ignore the ~ in a "foo~" filename. */
+	had_tilde = fn_len > 1 && fn[fn_len-1] == '~' ? True : False;
+	if (had_tilde)
+		fn_len--;
+
+	/* Assume we don't find a suffix. */
+	suf = "";
+	*len_ptr = 0;
+
+	/* Find the last significant suffix. */
+	for (s = fn + fn_len; fn_len > 1; ) {
+		while (*--s != '.' && s != fn) {}
+		if (s == fn)
+			break;
+		s_len = fn_len - (s - fn);
+		fn_len = s - fn;
+		/* Compare just this suffix: a '~' or ignored suffix may follow. */
+		if (s_len == 4) {
+			if (strncmp(s+1, "bak", 3) == 0
+			 || strncmp(s+1, "old", 3) == 0)
+				continue;
+		} else if (s_len == 5) {
+			if (strncmp(s+1, "orig", 4) == 0)
+				continue;
+		} else if (s_len > 2 && had_tilde
+		    && s[1] == '~' && isdigit(*(uchar*)(s+2)))
+			continue;
+		*len_ptr = s_len;
+		suf = s;
+		if (s_len == 1)
+			break;
+		/* Determine if the suffix is all digits. */
+		for (s++, s_len--; s_len > 0; s++, s_len--) {
+			if (!isdigit(*(uchar*)s))
+				return suf;
+		}
+		/* An all-digit suffix may not be that significant. */
+		s = suf;
+	}
+	return suf;
+}
+
+/* This is an implementation of the Levenshtein distance algorithm that uses
+ * a single row (a[], so len2 must not exceed MAXPATHLEN) instead of a
+ * two-dimensional matrix to save memory.  It is tweaked to factor in the
+ * byte-value distance between changed characters as a minor quantity.  The
+ * normal Levenshtein units of distance (each signifying a single change
+ * between the two strings) are defined as a "UNIT". */
+
+#define UNIT (1 << 16)
+
+uint32 fuzzy_distance(const char *s1, int len1, const char *s2, int len2)
+{
+	uint32 a[MAXPATHLEN], diag, above, left, diag_inc, above_inc, left_inc;
+	int32 cost;
+	int i1, i2;
+
+	/* If either string is empty, the distance is that of adding every
+	 * char of the other one.  Read the chars as unsigned, like the main
+	 * loop does, so the result doesn't depend on the signedness of char. */
+	if (!len1 || !len2) {
+		if (!len1) {
+			s1 = s2;
+			len1 = len2;
+		}
+		for (i1 = 0, cost = 0; i1 < len1; i1++)
+			cost += *((uchar*)s1+i1);
+		return (int32)len1 * UNIT + cost;
+	}
+
+	for (i2 = 0; i2 < len2; i2++)
+		a[i2] = (i2+1) * UNIT;
+
+	for (i1 = 0; i1 < len1; i1++) {
+		diag = i1 * UNIT;
+		above = (i1+1) * UNIT;
+		for (i2 = 0; i2 < len2; i2++) {
+			left = a[i2];
+			/* A changed char costs a UNIT plus the byte difference. */
+			if ((cost = *((uchar*)s1+i1) - *((uchar*)s2+i2)) != 0)
+				cost = UNIT + (cost < 0 ? -cost : cost);
+			diag_inc = diag + cost;
+			left_inc = left + UNIT + *((uchar*)s1+i1);
+			above_inc = above + UNIT + *((uchar*)s2+i2);
+			a[i2] = above = left < above
+			      ? (left_inc < diag_inc ? left_inc : diag_inc)
+			      : (above_inc < diag_inc ? above_inc : diag_inc);
+			diag = left;
+		}
+	}
+
+	return a[len2-1];
+}