Tweaked a comment.

[rsync/rsync.git] / util.c
diff --git a/util.c b/util.c

index eb55713..a43cfb0 100644 (file)
--- a/util.c
+++ b/util.c
@@ -876,11 +876,11 @@ int pop_dir(char *dir)
         return 1;
  }
  
-/**
- * Return the filename, turning any newlines into '?'s.  This ensures that
- * outputting it on a line of its own cannot generate an empty line.  This
- * function can handle only 2 names at a time!
- **/
+/* Return the filename, turning any non-printable characters into '?'s.
+ * This ensures that outputting it on a line of its own cannot generate an
+ * empty line.  This function can return only MAX_SAFE_NAMES values at a
+ * time!  The returned value can be longer than MAXPATHLEN (because we
+ * may be trying to output an error about a too-long filename)! */
  const char *safe_fname(const char *fname)
  {
  #define MAX_SAFE_NAMES 4
@@ -891,7 +891,7 @@ const char *safe_fname(const char *fname)
  
         ndx = (ndx + 1) % MAX_SAFE_NAMES;
         for (t = fbuf[ndx]; *fname; fname++) {
-               if (!isprint(*fname))
+               if (!isprint(*(uchar*)fname))
                         *t++ = '?';
                 else
                         *t++ = *fname;
@@ -922,25 +922,21 @@ char *full_fname(const char *fn)
                 p1 = p2 = "";
         else {
                 p1 = curr_dir;
-               p2 = "/";
+               for (p2 = p1; *p2 == '/'; p2++) {}
+               if (*p2)
+                       p2 = "/";
         }
         if (module_id >= 0) {
                 m1 = " (in ";
                 m2 = lp_name(module_id);
                 m3 = ")";
-               if (*p1) {
+               if (p1 == curr_dir) {
                         if (!lp_use_chroot(module_id)) {
                                 char *p = lp_path(module_id);
                                 if (*p != '/' || p[1])
                                         p1 += strlen(p);
                         }
-                       if (!*p1)
-                               p2++;
-                       else
-                               p1++;
                 }
-               else
-                       fn++;
         } else
                 m1 = m2 = m3 = "";
  
@@ -1224,3 +1220,110 @@ void *_realloc_array(void *ptr, unsigned int size, unsigned long num)
                 return malloc(size * num);
         return realloc(ptr, size * num);
  }
+
+/* Take a filename and filename length and return the most significant
+ * filename suffix we can find.  This ignores suffixes such as "~",
+ * ".bak", ".orig", ".~1~", etc. */
+const char *find_filename_suffix(const char *fn, int fn_len, int *len_ptr)
+{
+       const char *suf, *s;
+       BOOL had_tilde;
+       int s_len;
+
+       /* One or more dots at the start aren't a suffix. */
+       while (fn_len && *fn == '.') fn++, fn_len--;
+
+       /* Ignore the ~ in a "foo~" filename. */
+       if (fn_len > 1 && fn[fn_len-1] == '~')
+               fn_len--, had_tilde = True;
+       else
+               had_tilde = False;
+
+       /* Assume we don't find an suffix. */
+       suf = "";
+       *len_ptr = 0;
+
+       /* Find the last significant suffix. */
+       for (s = fn + fn_len; fn_len > 1; ) {
+               while (*--s != '.' && s != fn) {}
+               if (s == fn)
+                       break;
+               s_len = fn_len - (s - fn);
+               fn_len = s - fn;
+               if (s_len == 3) {
+                       if (strcmp(s+1, "bak") == 0
+                        || strcmp(s+1, "old") == 0)
+                               continue;
+               } else if (s_len == 4) {
+                       if (strcmp(s+1, "orig") == 0)
+                               continue;
+               } else if (s_len > 2 && had_tilde
+                   && s[1] == '~' && isdigit(*(uchar*)(s+2)))
+                       continue;
+               *len_ptr = s_len;
+               suf = s;
+               if (s_len == 1)
+                       break;
+               /* Determine if the suffix is all digits. */
+               for (s++, s_len--; s_len > 0; s++, s_len--) {
+                       if (!isdigit(*(uchar*)s))
+                               return suf;
+               }
+               /* An all-digit suffix may not be that signficant. */
+               s = suf;
+       }
+
+       return suf;
+}
+
+/* This is an implementation of the Levenshtein distance algorithm.  It
+ * was implemented to avoid needing a two-dimensional matrix (to save
+ * memory).  It was also tweaked to try to factor in the ASCII distance
+ * between changed characters as a minor distance quantity.  The normal
+ * Levenshtein units of distance (each signifying a single change between
+ * the two strings) are defined as a "UNIT". */
+
+#define UNIT (1 << 16)
+
+uint32 fuzzy_distance(const char *s1, int len1, const char *s2, int len2)
+{
+       uint32 a[MAXPATHLEN], diag, above, left, diag_inc, above_inc, left_inc;
+       int32 cost;
+       int i1, i2;
+
+       if (!len1 || !len2) {
+               if (!len1) {
+                       s1 = s2;
+                       len1 = len2;
+               }
+               for (i1 = 0, cost = 0; i1 < len1; i1++)
+                       cost += s1[i1];
+               return (int32)len1 * UNIT + cost;
+       }
+
+       for (i2 = 0; i2 < len2; i2++)
+               a[i2] = (i2+1) * UNIT;
+
+       for (i1 = 0; i1 < len1; i1++) {
+               diag = i1 * UNIT;
+               above = (i1+1) * UNIT;
+               for (i2 = 0; i2 < len2; i2++) {
+                       left = a[i2];
+                       if ((cost = *((uchar*)s1+i1) - *((uchar*)s2+i2)) != 0) {
+                               if (cost < 0)
+                                       cost = UNIT - cost;
+                               else
+                                       cost = UNIT + cost;
+                       }
+                       diag_inc = diag + cost;
+                       left_inc = left + UNIT + *((uchar*)s1+i1);
+                       above_inc = above + UNIT + *((uchar*)s2+i2);
+                       a[i2] = above = left < above
+                             ? (left_inc < diag_inc ? left_inc : diag_inc)
+                             : (above_inc < diag_inc ? above_inc : diag_inc);
+                       diag = left;
+               }
+       }
+
+       return a[len2-1];
+}