-/* -*- c-file-style: "linux" -*-
+/*
+ * Utility routines used in rsync.
*
- * Copyright (C) 1996-2000 by Andrew Tridgell
- * Copyright (C) Paul Mackerras 1996
- * Copyright (C) 2001, 2002 by Martin Pool <mbp@samba.org>
+ * Copyright (C) 1996-2000 Andrew Tridgell
+ * Copyright (C) 1996 Paul Mackerras
+ * Copyright (C) 2001, 2002 Martin Pool <mbp@samba.org>
+ * Copyright (C) 2003, 2004, 2005, 2006 Wayne Davison
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
*/
-/**
- * @file
- *
- * Utilities used in rsync
- **/
-
#include "rsync.h"
extern int verbose;
extern int dry_run;
extern int module_id;
extern int modify_window;
+extern int relative_paths;
+extern int human_readable;
+extern unsigned int module_dirlen;
+extern mode_t orig_umask;
extern char *partial_dir;
-extern struct exclude_list_struct server_exclude_list;
+extern struct filter_list_struct server_filter_list;
int sanitize_paths = 0;
+char curr_dir[MAXPATHLEN];
+unsigned int curr_dir_len;
+int curr_dir_depth; /* This is only set for a sanitizing daemon. */
-
-/**
- * Set a fd into nonblocking mode
- **/
+/* Set a fd into nonblocking mode. */
void set_nonblocking(int fd)
{
int val;
}
}
-/**
- * Set a fd into blocking mode
- **/
+/* Set a fd into blocking mode. */
void set_blocking(int fd)
{
int val;
}
}
-
/**
* Create a file descriptor pair - like pipe() but use socketpair if
* possible (because of blocking issues on pipes).
{
int ret;
-#if HAVE_SOCKETPAIR
+#ifdef HAVE_SOCKETPAIR
ret = socketpair(AF_UNIX, SOCK_STREAM, 0, fd);
#else
ret = pipe(fd);
return ret;
}
-
void print_child_argv(char **cmd)
{
rprintf(FINFO, "opening connection using ");
rprintf(FINFO, "\n");
}
-
void out_of_memory(char *str)
{
rprintf(FERROR, "ERROR: out of memory in %s\n", str);
exit_cleanup(RERR_MALLOC);
}
-void overflow(char *str)
+void overflow_exit(char *str)
{
rprintf(FERROR, "ERROR: buffer overflow in %s\n", str);
exit_cleanup(RERR_MALLOC);
}
-
-
-int set_modtime(char *fname, time_t modtime)
+int set_modtime(char *fname, time_t modtime, mode_t mode)
{
- if (dry_run)
- return 0;
+#if !defined HAVE_LUTIMES || !defined HAVE_UTIMES
+ if (S_ISLNK(mode))
+ return 1;
+#endif
if (verbose > 2) {
rprintf(FINFO, "set modtime of %s to (%ld) %s",
asctime(localtime(&modtime)));
}
+ if (dry_run)
+ return 0;
+
{
-#ifdef HAVE_UTIMBUF
+#ifdef HAVE_UTIMES
+ struct timeval t[2];
+ t[0].tv_sec = time(NULL);
+ t[0].tv_usec = 0;
+ t[1].tv_sec = modtime;
+ t[1].tv_usec = 0;
+# ifdef HAVE_LUTIMES
+ if (S_ISLNK(mode))
+ return lutimes(fname, t);
+# endif
+ return utimes(fname, t);
+#elif defined HAVE_UTIMBUF
struct utimbuf tbuf;
tbuf.actime = time(NULL);
tbuf.modtime = modtime;
return utime(fname,&tbuf);
-#elif defined(HAVE_UTIME)
+#elif defined HAVE_UTIME
time_t t[2];
t[0] = time(NULL);
t[1] = modtime;
return utime(fname,t);
#else
- struct timeval t[2];
- t[0].tv_sec = time(NULL);
- t[0].tv_usec = 0;
- t[1].tv_sec = modtime;
- t[1].tv_usec = 0;
- return utimes(fname,t);
+#error No file-time-modification routine found!
#endif
}
}
+/* This creates a new directory with default permissions. Since there
+ * might be some directory-default permissions affecting this, we can't
+ * force the permissions directly using the original umask and mkdir().
+ *
+ * The process umask is switched to orig_umask for the duration of the
+ * do_mkdir(fname, ACCESSPERMS) call and is then set back to 0. The
+ * return value is whatever do_mkdir() returned. */
+int mkdir_defmode(char *fname)
+{
+ int ret;
-/**
- Create any necessary directories in fname. Unfortunately we don't know
- what perms to give the directory when this is called so we need to rely
- on the umask
-**/
-int create_directory_path(char *fname, int base_umask)
+ umask(orig_umask);
+ ret = do_mkdir(fname, ACCESSPERMS);
+ umask(0);
+
+ return ret;
+}
+
+/* Create any necessary directories in fname. Any missing directories are
+ * created with default permissions.
+ *
+ * Leading slashes and "./" prefixes are skipped, and then every prefix of
+ * fname that ends just before a '/' is handed to do_mkdir(). The slash is
+ * overwritten with a NUL for the call and restored right afterward, so
+ * fname must be writable. The element after the last slash is not created.
+ * The umask is set to orig_umask while the directories are made and is
+ * set back to 0 before returning.
+ *
+ * Returns 0 on success, or -1 if any do_mkdir() call failed with an errno
+ * other than EEXIST (the remaining elements are still attempted). */
+int create_directory_path(char *fname)
{
char *p;
+ int ret = 0;
while (*fname == '/')
fname++;
while (strncmp(fname, "./", 2) == 0)
fname += 2;
+ umask(orig_umask);
p = fname;
while ((p = strchr(p,'/')) != NULL) {
- *p = 0;
- do_mkdir(fname, 0777 & ~base_umask);
- *p = '/';
- p++;
+ *p = '\0';
+ if (do_mkdir(fname, ACCESSPERMS) < 0 && errno != EEXIST)
+ ret = -1;
+ *p++ = '/';
}
- return 0;
-}
+ umask(0);
+ return ret;
+}
/**
* Write @p len bytes at @p ptr to descriptor @p desc, retrying if
*
* Derived from GNU C's cccp.c.
*/
-static int full_write(int desc, char *ptr, size_t len)
+int full_write(int desc, char *ptr, size_t len)
{
int total_written;
return total_written;
}
-
/**
* Read @p len bytes at @p ptr from descriptor @p desc, retrying if
* interrupted.
return n_chars;
}
-
/** Copy a file.
*
- * This is used in conjunction with the --temp-dir option */
-int copy_file(char *source, char *dest, mode_t mode)
+ * This is used in conjunction with the --temp-dir, --backup, and
+ * --copy-dest options. */
+int copy_file(const char *source, const char *dest, mode_t mode)
{
int ifd;
int ofd;
* --delete trying to remove old .rsyncNNN files, hence it renames it
* each time.
**/
-int robust_unlink(char *fname)
+int robust_unlink(const char *fname)
{
#ifndef ETXTBSY
return do_unlink(fname);
}
/* Returns 0 on successful rename, 1 if we successfully copied the file
- * across filesystems, -2 if copy_file() failed, and -1 on other errors. */
-int robust_rename(char *from, char *to, int mode)
+ * across filesystems, -2 if copy_file() failed, and -1 on other errors.
+ * If partialptr is not NULL and we need to do a copy, copy the file into
+ * the active partial-dir instead of over the destination file. */
+int robust_rename(char *from, char *to, char *partialptr,
+ int mode)
{
int tries = 4;
break;
#endif
case EXDEV:
+ if (partialptr) {
+ if (!handle_partial_dir(partialptr,PDIR_CREATE))
+ return -1;
+ to = partialptr;
+ }
if (copy_file(from, to, mode) != 0)
return -2;
do_unlink(from);
return -1;
}
-
static pid_t all_pids[10];
static int num_pids;
}
}
-
/** Turn a user name into a uid */
int name_to_uid(char *name, uid_t *uid)
{
return 0;
}
-
/** Lock a byte range in a open file */
int lock_range(int fd, int offset, int len)
{
return fcntl(fd,F_SETLK,&lock) == 0;
}
-static int exclude_server_path(char *arg)
+static int filter_server_path(char *arg)
{
char *s;
- if (server_exclude_list.head) {
+ if (server_filter_list.head) {
for (s = arg; (s = strchr(s, '/')) != NULL; ) {
*s = '\0';
- if (check_exclude(&server_exclude_list, arg, 1) < 0) {
+ if (check_filter(&server_filter_list, arg, 1) < 0) {
/* We must leave arg truncated! */
return 1;
}
char **argv = *argv_ptr;
int argc = *argc_ptr;
int maxargs = *maxargs_ptr;
-#if !(defined(HAVE_GLOB) && defined(HAVE_GLOB_H))
+#if !defined HAVE_GLOB || !defined HAVE_GLOB_H
if (argc == maxargs) {
maxargs += MAX_ARGS;
if (!(argv = realloc_array(argv, char *, maxargs)))
if (!*s)
s = ".";
s = argv[argc++] = strdup(s);
- exclude_server_path(s);
+ filter_server_path(s);
#else
glob_t globbuf;
- int i;
if (maxargs <= argc)
return;
s = ".";
if (sanitize_paths)
- s = sanitize_path(NULL, s, "", 0);
+ s = sanitize_path(NULL, s, "", 0, NULL);
else
s = strdup(s);
memset(&globbuf, 0, sizeof globbuf);
- if (!exclude_server_path(s))
+ if (!filter_server_path(s))
glob(s, 0, NULL, &globbuf);
if (MAX((int)globbuf.gl_pathc, 1) > maxargs - argc) {
maxargs += globbuf.gl_pathc + MAX_ARGS;
if (globbuf.gl_pathc == 0)
argv[argc++] = s;
else {
- int j = globbuf.gl_pathc;
+ int i;
free(s);
- for (i = 0; i < j; i++) {
+ for (i = 0; i < (int)globbuf.gl_pathc; i++) {
if (!(argv[argc++] = strdup(globbuf.gl_pathv[i])))
out_of_memory("glob_expand_one");
}
int cnt = 0, new_component = 1;
while (*p) {
if (*p++ == '/')
- new_component = 1;
+ new_component = (*p != '.' || (p[1] != '/' && p[1] != '\0'));
else if (new_component) {
new_component = 0;
cnt++;
return cnt;
}
-/* Turns multiple adjacent slashes into a single slash; gets rid of "./"
- * elements; collapses ".." elements except for those at the start of the
- * string; removes a trailing slash. If the resulting name would be empty,
- * change it into a ".". */
-unsigned int clean_fname(char *name)
+/* Turns multiple adjacent slashes into a single slash, gets rid of "./"
+ * elements (but not a trailing dot dir), removes a trailing slash, and
+ * optionally collapses ".." elements (except for those at the start of the
+ * string). If the resulting name would be empty, change it into a ".". */
+unsigned int clean_fname(char *name, BOOL collapse_dot_dot)
{
char *limit = name - 1, *t = name, *f = name;
int anchored;
continue;
}
/* collapse ".." dirs */
- if (f[1] == '.' && (f[2] == '/' || !f[2])) {
+ if (collapse_dot_dot
+ && f[1] == '.' && (f[2] == '/' || !f[2])) {
char *s = t - 1;
if (s == name && anchored) {
f += 2;
f += 2;
continue;
}
- *t++ = *f++;
- *t++ = *f++;
- limit = t;
+ limit = t + 2;
}
}
while (*f && (*t++ = *f++) != '/') {}
* The rootdir string contains a value to use in place of a leading slash.
* Specify NULL to get the default of lp_path(module_id).
*
- * If depth is > 0, it is a count of how many '..'s to allow at the start
- * of the path.
+ * The depth var is a count of how many '..'s to allow at the start of the
+ * path. If symlink is set, combine its value with the "p" value to get
+ * the target path, and **return NULL if any '..'s try to escape**.
*
- * We call clean_fname() to clean up the path, but we preserve a trailing
- * slash because that is sometimes significant on command-line arguments.
- */
-char *sanitize_path(char *dest, const char *p, const char *rootdir, int depth)
+ * We also clean the path in a manner similar to clean_fname() but with a
+ * few differences:
+ *
+ * Turns multiple adjacent slashes into a single slash, gets rid of "." dir
+ * elements (INCLUDING a trailing dot dir), PRESERVES a trailing slash, and
+ * ALWAYS collapses ".." elements (except for those at the start of the
+ * string up to "depth" deep). If the resulting name would be empty,
+ * change it into a ".". */
+char *sanitize_path(char *dest, const char *p, const char *rootdir, int depth,
+ const char *symlink)
{
- char *start, *sanp;
- int rlen = 0;
+ char *start, *sanp, *save_dest = dest;
+ int rlen = 0, leave_one_dotdir = relative_paths;
+
+ if (symlink && *symlink == '/') {
+ p = symlink;
+ symlink = "";
+ }
if (dest != p) {
int plen = strlen(p);
}
start = sanp = dest + rlen;
- while (*p != '\0') {
+ while (1) {
+ if (*p == '\0') {
+ if (!symlink || !*symlink)
+ break;
+ while (sanp != start && sanp[-1] != '/') {
+ /* strip last element */
+ sanp--;
+ }
+ /* Append a relative symlink */
+ p = symlink;
+ symlink = "";
+ }
/* discard leading or extra slashes */
if (*p == '/') {
p++;
* always be left pointing after a slash
*/
if (*p == '.' && (p[1] == '/' || p[1] == '\0')) {
- /* skip "." component */
- p++;
- continue;
+ if (leave_one_dotdir && p[1])
+ leave_one_dotdir = 0;
+ else {
+ /* skip "." component */
+ p++;
+ continue;
+ }
}
if (*p == '.' && p[1] == '.' && (p[2] == '/' || p[2] == '\0')) {
/* ".." component followed by slash or end */
- if (depth > 0 && sanp == start) {
- /* allow depth levels of .. at the beginning */
- --depth;
- *sanp++ = *p++;
- *sanp++ = *p++;
- /* move virtual beginning to leave .. alone */
- start = sanp;
- continue;
- }
- p += 2;
- if (sanp != start) {
- /* back up sanp one level */
- --sanp; /* now pointing at slash */
- while (sanp > start && sanp[-1] != '/') {
- /* skip back up to slash */
- sanp--;
+ if (depth <= 0 || sanp != start) {
+ if (symlink && sanp == start) {
+ if (!save_dest)
+ free(dest);
+ return NULL;
+ }
+ p += 2;
+ if (sanp != start) {
+ /* back up sanp one level */
+ --sanp; /* now pointing at slash */
+ while (sanp > start && sanp[-1] != '/') {
+ /* skip back up to slash */
+ sanp--;
+ }
}
+ continue;
}
- continue;
+ /* allow depth levels of .. at the beginning */
+ depth--;
+ /* move the virtual beginning to leave the .. alone */
+ start = sanp + 3;
}
/* copy one component through next slash */
while (*p && (*sanp++ = *p++) != '/') {}
return dest;
}
-char curr_dir[MAXPATHLEN];
-unsigned int curr_dir_len;
+/* If sanitize_paths is not set, this works exactly the same as do_stat().
+ * Otherwise, we verify that no symlink takes us outside the module path.
+ * If we encounter an escape attempt, we return a symlink's stat info!
+ *
+ * When sanitizing, the name is checked with do_lstat() and, for as long as
+ * it is a symlink, the link's target is read and combined with the current
+ * name by sanitize_path() (an absolute name first has the module path's
+ * length skipped). This is repeated for at most 16 links.
+ *
+ * Returns -1 if do_lstat() or readlink() fails, otherwise 0. If
+ * sanitize_path() returns NULL or the 16-link limit is reached, 0 is
+ * returned with *stp still describing the last symlink examined, so the
+ * caller can detect the problem with S_ISLNK(). Without SUPPORT_LINKS
+ * this is simply do_stat(). */
+int safe_stat(const char *fname, STRUCT_STAT *stp)
+{
+#ifdef SUPPORT_LINKS
+ char tmpbuf[MAXPATHLEN], linkbuf[MAXPATHLEN], *mod_path;
+ int i, llen, mod_path_len;
-/**
- * Like chdir(), but it keeps track of the current directory (in the
+ if (!sanitize_paths)
+ return do_stat(fname, stp);
+
+ mod_path = lp_path(module_id);
+ mod_path_len = strlen(mod_path);
+
+ for (i = 0; i < 16; i++) {
+#ifdef DEBUG
+ if (*fname == '/')
+ assert(strncmp(fname, mod_path, mod_path_len) == 0 && fname[mod_path_len] == '/');
+#endif
+ if (do_lstat(fname, stp) < 0)
+ return -1;
+ if (!S_ISLNK(stp->st_mode))
+ return 0;
+ if ((llen = readlink(fname, linkbuf, sizeof linkbuf - 1)) < 0)
+ return -1;
+ linkbuf[llen] = '\0';
+ if (*fname == '/')
+ fname += mod_path_len;
+ if (!(fname = sanitize_path(tmpbuf, fname, mod_path, curr_dir_depth, linkbuf)))
+ break;
+ }
+
+ return 0; /* Leave *stp set to the last symlink. */
+#else
+ return do_stat(fname, stp);
+#endif
+}
+
+void die_on_unsafe_path(char *path, int strip_filename)
+{
+#ifdef SUPPORT_LINKS
+ char *final_slash, *p;
+ STRUCT_STAT st;
+
+ if (!path)
+ return;
+ if (strip_filename) {
+ if (!(final_slash = strrchr(path, '/')))
+ return;
+ *final_slash = '\0';
+ } else
+ final_slash = NULL;
+
+ p = path;
+ if (*p == '/')
+ p += module_dirlen + 1;
+ while (*p) {
+ if ((p = strchr(p, '/')) != NULL)
+ *p = '\0';
+ if (safe_stat(path, &st) < 0) {
+ *p++ = '/';
+ goto done;
+ }
+ if (S_ISLNK(st.st_mode)) {
+ rprintf(FERROR, "Unsafe path: %s\n", path);
+ exit_cleanup(RERR_SYNTAX);
+ }
+ if (!p)
+ break;
+ *p++ = '/';
+ }
+
+ done:
+ if (final_slash)
+ *final_slash = '/';
+#endif
+}
+
+/* Like chdir(), but it keeps track of the current directory (in the
* global "curr_dir"), and ensures that the path size doesn't overflow.
- * Also cleans the path using the clean_fname() function.
- **/
+ * Also cleans the path using the clean_fname() function. */
int push_dir(char *dir)
{
static int initialised;
curr_dir_len += len;
}
- curr_dir_len = clean_fname(curr_dir);
+ curr_dir_len = clean_fname(curr_dir, 1);
+ if (sanitize_paths) {
+ if (module_dirlen > curr_dir_len)
+ module_dirlen = curr_dir_len;
+ curr_dir_depth = count_dir_elements(curr_dir + module_dirlen);
+ }
return 1;
}
curr_dir_len = strlcpy(curr_dir, dir, sizeof curr_dir);
if (curr_dir_len >= sizeof curr_dir)
curr_dir_len = sizeof curr_dir - 1;
+ if (sanitize_paths)
+ curr_dir_depth = count_dir_elements(curr_dir + module_dirlen);
return 1;
}
-/**
- * Return the filename, turning any newlines into '?'s. This ensures that
- * outputting it on a line of its own cannot generate an empty line. This
- * function can handle only 2 names at a time!
- **/
-const char *safe_fname(const char *fname)
-{
- static char fbuf1[MAXPATHLEN], fbuf2[MAXPATHLEN];
- static char *fbuf = fbuf2;
- char *nl = strchr(fname, '\n');
-
- if (!nl)
- return fname;
-
- fbuf = fbuf == fbuf1 ? fbuf2 : fbuf1;
- strlcpy(fbuf, fname, MAXPATHLEN);
- nl = fbuf + (nl - (char *)fname);
- do {
- *nl = '?';
- } while ((nl = strchr(nl+1, '\n')) != NULL);
-
- return fbuf;
-}
-
/**
* Return a quoted string with the full pathname of the indicated filename.
* The string " (in MODNAME)" may also be appended. The returned pointer
if (result)
free(result);
- fn = safe_fname(fn);
if (*fn == '/')
p1 = p2 = "";
else {
p1 = curr_dir;
- p2 = "/";
+ for (p2 = p1; *p2 == '/'; p2++) {}
+ if (*p2)
+ p2 = "/";
}
if (module_id >= 0) {
m1 = " (in ";
m2 = lp_name(module_id);
m3 = ")";
- if (*p1) {
+ if (p1 == curr_dir) {
if (!lp_use_chroot(module_id)) {
char *p = lp_path(module_id);
if (*p != '/' || p[1])
p1 += strlen(p);
}
- if (!*p1)
- p2++;
- else
- p1++;
}
- else
- fn++;
} else
m1 = m2 = m3 = "";
fn = fname;
if ((int)pathjoin(t, sz, partial_dir, fn) >= sz)
return NULL;
- if (server_exclude_list.head
- && check_exclude(&server_exclude_list, partial_fname, 0) < 0)
- return NULL;
+ if (server_filter_list.head) {
+ t = strrchr(partial_fname, '/');
+ *t = '\0';
+ if (check_filter(&server_filter_list, partial_fname, 1) < 0)
+ return NULL;
+ *t = '/';
+ if (check_filter(&server_filter_list, partial_fname, 0) < 0)
+ return NULL;
+ }
return partial_fname;
}
dir = partial_fname;
if (create) {
STRUCT_STAT st;
-#if SUPPORT_LINKS
int statret = do_lstat(dir, &st);
-#else
- int statret = do_stat(dir, &st);
-#endif
+ if (sanitize_paths && *partial_dir != '/')
+ die_on_unsafe_path(dir, 1); /* lstat handles last element */
if (statret == 0 && !S_ISDIR(st.st_mode)) {
if (do_unlink(dir) < 0)
return 0;
return 1;
}
-/** We need to supply our own strcmp function for file list comparisons
- to ensure that signed/unsigned usage is consistent between machines. */
-int u_strcmp(const char *cs1, const char *cs2)
-{
- const uchar *s1 = (const uchar *)cs1;
- const uchar *s2 = (const uchar *)cs2;
-
- while (*s1 && *s2 && (*s1 == *s2)) {
- s1++; s2++;
- }
-
- return (int)*s1 - (int)*s2;
-}
-
-
-
/**
* Determine if a symlink points outside the current directory tree.
* This is considered "unsafe" because e.g. when mirroring somebody
return (depth < 0);
}
+/* Return the int64 number as a string. If the --human-readable option was
+ * specified, we may output the number in K, M, or G units. We can return
+ * up to 4 buffers at a time.
+ *
+ * When human_readable is 1 the units are powers of 1000, for any other
+ * non-zero value they are powers of 1024. A value is shown with units
+ * (and 2 decimal places) only when its magnitude is greater than one K.
+ * Otherwise the plain decimal digits are returned, preceded by a '-' for
+ * a negative number.
+ *
+ * The result lives in one of 4 static buffers that are reused in rotation.
+ * A plain-digit result is right-aligned at the end of its buffer, so there
+ * is unused room in front of the returned pointer. */
+char *human_num(int64 num)
+{
+	static char bufs[4][128]; /* more than enough room */
+	static unsigned int n;
+	char *s;
+
+	n = (n + 1) % (sizeof bufs / sizeof bufs[0]);
+
+	if (human_readable) {
+		char units = '\0';
+		int mult = human_readable == 1 ? 1000 : 1024;
+		double dnum = 0;
+		/* Compare magnitudes so that negative values get units too. */
+		double mag = num < 0 ? -(double)num : (double)num;
+		if (mag > mult*mult*mult) {
+			dnum = (double)num / (mult*mult*mult);
+			units = 'G';
+		} else if (mag > mult*mult) {
+			dnum = (double)num / (mult*mult);
+			units = 'M';
+		} else if (mag > mult) {
+			dnum = (double)num / mult;
+			units = 'K';
+		}
+		if (units) {
+			sprintf(bufs[n], "%.2f%c", dnum, units);
+			return bufs[n];
+		}
+	}
+
+	s = bufs[n] + sizeof bufs[0] - 1;
+	*s = '\0';
+
+	if (!num)
+		*--s = '0';
+	else {
+		int negative = num < 0;
+		while (num) {
+			/* Take the magnitude one digit at a time so that the
+			 * most negative int64 value can't overflow. */
+			int digit = (int)(num % 10);
+			*--s = (char)((digit < 0 ? -digit : digit) + '0');
+			num /= 10;
+		}
+		if (negative)
+			*--s = '-';
+	}
+	return s;
+}
+
+/* Return the double number as a string. If the --human-readable option was
+ * specified, we may output the number in K, M, or G units. We use a buffer
+ * from human_num() to return our result.
+ *
+ * The integer part is first formatted by human_num(). If that result ends
+ * in a digit (i.e. no unit letter was added), the number is re-formatted in
+ * place with "decimal_digits" digits after the decimal point. Otherwise
+ * the human_num() string is returned unchanged. */
+char *human_dnum(double dnum, int decimal_digits)
+{
+	char *buf = human_num(dnum);
+	int len = strlen(buf);
+	if (isdigit(*(uchar*)(buf+len-1))) {
+		/* There's extra room in buf prior to the start of the num.
+		 * Back up far enough for the '.', the decimal digits, and one
+		 * more integer digit in case rounding carries into a new
+		 * digit (e.g. 9.99 with 1 decimal digit becomes "10.0"). */
+		buf -= decimal_digits + 2;
+		snprintf(buf, len + decimal_digits + 3, "%.*f", decimal_digits, dnum);
+	}
+	return buf;
+}
/**
* Return the date and time as a string
{
static char TimeBuf[200];
struct tm *tm = localtime(&t);
+ char *p;
#ifdef HAVE_STRFTIME
strftime(TimeBuf, sizeof TimeBuf - 1, "%Y/%m/%d %H:%M:%S", tm);
strlcpy(TimeBuf, asctime(tm), sizeof TimeBuf);
#endif
- if (TimeBuf[strlen(TimeBuf)-1] == '\n') {
- TimeBuf[strlen(TimeBuf)-1] = 0;
- }
+ if ((p = strchr(TimeBuf, '\n')) != NULL)
+ *p = '\0';
- return(TimeBuf);
+ return TimeBuf;
}
-
/**
* Sleep for a specified number of milliseconds.
*
return True;
}
-
-/**
- * Determine if two file modification times are equivalent (either
- * exact or in the modification timestamp window established by
- * --modify-window).
+/* Determine if two time_t values are equivalent (either exact, or in
+ * the modification timestamp window established by --modify-window).
*
* @retval 0 if the times should be treated as the same
*
*
* @retval -1 if the 2nd is later
**/
-int cmp_modtime(time_t file1, time_t file2)
+int cmp_time(time_t file1, time_t file2)
{
if (file2 > file1) {
if (file2 - file1 <= modify_window)
}
#endif
-
#define MALLOC_MAX 0x40000000
void *_new_array(unsigned int size, unsigned long num)
return malloc(size * num);
return realloc(ptr, size * num);
}
+
+/* Take a filename and filename length and return the most significant
+ * filename suffix we can find. This ignores suffixes such as "~",
+ * ".bak", ".orig", ".~1~", etc.
+ *
+ * The return value points at the '.' inside fn that starts the chosen
+ * suffix (or at an empty string if no suffix was found), and *len_ptr is
+ * set to that suffix's length, counting the dot (a trailing "~" on the
+ * name is not counted). Leading dots in fn are never treated as the start
+ * of a suffix. The scan works backward from the end of the name: a suffix
+ * that contains a non-digit is returned at once, while an all-digit suffix
+ * is remembered but the scan continues to look for an earlier one.
+ *
+ * NOTE(review): the "bak"/"old"/"orig" checks use strcmp(), which compares
+ * through the end of the string, so they appear to match only when nothing
+ * (not even a "~") follows the suffix -- confirm that this is intended. */
+const char *find_filename_suffix(const char *fn, int fn_len, int *len_ptr)
+{
+ const char *suf, *s;
+ BOOL had_tilde;
+ int s_len;
+
+ /* One or more dots at the start aren't a suffix. */
+ while (fn_len && *fn == '.') fn++, fn_len--;
+
+ /* Ignore the ~ in a "foo~" filename. */
+ if (fn_len > 1 && fn[fn_len-1] == '~')
+ fn_len--, had_tilde = True;
+ else
+ had_tilde = False;
+
+ /* Assume we don't find a suffix. */
+ suf = "";
+ *len_ptr = 0;
+
+ /* Find the last significant suffix. */
+ for (s = fn + fn_len; fn_len > 1; ) {
+ while (*--s != '.' && s != fn) {}
+ if (s == fn)
+ break;
+ s_len = fn_len - (s - fn);
+ fn_len = s - fn;
+ if (s_len == 4) {
+ if (strcmp(s+1, "bak") == 0
+ || strcmp(s+1, "old") == 0)
+ continue;
+ } else if (s_len == 5) {
+ if (strcmp(s+1, "orig") == 0)
+ continue;
+ } else if (s_len > 2 && had_tilde
+ && s[1] == '~' && isdigit(*(uchar*)(s+2)))
+ continue;
+ *len_ptr = s_len;
+ suf = s;
+ if (s_len == 1)
+ break;
+ /* Determine if the suffix is all digits. */
+ for (s++, s_len--; s_len > 0; s++, s_len--) {
+ if (!isdigit(*(uchar*)s))
+ return suf;
+ }
+ /* An all-digit suffix may not be that significant. */
+ s = suf;
+ }
+
+ return suf;
+}
+
+/* This is an implementation of the Levenshtein distance algorithm. It
+ * was implemented to avoid needing a two-dimensional matrix (to save
+ * memory). It was also tweaked to try to factor in the ASCII distance
+ * between changed characters as a minor distance quantity. The normal
+ * Levenshtein units of distance (each signifying a single change between
+ * the two strings) are defined as a "UNIT".
+ *
+ * s1/len1 and s2/len2 are the two strings and their lengths (the strings
+ * need not be NUL-terminated). If either string is empty, the result is
+ * the other string's length in UNITs plus the sum of its byte values.
+ * Otherwise a single row of len2 running totals is kept in a[] and the
+ * last entry is the result. Characters are always treated as unsigned
+ * so that the result doesn't depend on the platform's char signedness.
+ *
+ * NOTE(review): a[] has room for MAXPATHLEN entries and len2 is not
+ * checked here -- presumably callers only pass names shorter than
+ * MAXPATHLEN; confirm. */
+
+#define UNIT (1 << 16)
+
+uint32 fuzzy_distance(const char *s1, int len1, const char *s2, int len2)
+{
+	uint32 a[MAXPATHLEN], diag, above, left, diag_inc, above_inc, left_inc;
+	int32 cost;
+	int i1, i2;
+
+	if (!len1 || !len2) {
+		if (!len1) {
+			s1 = s2;
+			len1 = len2;
+		}
+		/* Sum the bytes as unsigned values, just like the insert
+		 * and delete costs in the main loop below. */
+		for (i1 = 0, cost = 0; i1 < len1; i1++)
+			cost += *((uchar*)s1+i1);
+		return (int32)len1 * UNIT + cost;
+	}
+
+	for (i2 = 0; i2 < len2; i2++)
+		a[i2] = (i2+1) * UNIT;
+
+	for (i1 = 0; i1 < len1; i1++) {
+		diag = i1 * UNIT;
+		above = (i1+1) * UNIT;
+		for (i2 = 0; i2 < len2; i2++) {
+			left = a[i2];
+			/* A changed character costs a UNIT plus the absolute
+			 * difference between the two byte values. */
+			if ((cost = *((uchar*)s1+i1) - *((uchar*)s2+i2)) != 0) {
+				if (cost < 0)
+					cost = UNIT - cost;
+				else
+					cost = UNIT + cost;
+			}
+			diag_inc = diag + cost;
+			left_inc = left + UNIT + *((uchar*)s1+i1);
+			above_inc = above + UNIT + *((uchar*)s2+i2);
+			a[i2] = above = left < above
+			      ? (left_inc < diag_inc ? left_inc : diag_inc)
+			      : (above_inc < diag_inc ? above_inc : diag_inc);
+			diag = left;
+		}
+	}
+
+	return a[len2-1];
+}
+
+#define BB_SLOT_SIZE (16*1024) /* Desired size in bytes */
+#define BB_PER_SLOT_BITS (BB_SLOT_SIZE * 8) /* Number of bits per slot */
+#define BB_PER_SLOT_INTS (BB_SLOT_SIZE / 4) /* Number of int32s per slot */
+
+struct bitbag { /* A set of bit flags that is stored as an array of slots. */
+ uint32 **bits; /* One pointer per slot; a slot stays NULL until a bit in it is set. */
+ int slot_cnt; /* The number of slot pointers allocated in bits. */
+};
+
+/* Allocate a bitbag with enough slots to hold max_ndx bits. All of the
+ * slot pointers start out NULL, so no bits are set. If either allocation
+ * fails, out_of_memory() is called. */
+struct bitbag *bitbag_create(int max_ndx)
+{
+	struct bitbag *bb = new(struct bitbag);
+
+	/* Check the struct allocation before it is dereferenced. */
+	if (!bb)
+		out_of_memory("bitbag_create");
+
+	/* Round up so that a partially used final slot is included. */
+	bb->slot_cnt = (max_ndx + BB_PER_SLOT_BITS - 1) / BB_PER_SLOT_BITS;
+
+	if (!(bb->bits = (uint32**)calloc(bb->slot_cnt, sizeof (uint32*))))
+		out_of_memory("bitbag_create");
+
+	return bb;
+}
+
+/* Turn on bit "ndx" in the bag. The slot that holds the bit is allocated
+ * (zero-filled) the first time one of its bits is set; out_of_memory() is
+ * called if that allocation fails. */
+void bitbag_set_bit(struct bitbag *bb, int ndx)
+{
+	int slot_num = ndx / BB_PER_SLOT_BITS;
+	int bit_num = ndx % BB_PER_SLOT_BITS;
+	uint32 *words = bb->bits[slot_num];
+
+	if (words == NULL) {
+		bb->bits[slot_num] = words = (uint32*)calloc(BB_PER_SLOT_INTS, 4);
+		if (words == NULL)
+			out_of_memory("bitbag_set_bit");
+	}
+
+	words[bit_num / 32] |= 1u << (bit_num % 32);
+}
+
+#if 0 /* not needed yet */
+/* Turn off bit "ndx" in the bag. A slot that was never allocated has no
+ * bits to clear, so it is left alone. */
+void bitbag_clear_bit(struct bitbag *bb, int ndx)
+{
+	uint32 *words = bb->bits[ndx / BB_PER_SLOT_BITS];
+	int bit_num = ndx % BB_PER_SLOT_BITS;
+
+	if (words != NULL)
+		words[bit_num / 32] &= ~(1u << (bit_num % 32));
+}
+
+/* Return 1 if bit "ndx" is set in the bag, or 0 if it is not (which is
+ * always the case for a slot that was never allocated). */
+int bitbag_check_bit(struct bitbag *bb, int ndx)
+{
+	uint32 *words = bb->bits[ndx / BB_PER_SLOT_BITS];
+	int bit_num = ndx % BB_PER_SLOT_BITS;
+
+	if (words == NULL)
+		return 0;
+
+	return (words[bit_num / 32] & (1u << (bit_num % 32))) != 0;
+}
+#endif
+
+/* Call this with -1 to start checking from 0. Returns -1 at the end.
+ *
+ * Returns the index of the lowest set bit whose index is greater than
+ * "after", or -1 if there is no such bit. Slots that were never allocated
+ * are skipped. The mask hides the bits at or below "after" in the first
+ * word that is examined; it is zeroed for every later word and slot. */
+int bitbag_next_bit(struct bitbag *bb, int after)
+{
+ uint32 bits, mask;
+ int i, ndx = after + 1;
+ int slot = ndx / BB_PER_SLOT_BITS;
+ ndx %= BB_PER_SLOT_BITS;
+
+ mask = (1u << (ndx % 32)) - 1;
+ for (i = ndx / 32; slot < bb->slot_cnt; slot++, i = mask = 0) {
+ if (!bb->bits[slot])
+ continue;
+ for ( ; i < BB_PER_SLOT_INTS; i++, mask = 0) {
+ if (!(bits = bb->bits[slot][i] & ~mask))
+ continue;
+ /* The xor magic figures out the lowest enabled bit in
+ * bits, and the switch quickly computes log2(bit). */
+ switch (bits ^ (bits & (bits-1))) {
+#define LOG2(n) case 1u << n: return slot*BB_PER_SLOT_BITS + i*32 + n
+ LOG2(0); LOG2(1); LOG2(2); LOG2(3);
+ LOG2(4); LOG2(5); LOG2(6); LOG2(7);
+ LOG2(8); LOG2(9); LOG2(10); LOG2(11);
+ LOG2(12); LOG2(13); LOG2(14); LOG2(15);
+ LOG2(16); LOG2(17); LOG2(18); LOG2(19);
+ LOG2(20); LOG2(21); LOG2(22); LOG2(23);
+ LOG2(24); LOG2(25); LOG2(26); LOG2(27);
+ LOG2(28); LOG2(29); LOG2(30); LOG2(31);
+ }
+ return -1; /* impossible... */
+ }
+ }
+
+ return -1;
+}