X-Git-Url: https://mattmccutchen.net/rsync/rsync-patches.git/blobdiff_plain/ff55cce0a8b6abecf86ffd55a2eb3bf97a79dd7e..8e65e95897d45f2f9912349580c977c6e0a5cfc7:/link-by-hash.diff diff --git a/link-by-hash.diff b/link-by-hash.diff index 946acc0..9532c5b 100644 --- a/link-by-hash.diff +++ b/link-by-hash.diff @@ -1,30 +1,50 @@ -After applying this patch and running configure, you MUST run this -command before "make": - - make proto - -Jason M. Felice writes: +Jason M. Felice wrote: This patch adds the --link-by-hash=DIR option, which hard links received files in a link farm arranged by MD4 file hash. The result is that the system will only store one copy of the unique contents of each file, regardless of the file's name. +To use this patch, run these commands for a successful build: + + patch -p1 0xFFFFFFFFu && S_ISREG(mode)) +--- old/hashlink.c ++++ new/hashlink.c +@@ -0,0 +1,336 @@ +/* + Copyright (C) Cronosys, LLC 2004 + @@ -49,16 +69,14 @@ the file's name. + +extern char *link_by_hash_dir; + -+#if HAVE_LINK ++#ifdef HAVE_LINK + -+char* make_hash_name(struct file_struct *file) ++char *make_hash_name(struct file_struct *file) +{ + char hash[33], *dst; -+ unsigned char *src; -+ unsigned char c; ++ uchar c, *src = (uchar*)F_SUM(file); + int i; + -+ src = (unsigned char*)file->u.sum; + for (dst = hash, i = 0; i < 4; i++, src++) { + c = *src >> 4; + *(dst++) = (c >= 10) ? (c - 10 + 'a') : (c + '0'); @@ -254,9 +272,8 @@ the file's name. + char *linkname; + long last_fnbr; + -+ if (file->length == 0) { -+ return robust_rename(fnametmp,fname,0644); -+ } ++ if (F_LENGTH(file) == 0) ++ return robust_rename(fnametmp, fname, NULL, 0644); + + if (do_stat(hashname, &st) == -1) { + char *dirname; @@ -268,14 +285,14 @@ the file's name. + rsyserr(FERROR, errno, "mkdir failed: %s", dirname); + free(hashname); + free(dirname); -+ return robust_rename(fnametmp,fname,0644); ++ return robust_rename(fnametmp, fname, NULL, 0644); + } + free(dirname); + + if (do_mkdir(hashname, 0755) == -1 && errno != EEXIST) { + rsyserr(FERROR, errno, "mkdir failed: %s", hashname); + free(hashname); -+ return robust_rename(fnametmp,fname,0644); ++ return robust_rename(fnametmp, fname, NULL, 0644); + } + + first = 1; @@ -336,7 +353,7 @@ the file's name. + } else { + rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"", + linkname, full_fname(fname)); -+ rc = robust_rename(fnametmp,fname,0644); ++ rc = robust_rename(fnametmp, fname, NULL, 0644); + } + } else { + do_unlink(fnametmp); @@ -347,7 +364,7 @@ the file's name. + rprintf(FINFO, "link-by-hash (new): %s -> \"%s\"\n", + full_fname(fname),linkname); + -+ rc = robust_rename(fnametmp,fname,0644); ++ rc = robust_rename(fnametmp, fname, NULL, 0644); + if (rc != 0) { + rsyserr(FERROR, errno, "rename \"%s\" -> \"%s\"", + full_fname(fnametmp), full_fname(fname)); @@ -363,52 +380,51 @@ the file's name. + free(hashname); + return rc; +} -+ +#endif ---- orig/options.c 2005-08-27 21:11:26 -+++ options.c 2005-05-19 08:55:42 -@@ -141,6 +141,7 @@ char *log_format = NULL; - char *password_file = NULL; - char *rsync_path = RSYNC_PATH; - char *backup_dir = NULL; +--- old/options.c ++++ new/options.c +@@ -153,6 +153,7 @@ char *backup_suffix = NULL; + char *tmpdir = NULL; + char *partial_dir = NULL; + char *basis_dir[MAX_BASIS_DIRS+1]; +char *link_by_hash_dir = NULL; - char backup_dir_buf[MAXPATHLEN]; - int rsync_port = 0; - int compare_dest = 0; -@@ -322,6 +323,7 @@ void usage(enum logcode F) + char *config_file = NULL; + char *shell_cmd = NULL; + char *logfile_name = NULL; +@@ -384,6 +385,7 @@ void usage(enum logcode F) rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n"); rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n"); rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n"); + rprintf(F," --link-by-hash=DIR create hardlinks by hash into DIR\n"); rprintf(F," -z, --compress compress file data during the transfer\n"); - rprintf(F," -C, --cvs-exclude auto-ignore files the same way CVS does\n"); - rprintf(F," -f, --filter=RULE add a file-filtering RULE\n"); -@@ -362,7 +364,7 @@ void usage(enum logcode F) - - enum {OPT_VERSION = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM, - OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST, -- OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, -+ OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_LINK_BY_HASH, + rprintf(F," --compress-level=NUM explicitly set compression level\n"); + rprintf(F," --skip-compress=LIST skip compressing files with a suffix in LIST\n"); +@@ -435,7 +437,7 @@ enum {OPT_VERSION = 1000, OPT_DAEMON, OP + OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST, OPT_HELP, + OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD, OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE, - OPT_REFUSED_BASE = 9000}; +- OPT_NO_D, OPT_APPEND, ++ OPT_NO_D, OPT_APPEND, OPT_LINK_BY_HASH, + OPT_SERVER, OPT_REFUSED_BASE = 9000}; -@@ -446,6 +448,7 @@ static struct poptOption long_options[] + static struct poptOption long_options[] = { +@@ -554,6 +556,7 @@ static struct poptOption long_options[] {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 }, {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 }, {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 }, + {"link-by-hash", 0, POPT_ARG_STRING, 0, OPT_LINK_BY_HASH, 0, 0}, {"fuzzy", 'y', POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 }, - {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 }, - {0, 'P', POPT_ARG_NONE, 0, 'P', 0, 0 }, -@@ -916,6 +919,21 @@ int parse_arguments(int *argc, const cha - basis_dir[basis_dir_cnt++] = (char *)arg; - break; + {"compress", 'z', POPT_ARG_NONE, 0, 'z', 0, 0 }, + {"no-compress", 0, POPT_ARG_VAL, &do_compression, 0, 0, 0 }, +@@ -1205,6 +1208,21 @@ int parse_arguments(int *argc, const cha + return 0; + #endif + case OPT_LINK_BY_HASH: -+#if HAVE_LINK ++#ifdef HAVE_LINK + arg = poptGetOptArg(pc); + if (sanitize_paths) -+ arg = sanitize_path(NULL, arg, NULL, 0); ++ arg = sanitize_path(NULL, arg, NULL, 0, NULL); + link_by_hash_dir = (char *)arg; + break; +#else @@ -422,9 +438,9 @@ the file's name. default: /* A large opt value means that set_refuse_options() * turned this option off. */ -@@ -1507,6 +1525,11 @@ void server_options(char **args,int *arg - } - } +@@ -1929,6 +1947,11 @@ void server_options(char **args,int *arg + } else if (inplace) + args[ac++] = "--inplace"; + if (link_by_hash_dir && am_sender) { + args[ac++] = "--link-by-hash"; @@ -434,32 +450,25 @@ the file's name. if (files_from && (!am_sender || filesfrom_host)) { if (filesfrom_host) { args[ac++] = "--files-from"; ---- orig/receiver.c 2005-08-17 06:45:08 -+++ receiver.c 2005-01-15 21:29:13 -@@ -53,6 +53,7 @@ extern int delay_updates; - extern struct stats stats; - extern char *log_format; - extern char *tmpdir; -+extern char *link_by_hash_dir; - extern char *partial_dir; - extern char *basis_dir[]; - extern struct file_list *the_file_list; -@@ -186,12 +187,13 @@ static int get_tmpname(char *fnametmp, c +--- old/receiver.c ++++ new/receiver.c +@@ -125,12 +125,14 @@ int get_tmpname(char *fnametmp, char *fn static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, -- char *fname, int fd, OFF_T total_size) -+ char *fname, int fd, OFF_T total_size, char *md4) +- const char *fname, int fd, OFF_T total_size) ++ const char *fname, int fd, OFF_T total_size, ++ const char *md4) { - static char file_sum1[MD4_SUM_LENGTH]; - static char file_sum2[MD4_SUM_LENGTH]; + static char file_sum1[MAX_DIGEST_LEN]; + static char file_sum2[MAX_DIGEST_LEN]; struct map_struct *mapbuf; struct sum_struct sum; -+ struct mdfour mdfour_data; - int32 len; ++ md_context mdfour_data; + int32 len, sum_len; OFF_T offset = 0; OFF_T offset2; -@@ -211,6 +213,9 @@ static int receive_data(int f_in, char * +@@ -150,6 +152,9 @@ static int receive_data(int f_in, char * } else mapbuf = NULL; @@ -468,35 +477,35 @@ the file's name. + sum_init(checksum_seed); - if (append_mode) { -@@ -253,6 +258,8 @@ static int receive_data(int f_in, char * + if (append_mode > 0) { +@@ -194,6 +199,8 @@ static int receive_data(int f_in, char * cleanup_got_literal = 1; sum_update(data, i); + if (md4) -+ mdfour_update(&mdfour_data,data,i); ++ mdfour_update(&mdfour_data, (uchar*)data, i); if (fd != -1 && write_file(fd,data,i) != i) goto report_write_error; -@@ -279,6 +286,8 @@ static int receive_data(int f_in, char * +@@ -220,6 +227,8 @@ static int receive_data(int f_in, char * see_token(map, len); sum_update(map, len); + if (md4) -+ mdfour_update(&mdfour_data,map,len); ++ mdfour_update(&mdfour_data, (uchar*)map, len); } - if (inplace) { -@@ -319,6 +328,8 @@ static int receive_data(int f_in, char * + if (updating_basis) { +@@ -262,6 +271,8 @@ static int receive_data(int f_in, char * } - sum_end(file_sum1); + sum_len = sum_end(file_sum1); + if (md4) -+ mdfour_result(&mdfour_data, (unsigned char*)md4); ++ mdfour_result(&mdfour_data, (uchar*)md4); if (mapbuf) unmap_file(mapbuf); -@@ -334,7 +345,7 @@ static int receive_data(int f_in, char * +@@ -277,7 +288,7 @@ static int receive_data(int f_in, char * static void discard_receive_data(int f_in, OFF_T length) { @@ -504,48 +513,47 @@ the file's name. + receive_data(f_in, NULL, -1, 0, NULL, -1, length, NULL); } - static void handle_delayed_updates(struct file_list *flist, char *local_name) -@@ -663,8 +674,12 @@ int recv_files(int f_in, struct file_lis - rprintf(FINFO, "%s\n", safe_fname(fname)); + static void handle_delayed_updates(char *local_name) +@@ -665,7 +676,7 @@ int recv_files(int f_in, char *local_nam /* recv file data */ -+#if HAVE_LINK -+ if (link_by_hash_dir) -+ file->u.sum = new_array(char, MD4_SUM_LENGTH); -+#endif recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size, -- fname, fd2, file->length); -+ fname, fd2, file->length, file->u.sum); +- fname, fd2, F_LENGTH(file)); ++ fname, fd2, F_LENGTH(file), F_SUM(file)); + + log_item(log_code, file, &initial_stats, iflags, NULL); - if (!log_before_transfer) - log_item(file, &initial_stats, iflags, NULL); ---- orig/rsync.c 2005-07-27 23:31:12 -+++ rsync.c 2005-02-21 11:04:36 -@@ -38,6 +38,7 @@ extern int inplace; +--- old/rsync.c ++++ new/rsync.c +@@ -48,6 +48,7 @@ extern int inplace; + extern int flist_eof; extern int keep_dirlinks; extern int make_backups; - extern struct stats stats; +extern char *link_by_hash_dir; - - - /* -@@ -190,7 +191,12 @@ void finish_transfer(char *fname, char * - rprintf(FINFO, "renaming %s to %s\n", - safe_fname(fnametmp), safe_fname(fname)); - } -- ret = robust_rename(fnametmp, fname, file->mode & INITACCESSPERMS); -+#if HAVE_LINK + extern struct file_list *cur_flist, *first_flist, *dir_flist; + extern struct chmod_mode_struct *daemon_chmod_modes; + #ifdef ICONV_OPTION +@@ -458,8 +459,15 @@ void finish_transfer(const char *fname, + /* move tmp file over real file */ + if (verbose > 2) + rprintf(FINFO, "renaming %s to %s\n", fnametmp, fname); +- ret = robust_rename(fnametmp, fname, partialptr, +- file->mode & INITACCESSPERMS); ++#ifdef HAVE_LINK + if (link_by_hash_dir) + ret = link_by_hash(fnametmp, fname, file); + else +#endif -+ ret = robust_rename(fnametmp, fname, file->mode & INITACCESSPERMS); ++ { ++ ret = robust_rename(fnametmp, fname, partialptr, ++ file->mode & INITACCESSPERMS); ++ } if (ret < 0) { rsyserr(FERROR, errno, "%s %s -> \"%s\"", - ret == -2 ? "copy" : "rename", ---- orig/rsync.h 2005-08-17 06:45:08 -+++ rsync.h 2004-07-03 20:20:15 -@@ -639,6 +639,14 @@ struct stats { + ret == -2 ? "copy" : "rename", +--- old/rsync.h ++++ new/rsync.h +@@ -778,6 +778,14 @@ struct stats { int current_file_index; }; @@ -557,16 +565,16 @@ the file's name. + uint32 nlink; +}; + + struct chmod_mode_struct; - #include "byteorder.h" - #include "lib/mdfour.h" ---- orig/rsync.yo 2005-08-27 21:05:12 -+++ rsync.yo 2005-02-13 06:58:47 -@@ -356,6 +356,7 @@ to the detailed description below for a + #define EMPTY_ITEM_LIST {NULL, 0, 0} +--- old/rsync.yo ++++ new/rsync.yo +@@ -392,6 +392,7 @@ to the detailed description below for a --compare-dest=DIR also compare received files relative to DIR --copy-dest=DIR ... and include copies of unchanged files --link-dest=DIR hardlink to files in DIR when unchanged + --link-by-hash=DIR create hardlinks by hash into DIR -z, --compress compress file data during the transfer - -C, --cvs-exclude auto-ignore files in the same way CVS does - -f, --filter=RULE add a file-filtering RULE + --compress-level=NUM explicitly set compression level + --skip-compress=LIST skip compressing files with suffix in LIST