This patch adds the --link-by-hash=DIR option, which hard-links each received
file into a link farm under DIR that is organized by the file's MD4 hash. As a
result, the system stores only one copy of each unique file content, no matter
how many differently named files share that content.
This patch adds the --link-by-hash=DIR option, which hard-links each received
file into a link farm under DIR that is organized by the file's MD4 hash. As a
result, the system stores only one copy of each unique file content, no matter
how many differently named files share that content.
---- orig/Makefile.in 2004-11-02 16:47:15
-+++ Makefile.in 2004-07-03 20:20:15
-@@ -35,7 +35,7 @@ OBJS1=rsync.o generator.o receiver.o cle
- main.o checksum.o match.o syscall.o log.o backup.o
- OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \
- fileio.o batch.o clientname.o
+ patch -p1 <patches/link-by-hash.diff
+ ./prepare-source
+ ./configure
+ make
+
+diff --git a/Makefile.in b/Makefile.in
+--- a/Makefile.in
++++ b/Makefile.in
+@@ -36,7 +36,7 @@ OBJS1=flist.o rsync.o generator.o receiver.o cleanup.o sender.o exclude.o \
+ util.o main.o checksum.o match.o syscall.o log.o backup.o
+ OBJS2=options.o io.o compat.o hlink.o token.o uidlist.o socket.o hashtable.o \
+ fileio.o batch.o clientname.o chmod.o acls.o xattrs.o
-OBJS3=progress.o pipe.o
+OBJS3=progress.o pipe.o hashlink.o
DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \
popt/popthelp.o popt/poptparse.o
-OBJS3=progress.o pipe.o
+OBJS3=progress.o pipe.o hashlink.o
DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \
popt/popthelp.o popt/poptparse.o
---- orig/hashlink.c 2004-09-24 16:44:25
-+++ hashlink.c 2004-09-24 16:44:25
-@@ -0,0 +1,340 @@
+diff --git a/flist.c b/flist.c
+--- a/flist.c
++++ b/flist.c
+@@ -70,6 +70,7 @@ extern int need_unsorted_flist;
+ extern int unsort_ndx;
+ extern struct stats stats;
+ extern char *filesfrom_host;
++extern char *link_by_hash_dir;
+
+ extern char curr_dir[MAXPATHLEN];
+
+@@ -824,7 +825,7 @@ static struct file_struct *recv_file_entry(struct file_list *flist,
+ extra_len += (S_ISDIR(mode) ? 2 : 1) * EXTRA_LEN;
+ #endif
+
+- if (always_checksum && S_ISREG(mode))
++ if ((always_checksum || link_by_hash_dir) && S_ISREG(mode))
+ extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
+
+ if (file_length > 0xFFFFFFFFu && S_ISREG(mode))
+diff --git a/hashlink.c b/hashlink.c
+new file mode 100644
+--- /dev/null
++++ b/hashlink.c
+@@ -0,0 +1,336 @@
+ for (dst = hash, i = 0; i < 4; i++, src++) {
+ c = *src >> 4;
+ *(dst++) = (c >= 10) ? (c - 10 + 'a') : (c + '0');
+ for (dst = hash, i = 0; i < 4; i++, src++) {
+ c = *src >> 4;
+ *(dst++) = (c >= 10) ? (c - 10 + 'a') : (c + '0');
-+ if (file->length == 0) {
-+ return robust_rename(fnametmp,fname,0644);
-+ }
++ if (F_LENGTH(file) == 0)
++ return robust_rename(fnametmp, fname, NULL, 0644);
+ }
+ free(dirname);
+
+ if (do_mkdir(hashname, 0755) == -1 && errno != EEXIST) {
+ rsyserr(FERROR, errno, "mkdir failed: %s", hashname);
+ free(hashname);
+ }
+ free(dirname);
+
+ if (do_mkdir(hashname, 0755) == -1 && errno != EEXIST) {
+ rsyserr(FERROR, errno, "mkdir failed: %s", hashname);
+ free(hashname);
+ } else {
+ rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"",
+ linkname, full_fname(fname));
+ } else {
+ rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"",
+ linkname, full_fname(fname));
+ rprintf(FINFO, "link-by-hash (new): %s -> \"%s\"\n",
+ full_fname(fname),linkname);
+
+ rprintf(FINFO, "link-by-hash (new): %s -> \"%s\"\n",
+ full_fname(fname),linkname);
+
+ if (rc != 0) {
+ rsyserr(FERROR, errno, "rename \"%s\" -> \"%s\"",
+ full_fname(fnametmp), full_fname(fname));
+ if (rc != 0) {
+ rsyserr(FERROR, errno, "rename \"%s\" -> \"%s\"",
+ full_fname(fnametmp), full_fname(fname));
---- orig/options.c 2005-05-19 08:52:42
-+++ options.c 2005-05-19 08:55:42
-@@ -141,6 +141,7 @@ char *log_format = NULL;
- char *password_file = NULL;
- char *rsync_path = RSYNC_PATH;
- char *backup_dir = NULL;
+diff --git a/options.c b/options.c
+--- a/options.c
++++ b/options.c
+@@ -156,6 +156,7 @@ char *backup_suffix = NULL;
+ char *tmpdir = NULL;
+ char *partial_dir = NULL;
+ char *basis_dir[MAX_BASIS_DIRS+1];
- char backup_dir_buf[MAXPATHLEN];
- int rsync_port = 0;
- int compare_dest = 0;
-@@ -321,6 +322,7 @@ void usage(enum logcode F)
+ char *config_file = NULL;
+ char *shell_cmd = NULL;
+ char *logfile_name = NULL;
+@@ -393,6 +394,7 @@ void usage(enum logcode F)
rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
+ rprintf(F," --link-by-hash=DIR create hardlinks by hash into DIR\n");
rprintf(F," -z, --compress compress file data during the transfer\n");
rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
+ rprintf(F," --link-by-hash=DIR create hardlinks by hash into DIR\n");
rprintf(F," -z, --compress compress file data during the transfer\n");
- rprintf(F," -C, --cvs-exclude auto-ignore files the same way CVS does\n");
- rprintf(F," -f, --filter=RULE add a file-filtering RULE\n");
-@@ -362,7 +364,7 @@ void usage(enum logcode F)
-
- enum {OPT_VERSION = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
- OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST,
-- OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW,
-+ OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_LINK_BY_HASH,
+ rprintf(F," --compress-level=NUM explicitly set compression level\n");
+ rprintf(F," --skip-compress=LIST skip compressing files with a suffix in LIST\n");
+@@ -445,7 +447,7 @@ enum {OPT_VERSION = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
+ OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST, OPT_HELP,
+ OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
- OPT_REFUSED_BASE = 9000};
+- OPT_NO_D, OPT_APPEND, OPT_NO_ICONV,
++ OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_LINK_BY_HASH,
+ OPT_SERVER, OPT_REFUSED_BASE = 9000};
-@@ -432,6 +434,7 @@ static struct poptOption long_options[]
+ static struct poptOption long_options[] = {
+@@ -570,6 +572,7 @@ static struct poptOption long_options[] = {
{"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
{"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
{"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
+ {"link-by-hash", 0, POPT_ARG_STRING, 0, OPT_LINK_BY_HASH, 0, 0},
{"fuzzy", 'y', POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 },
{"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
{"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
{"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
+ {"link-by-hash", 0, POPT_ARG_STRING, 0, OPT_LINK_BY_HASH, 0, 0},
{"fuzzy", 'y', POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 },
- /* TODO: Should this take an optional int giving the compression level? */
- {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 },
-@@ -876,6 +879,21 @@ int parse_arguments(int *argc, const cha
- basis_dir[basis_dir_cnt++] = (char *)arg;
- break;
+ {"compress", 'z', POPT_ARG_NONE, 0, 'z', 0, 0 },
+ {"no-compress", 0, POPT_ARG_VAL, &do_compression, 0, 0, 0 },
+@@ -1244,6 +1247,21 @@ int parse_arguments(int *argc_p, const char ***argv_p, int frommain)
+ return 0;
+ #endif
+ arg = poptGetOptArg(pc);
+ if (sanitize_paths)
+ arg = sanitize_path(NULL, arg, NULL, 0);
+ arg = poptGetOptArg(pc);
+ if (sanitize_paths)
+ arg = sanitize_path(NULL, arg, NULL, 0);
-@@ -1458,6 +1476,11 @@ void server_options(char **args,int *arg
- }
- }
+@@ -1997,6 +2015,11 @@ void server_options(char **args, int *argc_p)
+ } else if (inplace)
+ args[ac++] = "--inplace";
---- orig/receiver.c 2005-04-14 01:53:12
-+++ receiver.c 2005-01-15 21:29:13
-@@ -52,6 +52,7 @@ extern int delay_updates;
- extern struct stats stats;
- extern char *log_format;
- extern char *tmpdir;
-+extern char *link_by_hash_dir;
- extern char *partial_dir;
- extern char *basis_dir[];
- extern struct file_list *the_file_list;
-@@ -185,12 +186,13 @@ static int get_tmpname(char *fnametmp, c
-
+diff --git a/receiver.c b/receiver.c
+--- a/receiver.c
++++ b/receiver.c
+@@ -162,12 +162,14 @@ int open_tmpfile(char *fnametmp, const char *fname, struct file_struct *file)
+ }
static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
-- char *fname, int fd, OFF_T total_size)
-+ char *fname, int fd, OFF_T total_size, char *md4)
+- const char *fname, int fd, OFF_T total_size)
++ const char *fname, int fd, OFF_T total_size,
++ const char *md4)
-@@ -210,6 +212,9 @@ static int receive_data(int f_in, char *
+@@ -187,6 +189,9 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
- while ((i = recv_token(f_in, &data)) != 0) {
-@@ -226,6 +231,8 @@ static int receive_data(int f_in, char *
+ if (append_mode > 0) {
+@@ -231,6 +236,8 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
-@@ -252,6 +259,8 @@ static int receive_data(int f_in, char *
+@@ -257,6 +264,8 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
- if (inplace) {
-@@ -292,6 +301,8 @@ static int receive_data(int f_in, char *
+ if (updating_basis_or_equiv) {
+@@ -299,6 +308,8 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
-@@ -307,7 +318,7 @@ static int receive_data(int f_in, char *
+@@ -314,7 +325,7 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
+ receive_data(f_in, NULL, -1, 0, NULL, -1, length, NULL);
}
+ receive_data(f_in, NULL, -1, 0, NULL, -1, length, NULL);
}
- static void handle_delayed_updates(struct file_list *flist, char *local_name)
-@@ -635,8 +646,12 @@ int recv_files(int f_in, struct file_lis
- rprintf(FINFO, "%s\n", safe_fname(fname));
+ static void handle_delayed_updates(char *local_name)
+@@ -676,7 +687,7 @@ int recv_files(int f_in, char *local_name)
recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
-- fname, fd2, file->length);
-+ fname, fd2, file->length, file->u.sum);
+- fname, fd2, F_LENGTH(file));
++ fname, fd2, F_LENGTH(file), F_SUM(file));
- if (!log_before_transfer)
- log_item(file, &initial_stats, iflags, NULL);
---- orig/rsync.c 2005-03-16 02:19:30
-+++ rsync.c 2005-02-21 11:04:36
-@@ -38,6 +38,7 @@ extern int inplace;
+ log_item(log_code, file, &initial_stats, iflags, NULL);
+
+diff --git a/rsync.c b/rsync.c
+--- a/rsync.c
++++ b/rsync.c
+@@ -49,6 +49,7 @@ extern int inplace;
+ extern int flist_eof;
-
-
- /*
-@@ -188,7 +189,12 @@ void finish_transfer(char *fname, char *
- rprintf(FINFO, "renaming %s to %s\n",
- safe_fname(fnametmp), safe_fname(fname));
- }
-- ret = robust_rename(fnametmp, fname, file->mode & INITACCESSPERMS);
-+#if HAVE_LINK
+ extern struct file_list *cur_flist, *first_flist, *dir_flist;
+ extern struct chmod_mode_struct *daemon_chmod_modes;
+ #ifdef ICONV_OPTION
+@@ -536,8 +537,15 @@ int finish_transfer(const char *fname, const char *fnametmp,
+ /* move tmp file over real file */
+ if (verbose > 2)
+ rprintf(FINFO, "renaming %s to %s\n", fnametmp, fname);
+- ret = robust_rename(fnametmp, fname, temp_copy_name,
+- file->mode & INITACCESSPERMS);
++#ifdef HAVE_LINK
-+ ret = robust_rename(fnametmp, fname, file->mode & INITACCESSPERMS);
++ {
++ ret = robust_rename(fnametmp, fname, temp_copy_name,
++ file->mode & INITACCESSPERMS);
++ }
- rsyserr(FERROR, errno, "%s %s -> \"%s\"",
- ret == -2 ? "copy" : "rename",
---- orig/rsync.h 2005-05-03 17:11:01
-+++ rsync.h 2004-07-03 20:20:15
-@@ -631,6 +631,14 @@ struct stats {
- int current_file_index;
+ rsyserr(FERROR_XFER, errno, "%s %s -> \"%s\"",
+ ret == -2 ? "copy" : "rename",
+diff --git a/rsync.h b/rsync.h
+--- a/rsync.h
++++ b/rsync.h
+@@ -818,6 +818,14 @@ struct stats {
+ int num_transferred_files;
- #include "byteorder.h"
- #include "lib/mdfour.h"
---- orig/rsync.yo 2005-05-22 20:53:34
-+++ rsync.yo 2005-02-13 06:58:47
-@@ -356,6 +356,7 @@ to the detailed description below for a
+ #define EMPTY_ITEM_LIST {NULL, 0, 0}
+diff --git a/rsync.yo b/rsync.yo
+--- a/rsync.yo
++++ b/rsync.yo
+@@ -388,6 +388,7 @@ to the detailed description below for a complete description. verb(
--compare-dest=DIR also compare received files relative to DIR
--copy-dest=DIR ... and include copies of unchanged files
--link-dest=DIR hardlink to files in DIR when unchanged
+ --link-by-hash=DIR create hardlinks by hash into DIR
-z, --compress compress file data during the transfer
--compare-dest=DIR also compare received files relative to DIR
--copy-dest=DIR ... and include copies of unchanged files
--link-dest=DIR hardlink to files in DIR when unchanged
+ --link-by-hash=DIR create hardlinks by hash into DIR
-z, --compress compress file data during the transfer
- -C, --cvs-exclude auto-ignore files in the same way CVS does
- -f, --filter=RULE add a file-filtering RULE
+ --compress-level=NUM explicitly set compression level
+ --skip-compress=LIST skip compressing files with suffix in LIST