This patch adds the --link-by-hash=DIR option, which hard-links each received
file into a link farm under DIR that is arranged by MD4 file hash. As a result,
the system stores only one copy of each distinct file content, regardless of
the file's name.
This patch adds the --link-by-hash=DIR option, which hard-links each received
file into a link farm under DIR that is arranged by MD4 file hash. As a result,
the system stores only one copy of each distinct file content, regardless of
the file's name.
---- orig/Makefile.in 2004-11-02 16:47:15
-+++ Makefile.in 2004-07-03 20:20:15
-@@ -35,7 +35,7 @@ OBJS1=rsync.o generator.o receiver.o cle
- main.o checksum.o match.o syscall.o log.o backup.o
- OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \
- fileio.o batch.o clientname.o
+--- old/Makefile.in
++++ new/Makefile.in
+@@ -35,7 +35,7 @@ OBJS1=flist.o rsync.o generator.o receiv
+ util.o main.o checksum.o match.o syscall.o log.o backup.o
+ OBJS2=options.o io.o compat.o hlink.o token.o uidlist.o socket.o hashtable.o \
+ fileio.o batch.o clientname.o chmod.o acls.o xattrs.o
-OBJS3=progress.o pipe.o
+OBJS3=progress.o pipe.o hashlink.o
DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \
popt/popthelp.o popt/poptparse.o
-OBJS3=progress.o pipe.o
+OBJS3=progress.o pipe.o hashlink.o
DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \
popt/popthelp.o popt/poptparse.o
---- orig/hashlink.c 2004-09-24 16:44:25
-+++ hashlink.c 2004-09-24 16:44:25
-@@ -0,0 +1,340 @@
+--- old/flist.c
++++ new/flist.c
+@@ -65,6 +65,7 @@ extern int protocol_version;
+ extern int sanitize_paths;
+ extern struct stats stats;
+ extern char *filesfrom_host;
++extern char *link_by_hash_dir;
+
+ extern char curr_dir[MAXPATHLEN];
+
+@@ -830,7 +831,7 @@ static struct file_struct *recv_file_ent
+ extra_len += (S_ISDIR(mode) ? 2 : 1) * EXTRA_LEN;
+ #endif
+
+- if (always_checksum && S_ISREG(mode))
++ if ((always_checksum || link_by_hash_dir) && S_ISREG(mode))
+ extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
+
+ if (file_length > 0xFFFFFFFFu && S_ISREG(mode))
+--- old/hashlink.c
++++ new/hashlink.c
+@@ -0,0 +1,336 @@
+ for (dst = hash, i = 0; i < 4; i++, src++) {
+ c = *src >> 4;
+ *(dst++) = (c >= 10) ? (c - 10 + 'a') : (c + '0');
+ for (dst = hash, i = 0; i < 4; i++, src++) {
+ c = *src >> 4;
+ *(dst++) = (c >= 10) ? (c - 10 + 'a') : (c + '0');
+ asprintf(&hashfile->name,"%s/%s",hashname,
+ di->d_name);
+ if (do_stat(hashfile->name,&st) == -1) {
+ asprintf(&hashfile->name,"%s/%s",hashname,
+ di->d_name);
+ if (do_stat(hashfile->name,&st) == -1) {
-+ if (file->length == 0) {
-+ return robust_rename(fnametmp,fname,0644);
-+ }
++ if (F_LENGTH(file) == 0)
++ return robust_rename(fnametmp, fname, NULL, 0644);
+ }
+ free(dirname);
+
+ if (do_mkdir(hashname, 0755) == -1 && errno != EEXIST) {
+ rsyserr(FERROR, errno, "mkdir failed: %s", hashname);
+ free(hashname);
+ }
+ free(dirname);
+
+ if (do_mkdir(hashname, 0755) == -1 && errno != EEXIST) {
+ rsyserr(FERROR, errno, "mkdir failed: %s", hashname);
+ free(hashname);
+ } else {
+ rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"",
+ linkname, full_fname(fname));
+ } else {
+ rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"",
+ linkname, full_fname(fname));
+ rprintf(FINFO, "link-by-hash (new): %s -> \"%s\"\n",
+ full_fname(fname),linkname);
+
+ rprintf(FINFO, "link-by-hash (new): %s -> \"%s\"\n",
+ full_fname(fname),linkname);
+
+ if (rc != 0) {
+ rsyserr(FERROR, errno, "rename \"%s\" -> \"%s\"",
+ full_fname(fnametmp), full_fname(fname));
+ if (rc != 0) {
+ rsyserr(FERROR, errno, "rename \"%s\" -> \"%s\"",
+ full_fname(fnametmp), full_fname(fname));
---- orig/options.c 2005-05-03 16:47:32
-+++ options.c 2005-04-09 18:01:31
-@@ -141,6 +141,7 @@ char *log_format = NULL;
- char *password_file = NULL;
- char *rsync_path = RSYNC_PATH;
- char *backup_dir = NULL;
+--- old/options.c
++++ new/options.c
+@@ -153,6 +153,7 @@ char *backup_suffix = NULL;
+ char *tmpdir = NULL;
+ char *partial_dir = NULL;
+ char *basis_dir[MAX_BASIS_DIRS+1];
- char backup_dir_buf[MAXPATHLEN];
- int rsync_port = 0;
- int compare_dest = 0;
-@@ -321,6 +322,7 @@ void usage(enum logcode F)
+ char *config_file = NULL;
+ char *shell_cmd = NULL;
+ char *logfile_name = NULL;
+@@ -385,6 +386,7 @@ void usage(enum logcode F)
rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
+ rprintf(F," --link-by-hash=DIR create hardlinks by hash into DIR\n");
rprintf(F," -z, --compress compress file data during the transfer\n");
rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
+ rprintf(F," --link-by-hash=DIR create hardlinks by hash into DIR\n");
rprintf(F," -z, --compress compress file data during the transfer\n");
- rprintf(F," -C, --cvs-exclude auto-ignore files the same way CVS does\n");
- rprintf(F," -f, --filter=RULE add a file-filtering RULE\n");
-@@ -364,7 +366,7 @@ enum {OPT_VERSION = 1000, OPT_DAEMON, OP
- OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST,
- OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW,
- OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH,
-- OPT_TIMEOUT, OPT_MAX_SIZE,
-+ OPT_TIMEOUT, OPT_MAX_SIZE, OPT_LINK_BY_HASH,
- OPT_REFUSED_BASE = 9000};
+ rprintf(F," --compress-level=NUM explicitly set compression level\n");
+ rprintf(F," --skip-compress=LIST skip compressing files with a suffix in LIST\n");
+@@ -437,7 +439,7 @@ enum {OPT_VERSION = 1000, OPT_DAEMON, OP
+ OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST, OPT_HELP,
+ OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
+ OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
+- OPT_NO_D, OPT_APPEND,
++ OPT_NO_D, OPT_APPEND, OPT_LINK_BY_HASH,
+ OPT_SERVER, OPT_REFUSED_BASE = 9000};
{"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
{"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
{"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
+ {"link-by-hash", 0, POPT_ARG_STRING, 0, OPT_LINK_BY_HASH, 0, 0},
{"fuzzy", 'y', POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 },
{"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
{"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
{"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
+ {"link-by-hash", 0, POPT_ARG_STRING, 0, OPT_LINK_BY_HASH, 0, 0},
{"fuzzy", 'y', POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 },
- /* TODO: Should this take an optional int giving the compression level? */
- {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 },
-@@ -882,6 +885,21 @@ int parse_arguments(int *argc, const cha
- basis_dir[basis_dir_cnt++] = (char *)arg;
- break;
+ {"compress", 'z', POPT_ARG_NONE, 0, 'z', 0, 0 },
+ {"no-compress", 0, POPT_ARG_VAL, &do_compression, 0, 0, 0 },
+@@ -1219,6 +1222,21 @@ int parse_arguments(int *argc_p, const c
+ return 0;
+ #endif
-+ arg = sanitize_path(NULL, arg, NULL, 0);
++ arg = sanitize_path(NULL, arg, NULL, 0, NULL);
-@@ -1462,6 +1480,11 @@ void server_options(char **args,int *arg
- }
- }
+@@ -1965,6 +1983,11 @@ void server_options(char **args, int *ar
+ } else if (inplace)
+ args[ac++] = "--inplace";
---- orig/receiver.c 2005-04-14 01:53:12
-+++ receiver.c 2005-01-15 21:29:13
-@@ -52,6 +52,7 @@ extern int delay_updates;
- extern struct stats stats;
- extern char *log_format;
- extern char *tmpdir;
-+extern char *link_by_hash_dir;
- extern char *partial_dir;
- extern char *basis_dir[];
- extern struct file_list *the_file_list;
-@@ -185,12 +186,13 @@ static int get_tmpname(char *fnametmp, c
+--- old/receiver.c
++++ new/receiver.c
+@@ -125,12 +125,14 @@ int get_tmpname(char *fnametmp, char *fn
static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
-- char *fname, int fd, OFF_T total_size)
-+ char *fname, int fd, OFF_T total_size, char *md4)
+- const char *fname, int fd, OFF_T total_size)
++ const char *fname, int fd, OFF_T total_size,
++ const char *md4)
-@@ -210,6 +212,9 @@ static int receive_data(int f_in, char *
+@@ -150,6 +152,9 @@ static int receive_data(int f_in, char *
- while ((i = recv_token(f_in, &data)) != 0) {
-@@ -226,6 +231,8 @@ static int receive_data(int f_in, char *
+ if (append_mode > 0) {
+@@ -194,6 +199,8 @@ static int receive_data(int f_in, char *
-@@ -252,6 +259,8 @@ static int receive_data(int f_in, char *
+@@ -220,6 +227,8 @@ static int receive_data(int f_in, char *
- if (inplace) {
-@@ -292,6 +301,8 @@ static int receive_data(int f_in, char *
+ if (updating_basis) {
+@@ -262,6 +271,8 @@ static int receive_data(int f_in, char *
-@@ -307,7 +318,7 @@ static int receive_data(int f_in, char *
+@@ -277,7 +288,7 @@ static int receive_data(int f_in, char *
+ receive_data(f_in, NULL, -1, 0, NULL, -1, length, NULL);
}
+ receive_data(f_in, NULL, -1, 0, NULL, -1, length, NULL);
}
- static void handle_delayed_updates(struct file_list *flist, char *local_name)
-@@ -635,8 +646,12 @@ int recv_files(int f_in, struct file_lis
- rprintf(FINFO, "%s\n", safe_fname(fname));
+ static void handle_delayed_updates(char *local_name)
+@@ -665,7 +676,7 @@ int recv_files(int f_in, char *local_nam
recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
-- fname, fd2, file->length);
-+ fname, fd2, file->length, file->u.sum);
+- fname, fd2, F_LENGTH(file));
++ fname, fd2, F_LENGTH(file), F_SUM(file));
+
+ log_item(log_code, file, &initial_stats, iflags, NULL);
- if (!log_before_transfer)
- log_item(file, &initial_stats, iflags, NULL);
---- orig/rsync.c 2005-03-16 02:19:30
-+++ rsync.c 2005-02-21 11:04:36
-@@ -38,6 +38,7 @@ extern int inplace;
+--- old/rsync.c
++++ new/rsync.c
+@@ -47,6 +47,7 @@ extern int inplace;
+ extern int flist_eof;
-
-
- /*
-@@ -188,7 +189,12 @@ void finish_transfer(char *fname, char *
- rprintf(FINFO, "renaming %s to %s\n",
- safe_fname(fnametmp), safe_fname(fname));
- }
-- ret = robust_rename(fnametmp, fname, file->mode & INITACCESSPERMS);
-+#if HAVE_LINK
+ extern struct file_list *cur_flist, *first_flist, *dir_flist;
+ extern struct chmod_mode_struct *daemon_chmod_modes;
+ #ifdef ICONV_OPTION
+@@ -529,8 +530,15 @@ void finish_transfer(const char *fname,
+ /* move tmp file over real file */
+ if (verbose > 2)
+ rprintf(FINFO, "renaming %s to %s\n", fnametmp, fname);
+- ret = robust_rename(fnametmp, fname, partialptr,
+- file->mode & INITACCESSPERMS);
++#ifdef HAVE_LINK
-+ ret = robust_rename(fnametmp, fname, file->mode & INITACCESSPERMS);
++ {
++ ret = robust_rename(fnametmp, fname, partialptr,
++ file->mode & INITACCESSPERMS);
++ }
- ret == -2 ? "copy" : "rename",
---- orig/rsync.h 2005-04-14 01:53:12
-+++ rsync.h 2004-07-03 20:20:15
-@@ -631,6 +631,14 @@ struct stats {
+ ret == -2 ? "copy" : "rename",
+--- old/rsync.h
++++ new/rsync.h
+@@ -768,6 +768,14 @@ struct stats {
- #include "byteorder.h"
- #include "lib/mdfour.h"
---- orig/rsync.yo 2005-05-03 16:47:33
-+++ rsync.yo 2005-02-13 06:58:47
-@@ -364,6 +364,7 @@ to the detailed description below for a
+ #define EMPTY_ITEM_LIST {NULL, 0, 0}
+--- old/rsync.yo
++++ new/rsync.yo
+@@ -392,6 +392,7 @@ to the detailed description below for a
--compare-dest=DIR also compare received files relative to DIR
--copy-dest=DIR ... and include copies of unchanged files
--link-dest=DIR hardlink to files in DIR when unchanged
+ --link-by-hash=DIR create hardlinks by hash into DIR
-z, --compress compress file data during the transfer
--compare-dest=DIR also compare received files relative to DIR
--copy-dest=DIR ... and include copies of unchanged files
--link-dest=DIR hardlink to files in DIR when unchanged
+ --link-by-hash=DIR create hardlinks by hash into DIR
-z, --compress compress file data during the transfer
- -C, --cvs-exclude auto-ignore files in the same way CVS does
- -f, --filter=RULE add a file-filtering RULE
+ --compress-level=NUM explicitly set compression level
+ --skip-compress=LIST skip compressing files with suffix in LIST