-After applying this patch and running configure, you MUST run this
-command before "make":
-
- make proto
-
-Jason M. Felice writes:
+Jason M. Felice wrote:
This patch adds the --link-by-hash=DIR option, which hard-links each received
file into a link farm arranged by MD4 file hash. As a result, the system
stores only one copy of each unique file's contents, regardless of the
file's name.
+To use this patch, run these commands for a successful build:
+
+ patch -p1 <patches/link-by-hash.diff
+ ./prepare-source
+ ./configure
+ make
---- orig/Makefile.in 2004-08-13 07:18:58
-+++ Makefile.in 2004-07-03 20:20:15
-@@ -35,7 +35,7 @@ OBJS1=rsync.o generator.o receiver.o cle
- main.o checksum.o match.o syscall.o log.o backup.o
- OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \
- fileio.o batch.o clientname.o
+based-on: a01e3b490eb36ccf9e704840e1b6683dab867550
+diff --git a/Makefile.in b/Makefile.in
+--- a/Makefile.in
++++ b/Makefile.in
+@@ -37,7 +37,7 @@ OBJS1=flist.o rsync.o generator.o receiver.o cleanup.o sender.o exclude.o \
+ util.o main.o checksum.o match.o syscall.o log.o backup.o delete.o
+ OBJS2=options.o io.o compat.o hlink.o token.o uidlist.o socket.o hashtable.o \
+ fileio.o batch.o clientname.o chmod.o acls.o xattrs.o
-OBJS3=progress.o pipe.o
+OBJS3=progress.o pipe.o hashlink.o
DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \
popt/popthelp.o popt/poptparse.o
---- orig/hashlink.c 2004-08-13 18:04:59
-+++ hashlink.c 2004-08-13 18:04:59
-@@ -0,0 +1,342 @@
+diff --git a/flist.c b/flist.c
+--- a/flist.c
++++ b/flist.c
+@@ -74,6 +74,7 @@ extern int sender_keeps_checksum;
+ extern int unsort_ndx;
+ extern struct stats stats;
+ extern char *filesfrom_host;
++extern char *link_by_hash_dir;
+ extern char *usermap, *groupmap;
+
+ extern char curr_dir[MAXPATHLEN];
+@@ -910,7 +911,7 @@ static struct file_struct *recv_file_entry(int f, struct file_list *flist, int x
+ extra_len += EXTRA_LEN;
+ #endif
+
+- if (always_checksum && S_ISREG(mode))
++ if ((always_checksum || link_by_hash_dir) && S_ISREG(mode))
+ extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
+
+ #if SIZEOF_INT64 >= 8
+diff --git a/hashlink.c b/hashlink.c
+new file mode 100644
+--- /dev/null
++++ b/hashlink.c
+@@ -0,0 +1,339 @@
+/*
+ Copyright (C) Cronosys, LLC 2004
+
+
+#ifdef HAVE_LINK
+
-+char* make_hash_name(struct file_struct *file)
++char *make_hash_name(struct file_struct *file)
+{
+ char hash[33], *dst;
-+ unsigned char *src;
-+ unsigned char c;
++ uchar c, *src = (uchar*)F_SUM(file);
+ int i;
+
-+ src = (unsigned char*)file->u.sum;
+ for (dst = hash, i = 0; i < 4; i++, src++) {
+ c = *src >> 4;
+ *(dst++) = (c >= 10) ? (c - 10 + 'a') : (c + '0');
+ }
+ *dst = 0;
+
-+ asprintf(&dst,"%s/%s",link_by_hash_dir,hash);
++ if (asprintf(&dst,"%s/%s",link_by_hash_dir,hash) < 0)
++ out_of_memory("make_hash_name");
+ return dst;
+}
+
+ long this_fnbr;
+
+ *fnbr = 0;
-+
++
+ /* Build a list of potential candidates and open
+ * them. */
+ if ((d = opendir(hashname)) == NULL) {
+ if (this_fnbr > *fnbr)
+ *fnbr = this_fnbr;
+
-+ hashfile = (struct hashfile_struct*)malloc(sizeof(struct hashfile_struct));
-+ asprintf(&hashfile->name,"%s/%s",hashname,
-+ di->d_name);
++ hashfile = new_array(struct hashfile_struct, 1);
++ if (asprintf(&hashfile->name,"%s/%s",hashname, di->d_name) < 0)
++ out_of_memory("find_hashfiles");
+ if (do_stat(hashfile->name,&st) == -1) {
+ rsyserr(FERROR, errno, "stat failed: %s", hashfile->name);
+ kill_hashfile(hashfile);
+ /* There are no matches. */
+ return NULL;
+ }
-+
+ }
+
+ if (amt == -1) {
+}
+
+
-+int link_by_hash(char *fnametmp,char *fname,struct file_struct *file)
++int link_by_hash(const char *fnametmp, const char *fname, struct file_struct *file)
+{
+ STRUCT_STAT st;
-+ char *hashname = make_hash_name(file);
++ char *hashname = make_hash_name(file);
+ int first = 0, rc;
+ char *linkname;
+ long last_fnbr;
+
-+ if (file->length == 0) {
-+ return robust_rename(fnametmp,fname,0644);
-+ }
++ if (F_LENGTH(file) == 0)
++ return robust_rename(fnametmp, fname, NULL, 0644);
+
+ if (do_stat(hashname, &st) == -1) {
+ char *dirname;
+ rsyserr(FERROR, errno, "mkdir failed: %s", dirname);
+ free(hashname);
+ free(dirname);
-+ return robust_rename(fnametmp,fname,0644);
++ return robust_rename(fnametmp, fname, NULL, 0644);
+ }
+ free(dirname);
+
+ if (do_mkdir(hashname, 0755) == -1 && errno != EEXIST) {
+ rsyserr(FERROR, errno, "mkdir failed: %s", hashname);
+ free(hashname);
-+ return robust_rename(fnametmp,fname,0644);
++ return robust_rename(fnametmp, fname, NULL, 0644);
+ }
+
+ first = 1;
-+ asprintf(&linkname,"%s/0",hashname);
++ if (asprintf(&linkname,"%s/0",hashname) < 0)
++ out_of_memory("link_by_hash");
+ rprintf(FINFO, "(1) linkname = %s\n", linkname);
-+
+ } else {
+ struct hashfile_struct *hashfiles, *hashfile;
-+ int fd;
+
+ if (do_stat(fnametmp,&st) == -1) {
+ rsyserr(FERROR, errno, "stat failed: %s", fname);
+
+ if (hashfiles == NULL) {
+ first = 1;
-+ asprintf(&linkname,"%s/0",hashname);
++ if (asprintf(&linkname,"%s/0",hashname) < 0)
++ out_of_memory("link_by_hash");
+ rprintf(FINFO, "(2) linkname = %s\n", linkname);
+ } else {
-+
++ int fd;
+ /* Search for one identical to us. */
+ if ((fd = open(fnametmp,O_RDONLY|O_BINARY)) == -1) {
+ rsyserr(FERROR, errno, "open failed: %s", fnametmp);
+ }
+ hashfile = compare_hashfiles(fd, hashfiles);
+ hashfiles = NULL;
++ close(fd);
+
+ if (hashfile) {
+ first = 0;
+ kill_hashfile(hashfile);
+ } else {
+ first = 1;
-+ asprintf(&linkname, "%s/%ld", hashname,
-+ last_fnbr + 1);
++ if (asprintf(&linkname, "%s/%ld", hashname, last_fnbr + 1) < 0)
++ out_of_memory("link_by_hash");
+ rprintf(FINFO, "(4) linkname = %s\n", linkname);
+ }
+ }
+ if (!first) {
+ rprintf(FINFO, "link-by-hash (existing): \"%s\" -> %s\n",
+ linkname, full_fname(fname));
++ robust_unlink(fname);
+ rc = do_link(linkname, fname);
+ if (rc == -1) {
+ if (errno == EMLINK) {
+ first = 1;
+ free(linkname);
-+ asprintf(&linkname,"%s/%ld",hashname,
-+ last_fnbr + 1);
++ if (asprintf(&linkname,"%s/%ld",hashname, last_fnbr + 1) < 0)
++ out_of_memory("link_by_hash");
+ rprintf(FINFO, "(5) linkname = %s\n", linkname);
+ rprintf(FINFO,"link-by-hash: max link count exceeded, starting new file \"%s\".\n", linkname);
+ } else {
+ rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"",
+ linkname, full_fname(fname));
-+ robust_unlink(fname);
-+ rc = robust_rename(fnametmp,fname,0644);
++ rc = robust_rename(fnametmp, fname, NULL, 0644);
+ }
+ } else {
+ do_unlink(fnametmp);
+ rprintf(FINFO, "link-by-hash (new): %s -> \"%s\"\n",
+ full_fname(fname),linkname);
+
-+ rc = robust_rename(fnametmp,fname,0644);
++ rc = robust_rename(fnametmp, fname, NULL, 0644);
+ if (rc != 0) {
+ rsyserr(FERROR, errno, "rename \"%s\" -> \"%s\"",
+ full_fname(fnametmp), full_fname(fname));
+ free(hashname);
+ return rc;
+}
-+
+#endif
---- orig/options.c 2004-09-20 05:10:48
-+++ options.c 2004-08-13 18:13:18
-@@ -126,6 +126,7 @@ char *log_format = NULL;
- char *password_file = NULL;
- char *rsync_path = RSYNC_PATH;
- char *backup_dir = NULL;
+diff --git a/options.c b/options.c
+--- a/options.c
++++ b/options.c
+@@ -158,6 +158,7 @@ char *backup_suffix = NULL;
+ char *tmpdir = NULL;
+ char *partial_dir = NULL;
+ char *basis_dir[MAX_BASIS_DIRS+1];
+char *link_by_hash_dir = NULL;
- char backup_dir_buf[MAXPATHLEN];
- int rsync_port = RSYNC_PORT;
- int link_dest = 0;
-@@ -279,6 +280,7 @@ void usage(enum logcode F)
- rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n");
+ char *config_file = NULL;
+ char *shell_cmd = NULL;
+ char *logfile_name = NULL;
+@@ -746,6 +747,7 @@ void usage(enum logcode F)
rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
- rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n");
-+ rprintf(F," --link-by-hash=DIR create hardlinks by hash to DIR for regular files\n");
- rprintf(F," -P equivalent to --partial --progress\n");
- rprintf(F," -z, --compress compress file data\n");
- rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n");
-@@ -319,7 +321,7 @@ void usage(enum logcode F)
- enum {OPT_VERSION = 1000, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
- OPT_DELETE_AFTER, OPT_DELETE_EXCLUDED, OPT_LINK_DEST,
- OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW,
-- OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_TIMEOUT,
-+ OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_TIMEOUT, OPT_LINK_BY_HASH,
- OPT_REFUSED_BASE = 9000};
+ rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
+ rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
++ rprintf(F," --link-by-hash=DIR create hardlinks by hash into DIR\n");
+ rprintf(F," -z, --compress compress file data during the transfer\n");
+ rprintf(F," --compress-level=NUM explicitly set compression level\n");
+ rprintf(F," --skip-compress=LIST skip compressing files with a suffix in LIST\n");
+@@ -798,7 +800,7 @@ enum {OPT_VERSION = 1000, OPT_DAEMON, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
+ OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST, OPT_HELP,
+ OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
+ OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
+- OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG,
++ OPT_NO_D, OPT_APPEND, OPT_NO_ICONV, OPT_INFO, OPT_DEBUG, OPT_LINK_BY_HASH,
+ OPT_USERMAP, OPT_GROUPMAP, OPT_CHOWN, OPT_BWLIMIT,
+ OPT_SERVER, OPT_REFUSED_BASE = 9000};
- static struct poptOption long_options[] = {
-@@ -378,6 +380,7 @@ static struct poptOption long_options[]
- {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
- {"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 },
- {"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 },
-+ {"link-by-hash", 0, POPT_ARG_STRING, 0, OPT_LINK_BY_HASH, 0, 0},
- /* TODO: Should this take an optional int giving the compression level? */
- {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 },
- {"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 },
-@@ -616,6 +619,22 @@ int parse_arguments(int *argc, const cha
+@@ -938,6 +940,7 @@ static struct poptOption long_options[] = {
+ {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
+ {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
+ {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
++ {"link-by-hash", 0, POPT_ARG_STRING, 0, OPT_LINK_BY_HASH, 0, 0},
+ {"fuzzy", 'y', POPT_ARG_VAL, &fuzzy_basis, 1, 0, 0 },
+ {"no-fuzzy", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
+ {"no-y", 0, POPT_ARG_VAL, &fuzzy_basis, 0, 0, 0 },
+@@ -1764,6 +1767,21 @@ int parse_arguments(int *argc_p, const char ***argv_p)
return 0;
#endif
+ case OPT_LINK_BY_HASH:
-+#if HAVE_LINK
++#ifdef HAVE_LINK
+ arg = poptGetOptArg(pc);
+ if (sanitize_paths)
-+ arg = sanitize_path(NULL, arg, NULL, 0);
++ arg = sanitize_path(NULL, arg, NULL, 0, SP_DEFAULT);
+ link_by_hash_dir = (char *)arg;
-+ checksum_seed = 12345;
+ break;
+#else
+ snprintf(err_buf, sizeof err_buf,
+
default:
/* A large opt value means that set_refuse_options()
- * turned this option off (opt-BASE is its index). */
-@@ -1083,6 +1102,11 @@ void server_options(char **args,int *arg
- args[ac++] = compare_dest;
- }
+ * turned this option off. */
+@@ -2636,6 +2654,11 @@ void server_options(char **args, int *argc_p)
+ } else if (inplace)
+ args[ac++] = "--inplace";
+ if (link_by_hash_dir && am_sender) {
+ args[ac++] = "--link-by-hash";
+ args[ac++] = link_by_hash_dir;
+ }
+
- if (files_from && (!am_sender || remote_filesfrom_file)) {
- if (remote_filesfrom_file) {
+ if (files_from && (!am_sender || filesfrom_host)) {
+ if (filesfrom_host) {
args[ac++] = "--files-from";
---- orig/receiver.c 2004-09-21 09:40:27
-+++ receiver.c 2004-07-20 21:44:05
-@@ -39,6 +39,7 @@ extern int io_error;
- extern char *tmpdir;
- extern char *partial_dir;
- extern char *compare_dest;
-+extern char *link_by_hash_dir;
- extern int make_backups;
- extern int do_progress;
- extern char *backup_dir;
-@@ -202,12 +203,13 @@ static int get_tmpname(char *fnametmp, c
-
+diff --git a/receiver.c b/receiver.c
+--- a/receiver.c
++++ b/receiver.c
+@@ -196,11 +196,13 @@ int open_tmpfile(char *fnametmp, const char *fname, struct file_struct *file)
+ }
static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
-- char *fname, int fd, OFF_T total_size)
-+ char *fname, int fd, OFF_T total_size, char *md4)
+- const char *fname, int fd, OFF_T total_size)
++ const char *fname, int fd, OFF_T total_size,
++ const char *md4)
{
- static char file_sum1[MD4_SUM_LENGTH];
- static char file_sum2[MD4_SUM_LENGTH];
+ static char file_sum1[MAX_DIGEST_LEN];
struct map_struct *mapbuf;
struct sum_struct sum;
-+ struct mdfour mdfour_data;
- unsigned int len;
++ md_context mdfour_data;
+ int32 len;
OFF_T offset = 0;
OFF_T offset2;
-@@ -227,6 +229,9 @@ static int receive_data(int f_in, char *
+@@ -220,6 +222,9 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
} else
mapbuf = NULL;
+
sum_init(checksum_seed);
- while ((i = recv_token(f_in, &data)) != 0) {
-@@ -243,6 +248,8 @@ static int receive_data(int f_in, char *
+ if (append_mode > 0) {
+@@ -264,6 +269,8 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
cleanup_got_literal = 1;
- sum_update(data,i);
+ sum_update(data, i);
+ if (md4)
-+ mdfour_update(&mdfour_data,data,i);
++ mdfour_update(&mdfour_data, (uchar*)data, i);
if (fd != -1 && write_file(fd,data,i) != i)
goto report_write_error;
-@@ -267,6 +274,8 @@ static int receive_data(int f_in, char *
+@@ -290,6 +297,8 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
see_token(map, len);
- sum_update(map,len);
+ sum_update(map, len);
+ if (md4)
-+ mdfour_update(&mdfour_data,map,len);
++ mdfour_update(&mdfour_data, (uchar*)map, len);
}
- if (inplace) {
-@@ -306,6 +315,8 @@ static int receive_data(int f_in, char *
- }
+ if (updating_basis_or_equiv) {
+@@ -337,6 +346,9 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
+ if (sum_end(file_sum1) != checksum_len)
+ overflow_exit("checksum_len"); /* Impossible... */
- sum_end(file_sum1);
+ if (md4)
-+ mdfour_result(&mdfour_data, (unsigned char*)md4);
-
++ mdfour_result(&mdfour_data, (uchar*)md4);
++
if (mapbuf)
unmap_file(mapbuf);
-@@ -321,7 +332,7 @@ static int receive_data(int f_in, char *
+
+@@ -351,7 +363,7 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
static void discard_receive_data(int f_in, OFF_T length)
{
+ receive_data(f_in, NULL, -1, 0, NULL, -1, length, NULL);
}
-
-@@ -542,8 +553,12 @@ int recv_files(int f_in, struct file_lis
- rprintf(FINFO, "%s\n", safe_fname(fname));
+ static void handle_delayed_updates(char *local_name)
+@@ -779,7 +791,7 @@ int recv_files(int f_in, int f_out, char *local_name)
/* recv file data */
-+#ifdef HAVE_LINK
-+ if (link_by_hash_dir)
-+ file->u.sum = (char*)malloc(MD4_SUM_LENGTH);
-+#endif
recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
-- fname, fd2, file->length);
-+ fname, fd2, file->length, file->u.sum);
+- fname, fd2, F_LENGTH(file));
++ fname, fd2, F_LENGTH(file), F_SUM(file));
- log_recv(file, &initial_stats);
+ log_item(log_code, file, iflags, NULL);
---- orig/rsync.c 2004-09-07 21:45:30
-+++ rsync.c 2004-08-13 18:14:34
-@@ -34,6 +34,7 @@ extern int force_delete;
- extern int recurse;
+diff --git a/rsync.c b/rsync.c
+--- a/rsync.c
++++ b/rsync.c
+@@ -49,6 +49,7 @@ extern int flist_eof;
+ extern int file_old_total;
extern int keep_dirlinks;
extern int make_backups;
+extern char *link_by_hash_dir;
- extern char *backup_dir;
- extern int inplace;
-
-@@ -254,7 +255,12 @@ void finish_transfer(char *fname, char *
+ extern struct file_list *cur_flist, *first_flist, *dir_flist;
+ extern struct chmod_mode_struct *daemon_chmod_modes;
+ #ifdef ICONV_OPTION
+@@ -644,8 +645,15 @@ int finish_transfer(const char *fname, const char *fnametmp,
/* move tmp file over real file */
- if (verbose > 2)
+ if (DEBUG_GTE(RECV, 1))
rprintf(FINFO, "renaming %s to %s\n", fnametmp, fname);
-- ret = robust_rename(fnametmp, fname, file->mode & INITACCESSPERMS);
+- ret = robust_rename(fnametmp, fname, temp_copy_name,
+- file->mode & INITACCESSPERMS);
+#ifdef HAVE_LINK
+ if (link_by_hash_dir)
+ ret = link_by_hash(fnametmp, fname, file);
+ else
+#endif
-+ ret = robust_rename(fnametmp, fname, file->mode & INITACCESSPERMS);
++ {
++ ret = robust_rename(fnametmp, fname, temp_copy_name,
++ file->mode & INITACCESSPERMS);
++ }
if (ret < 0) {
- rsyserr(FERROR, errno, "%s %s -> \"%s\"",
- ret == -2 ? "copy" : "rename",
---- orig/rsync.h 2004-08-03 15:41:32
-+++ rsync.h 2004-07-03 20:20:15
-@@ -525,6 +525,14 @@ struct stats {
- int current_file_index;
+ rsyserr(FERROR_XFER, errno, "%s %s -> \"%s\"",
+ ret == -2 ? "copy" : "rename",
+diff --git a/rsync.h b/rsync.h
+--- a/rsync.h
++++ b/rsync.h
+@@ -865,6 +865,14 @@ struct stats {
+ int xferred_files;
};
+struct hashfile_struct {
+ uint32 nlink;
+};
+
+ struct chmod_mode_struct;
- /* we need this function because of the silly way in which duplicate
- entries are handled in the file lists - we can't change this
+ struct flist_ndx_item {
+diff --git a/rsync.yo b/rsync.yo
+--- a/rsync.yo
++++ b/rsync.yo
+@@ -400,6 +400,7 @@ to the detailed description below for a complete description. verb(
+ --compare-dest=DIR also compare received files relative to DIR
+ --copy-dest=DIR ... and include copies of unchanged files
+ --link-dest=DIR hardlink to files in DIR when unchanged
++ --link-by-hash=DIR create hardlinks by hash into DIR
+ -z, --compress compress file data during the transfer
+ --compress-level=NUM explicitly set compression level
+ --skip-compress=LIST skip compressing files with suffix in LIST