-After applying this patch, run these commands for a successful build:
-
- ./prepare-source
- ./configure (optional if already run)
- make
-
-Jason M. Felice writes:
+Jason M. Felice wrote:
This patch adds the --link-by-hash=DIR option, which hard links received
files in a link farm arranged by MD4 file hash. The result is that the system
will only store one copy of the unique contents of each file, regardless of
the file's name.
+To use this patch, run these commands for a successful build:
+
+ patch -p1 <patches/link-by-hash.diff
+ ./prepare-source
+ ./configure
+ make
--- old/Makefile.in
+++ new/Makefile.in
-@@ -34,7 +34,7 @@ OBJS1=rsync.o generator.o receiver.o cle
- main.o checksum.o match.o syscall.o log.o backup.o
- OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \
- fileio.o batch.o clientname.o chmod.o
+@@ -35,7 +35,7 @@ OBJS1=flist.o rsync.o generator.o receiv
+ util.o main.o checksum.o match.o syscall.o log.o backup.o
+ OBJS2=options.o io.o compat.o hlink.o token.o uidlist.o socket.o \
+ fileio.o batch.o clientname.o chmod.o acls.o
-OBJS3=progress.o pipe.o
+OBJS3=progress.o pipe.o hashlink.o
DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \
popt/popthelp.o popt/poptparse.o
+--- old/flist.c
++++ new/flist.c
+@@ -60,6 +60,7 @@ extern int copy_unsafe_links;
+ extern int protocol_version;
+ extern int sanitize_paths;
+ extern struct stats stats;
++extern char *link_by_hash_dir;
+
+ extern char curr_dir[MAXPATHLEN];
+
+@@ -747,7 +748,7 @@ static struct file_struct *recv_file_ent
+ extra_len += (S_ISDIR(mode) ? 2 : 1) * EXTRA_LEN;
+ #endif
+
+- if (always_checksum && S_ISREG(mode))
++ if ((always_checksum || link_by_hash_dir) && S_ISREG(mode))
+ extra_len += SUM_EXTRA_CNT * EXTRA_LEN;
+
+ if (file_length > 0xFFFFFFFFu && S_ISREG(mode))
--- old/hashlink.c
+++ new/hashlink.c
-@@ -0,0 +1,339 @@
+@@ -0,0 +1,336 @@
+/*
+ Copyright (C) Cronosys, LLC 2004
+
+
+extern char *link_by_hash_dir;
+
-+#if HAVE_LINK
++#ifdef HAVE_LINK
+
-+char* make_hash_name(struct file_struct *file)
++char *make_hash_name(struct file_struct *file)
+{
+ char hash[33], *dst;
-+ unsigned char *src;
-+ unsigned char c;
++ uchar c, *src = (uchar*)F_SUM(file);
+ int i;
+
-+ src = (unsigned char*)file->u.sum;
+ for (dst = hash, i = 0; i < 4; i++, src++) {
+ c = *src >> 4;
+ *(dst++) = (c >= 10) ? (c - 10 + 'a') : (c + '0');
+ char *linkname;
+ long last_fnbr;
+
-+ if (file->length == 0)
++ if (F_LENGTH(file) == 0)
+ return robust_rename(fnametmp, fname, NULL, 0644);
+
+ if (do_stat(hashname, &st) == -1) {
+ free(hashname);
+ return rc;
+}
-+
+#endif
--- old/options.c
+++ new/options.c
+char *link_by_hash_dir = NULL;
char *config_file = NULL;
char *shell_cmd = NULL;
- char *log_format = NULL;
-@@ -338,6 +339,7 @@ void usage(enum logcode F)
+ char *logfile_name = NULL;
+@@ -355,6 +356,7 @@ void usage(enum logcode F)
rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
rprintf(F," -z, --compress compress file data during the transfer\n");
rprintf(F," --compress-level=NUM explicitly set compression level\n");
rprintf(F," -C, --cvs-exclude auto-ignore files the same way CVS does\n");
-@@ -384,7 +386,7 @@ enum {OPT_VERSION = 1000, OPT_DAEMON, OP
+@@ -404,7 +406,7 @@ enum {OPT_VERSION = 1000, OPT_DAEMON, OP
OPT_FILTER, OPT_COMPARE_DEST, OPT_COPY_DEST, OPT_LINK_DEST, OPT_HELP,
OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW, OPT_MIN_SIZE, OPT_CHMOD,
OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_ONLY_WRITE_BATCH, OPT_MAX_SIZE,
OPT_SERVER, OPT_REFUSED_BASE = 9000};
static struct poptOption long_options[] = {
-@@ -478,6 +480,7 @@ static struct poptOption long_options[]
+@@ -513,6 +515,7 @@ static struct poptOption long_options[]
{"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 },
{"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
{"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
{"fuzzy", 'y', POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 },
{"compress", 'z', POPT_ARG_NONE, 0, 'z', 0, 0 },
{"compress-level", 0, POPT_ARG_INT, &def_compress_level, 'z', 0, 0 },
-@@ -1062,6 +1065,21 @@ int parse_arguments(int *argc, const cha
- usage(FINFO);
- exit_cleanup(0);
+@@ -1127,6 +1130,21 @@ int parse_arguments(int *argc, const cha
+ #endif
+
+ case OPT_LINK_BY_HASH:
-+#if HAVE_LINK
++#ifdef HAVE_LINK
+ arg = poptGetOptArg(pc);
+ if (sanitize_paths)
-+ arg = sanitize_path(NULL, arg, NULL, 0);
++ arg = sanitize_path(NULL, arg, NULL, 0, NULL);
+ link_by_hash_dir = (char *)arg;
+ break;
+#else
default:
/* A large opt value means that set_refuse_options()
* turned this option off. */
-@@ -1710,6 +1728,11 @@ void server_options(char **args,int *arg
+@@ -1789,6 +1807,11 @@ void server_options(char **args,int *arg
}
}
args[ac++] = "--files-from";
--- old/receiver.c
+++ new/receiver.c
-@@ -54,6 +54,7 @@ extern int delay_updates;
- extern struct stats stats;
- extern char *log_format;
- extern char *tmpdir;
-+extern char *link_by_hash_dir;
- extern char *partial_dir;
- extern char *basis_dir[];
- extern struct file_list *the_file_list;
-@@ -125,12 +126,13 @@ static int get_tmpname(char *fnametmp, c
+@@ -123,12 +123,14 @@ int get_tmpname(char *fnametmp, char *fn
static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
-- char *fname, int fd, OFF_T total_size)
-+ char *fname, int fd, OFF_T total_size, char *md4)
+- const char *fname, int fd, OFF_T total_size)
++ const char *fname, int fd, OFF_T total_size,
++ const char *md4)
{
- static char file_sum1[MD4_SUM_LENGTH];
- static char file_sum2[MD4_SUM_LENGTH];
+ static char file_sum1[MAX_DIGEST_LEN];
+ static char file_sum2[MAX_DIGEST_LEN];
struct map_struct *mapbuf;
struct sum_struct sum;
-+ struct mdfour mdfour_data;
- int32 len;
++ md_context mdfour_data;
+ int32 len, sum_len;
OFF_T offset = 0;
OFF_T offset2;
-@@ -150,6 +152,9 @@ static int receive_data(int f_in, char *
+@@ -148,6 +150,9 @@ static int receive_data(int f_in, char *
} else
mapbuf = NULL;
+
sum_init(checksum_seed);
- if (append_mode) {
-@@ -192,6 +197,8 @@ static int receive_data(int f_in, char *
+ if (append_mode > 0) {
+@@ -190,6 +195,8 @@ static int receive_data(int f_in, char *
cleanup_got_literal = 1;
sum_update(data, i);
if (fd != -1 && write_file(fd,data,i) != i)
goto report_write_error;
-@@ -218,6 +225,8 @@ static int receive_data(int f_in, char *
+@@ -216,6 +223,8 @@ static int receive_data(int f_in, char *
see_token(map, len);
sum_update(map, len);
+ mdfour_update(&mdfour_data, (uchar*)map, len);
}
- if (inplace) {
+ if (updating_basis) {
@@ -258,6 +267,8 @@ static int receive_data(int f_in, char *
}
- sum_end(file_sum1);
+ sum_len = sum_end(file_sum1);
+ if (md4)
-+ mdfour_result(&mdfour_data, (unsigned char*)md4);
++ mdfour_result(&mdfour_data, (uchar*)md4);
if (mapbuf)
unmap_file(mapbuf);
+ receive_data(f_in, NULL, -1, 0, NULL, -1, length, NULL);
}
- static void handle_delayed_updates(struct file_list *flist, char *local_name)
-@@ -605,8 +616,12 @@ int recv_files(int f_in, struct file_lis
- rprintf(FINFO, "%s\n", fname);
+ static void handle_delayed_updates(char *local_name)
+@@ -635,7 +646,7 @@ int recv_files(int f_in, char *local_nam
/* recv file data */
-+#if HAVE_LINK
-+ if (link_by_hash_dir)
-+ file->u.sum = new_array(char, MD4_SUM_LENGTH);
-+#endif
recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
-- fname, fd2, file->length);
-+ fname, fd2, file->length, file->u.sum);
+- fname, fd2, F_LENGTH(file));
++ fname, fd2, F_LENGTH(file), F_SUM(file));
+
+ log_item(log_code, file, &initial_stats, iflags, NULL);
- if (!log_before_transfer)
- log_item(file, &initial_stats, iflags, NULL);
--- old/rsync.c
+++ new/rsync.c
@@ -50,6 +50,7 @@ extern int inplace;
+ extern int flist_eof;
extern int keep_dirlinks;
extern int make_backups;
- extern struct stats stats;
+extern char *link_by_hash_dir;
+ extern struct file_list *cur_flist, *first_flist, *dir_flist;
+ extern struct chmod_mode_struct *daemon_chmod_modes;
- #if defined HAVE_ICONV_OPEN && defined HAVE_ICONV_H
- iconv_t ic_chck = (iconv_t)-1;
-@@ -266,8 +267,15 @@ void finish_transfer(char *fname, char *
+@@ -401,8 +402,15 @@ void finish_transfer(char *fname, char *
/* move tmp file over real file */
if (verbose > 2)
rprintf(FINFO, "renaming %s to %s\n", fnametmp, fname);
- ret = robust_rename(fnametmp, fname, partialptr,
- file->mode & INITACCESSPERMS);
-+#if HAVE_LINK
++#ifdef HAVE_LINK
+ if (link_by_hash_dir)
+ ret = link_by_hash(fnametmp, fname, file);
+ else
ret == -2 ? "copy" : "rename",
--- old/rsync.h
+++ new/rsync.h
-@@ -640,6 +640,14 @@ struct stats {
+@@ -756,6 +756,14 @@ struct stats {
int current_file_index;
};
+
struct chmod_mode_struct;
- #include "byteorder.h"
+ #define EMPTY_ITEM_LIST {NULL, 0, 0}
--- old/rsync.yo
+++ new/rsync.yo
-@@ -361,6 +361,7 @@ to the detailed description below for a
+@@ -368,6 +368,7 @@ to the detailed description below for a
--compare-dest=DIR also compare received files relative to DIR
--copy-dest=DIR ... and include copies of unchanged files
--link-dest=DIR hardlink to files in DIR when unchanged