-To: rsync@lists.samba.org
-From: "Jason M. Felice" <jfelice@cronosys.com>
-Subject: [patch] Add `--link-by-hash' option (rev 5).
-Date: Mon, 23 Feb 2004 13:29:08 -0500
+After applying this patch and running configure, you MUST run this
+command before "make":
+
+ make proto
+
+Jason M. Felice writes:
This patch adds the --link-by-hash=DIR option, which hard-links each received
file into a link farm arranged by MD4 file hash. As a result, the system
stores only one copy of each unique file content, regardless of
the file's name.
-(rev 5)
-* Fixed silly logic error.
-
-(rev 4)
-* Updated for committed robust_rename() patch, other changes in CVS.
-
-(rev 3)
-* Don't link empty files.
-* Roll over to new file when filesystem maximum link count is reached.
-* If link fails for another reason, leave non-linked file there.
-* Depends on rsync-rename.diff
-
-(rev 2)
-* This revision is actually against CVS HEAD (I didn't realize I was working
- from a stale rsync'd CVS).
-* Apply permissions after linking (permissions were lost if we already had
- a copy of the file in the link farm).
-
-Patch Summary:
- -1 +1 Makefile.in
- -0 +351 hashlink.c (new)
- -1 +22 options.c
- -0 +6 proto.h
- -6 +21 receiver.c
- -2 +8 rsync.c
- -0 +8 rsync.h
-
---- hashlink.c 1969-12-31 19:00:00.000000000 -0500
-+++ hashlink.c 2004-02-23 10:30:45.000000000 -0500
-@@ -0,0 +1,351 @@
+--- orig/Makefile.in 2004-07-04 08:59:17
++++ Makefile.in 2004-07-03 20:20:15
+@@ -35,7 +35,7 @@ OBJS1=rsync.o generator.o receiver.o cle
+ main.o checksum.o match.o syscall.o log.o backup.o
+ OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \
+ fileio.o batch.o clientname.o
+-OBJS3=progress.o pipe.o
++OBJS3=progress.o pipe.o hashlink.o
+ DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
+ popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \
+ popt/popthelp.o popt/poptparse.o
+--- orig/hashlink.c 2004-07-02 21:41:54
++++ hashlink.c 2004-07-02 21:41:54
+@@ -0,0 +1,342 @@
+/*
+ Copyright (C) Cronosys, LLC 2004
+
+ /* Build a list of potential candidates and open
+ * them. */
+ if ((d = opendir(hashname)) == NULL) {
-+ rprintf(FERROR,"opendir \"%s\": %s\n",
-+ hashname, strerror(errno));
++ rsyserr(FERROR, errno, "opendir \"%s\"", hashname);
+ free(hashname);
+ return NULL;
+ }
+ asprintf(&hashfile->name,"%s/%s",hashname,
+ di->d_name);
+ if (do_stat(hashfile->name,&st) == -1) {
-+ rprintf(FERROR,"%s: %s", hashfile->name,
-+ strerror(errno));
++ rsyserr(FERROR, errno, "%s: %s", hashfile->name);
+ kill_hashfile(hashfile);
+ continue;
+ }
+ hashfile->nlink = st.st_nlink;
+ hashfile->fd = open(hashfile->name,O_RDONLY|O_BINARY);
+ if (hashfile->fd == -1) {
-+ rprintf(FERROR,"%s: %s\n", hashfile->name,
-+ strerror(errno));
++ rsyserr(FERROR, errno, "%s", hashfile->name);
+ kill_hashfile(hashfile);
+ continue;
+ }
+ }
+
+ if (amt == -1) {
-+ rprintf(FERROR,"%s",strerror(errno));
++ rsyserr(FERROR, errno, "%s");
+ kill_hashfiles(files);
+ return NULL;
+ }
+ dirname = strdup(hashname);
+ *strrchr(dirname,'/') = 0;
+ if (do_mkdir(dirname, 0755) == -1 && errno != EEXIST) {
-+ rprintf(FERROR, "mkdir %s: %s\n", dirname,
-+ strerror(errno));
++ rsyserr(FERROR, errno, "mkdir %s", dirname);
+ free(hashname);
+ free(dirname);
+ return robust_rename(fnametmp,fname,0644);
+ free(dirname);
+
+ if (do_mkdir(hashname, 0755) == -1 && errno != EEXIST) {
-+ rprintf(FERROR, "mkdir %s: %s\n", hashname,
-+ strerror(errno));
++ rsyserr(FERROR, errno, "mkdir %s", hashname);
+ free(hashname);
+ return robust_rename(fnametmp,fname,0644);
+ }
+ int fd;
+
+ if (do_stat(fnametmp,&st) == -1) {
-+ rprintf(FERROR,"%s: %s\n",fname,strerror(errno));
++ rsyserr(FERROR, errno, "%s", fname);
+ return -1;
+ }
+ hashfiles = find_hashfiles(hashname, st.st_size, &last_fnbr);
+
+ /* Search for one identical to us. */
+ if ((fd = open(fnametmp,O_RDONLY|O_BINARY)) == -1) {
-+ rprintf(FERROR,"%s: %s\n",fnametmp,
-+ strerror(errno));
++ rsyserr(FERROR, errno, "%s", fnametmp);
+ kill_hashfiles(hashfiles);
+ return -1;
+ }
+ rprintf(FINFO, "(5) linkname = %s\n", linkname);
+ rprintf(FINFO,"link-by-hash: max link count exceeded, starting new file \"%s\".\n", linkname);
+ } else {
-+ rprintf(FERROR,"link \"%s\" -> %s: %s\n",
-+ linkname,full_fname(fname),
-+ strerror(errno));
++ rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"",
++ linkname, full_fname(fname));
+ robust_unlink(fname);
+ rc = robust_rename(fnametmp,fname,0644);
+ }
+
+ rc = robust_rename(fnametmp,fname,0644);
+ if (rc != 0) {
-+ rprintf(FERROR,"rename \"%s\" -> \"%s\": %s\n",
-+ full_fname(fnametmp),full_fname(fname),
-+ strerror(errno));
++ rsyserr(FERROR, errno, "rename \"%s\" -> \"%s\"",
++ full_fname(fnametmp), full_fname(fname));
+ }
+ rc = do_link(fname,linkname);
+ if (rc != 0) {
-+ rprintf(FERROR,"link \"%s\" -> \"%s\": %s\n",
-+ full_fname(fname),linkname,
-+ strerror(errno));
++ rsyserr(FERROR, errno, "link \"%s\" -> \"%s\"",
++ full_fname(fname), linkname);
+ }
+ }
+
+}
+
+#endif
---- Makefile.in 10 Feb 2004 17:06:11 -0000 1.98
-+++ Makefile.in 15 Apr 2004 19:18:59 -0000
-@@ -35,7 +35,7 @@ OBJS1=rsync.o generator.o receiver.o cle
- main.o checksum.o match.o syscall.o log.o backup.o
- OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \
- fileio.o batch.o clientname.o
--OBJS3=progress.o pipe.o
-+OBJS3=progress.o pipe.o hashlink.o
- DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
- popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \
- popt/popthelp.o popt/poptparse.o
---- options.c 14 Apr 2004 23:33:34 -0000 1.146
-+++ options.c 15 Apr 2004 19:19:00 -0000
-@@ -121,6 +121,7 @@ char *log_format = NULL;
+--- orig/options.c 2004-07-16 20:07:22
++++ options.c 2004-07-03 20:20:15
+@@ -125,6 +125,7 @@ char *log_format = NULL;
char *password_file = NULL;
char *rsync_path = RSYNC_PATH;
char *backup_dir = NULL;
char backup_dir_buf[MAXPATHLEN];
int rsync_port = RSYNC_PORT;
int link_dest = 0;
-@@ -266,6 +267,7 @@ void usage(enum logcode F)
+@@ -276,6 +277,7 @@ void usage(enum logcode F)
rprintf(F," -T --temp-dir=DIR create temporary files in directory DIR\n");
rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n");
rprintf(F," -P equivalent to --partial --progress\n");
rprintf(F," -z, --compress compress file data\n");
rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n");
-@@ -305,7 +307,7 @@ void usage(enum logcode F)
+@@ -316,7 +318,7 @@ void usage(enum logcode F)
enum {OPT_VERSION = 1000, OPT_SENDER, OPT_EXCLUDE, OPT_EXCLUDE_FROM,
OPT_DELETE_AFTER, OPT_DELETE_EXCLUDED, OPT_LINK_DEST,
OPT_INCLUDE, OPT_INCLUDE_FROM, OPT_MODIFY_WINDOW,
-- OPT_READ_BATCH, OPT_WRITE_BATCH,
-+ OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_LINK_BY_HASH,
+- OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_TIMEOUT,
++ OPT_READ_BATCH, OPT_WRITE_BATCH, OPT_TIMEOUT, OPT_LINK_BY_HASH,
OPT_REFUSED_BASE = 9000};
static struct poptOption long_options[] = {
-@@ -362,6 +364,7 @@ static struct poptOption long_options[]
+@@ -375,6 +377,7 @@ static struct poptOption long_options[]
{"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
{"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 },
{"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 },
/* TODO: Should this take an optional int giving the compression level? */
{"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 },
{"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 },
-@@ -584,6 +587,19 @@ int parse_arguments(int *argc, const cha
+@@ -601,6 +604,19 @@ int parse_arguments(int *argc, const cha
return 0;
#endif
default:
/* A large opt value means that set_refuse_options()
* turned this option off (opt-BASE is its index). */
-@@ -951,6 +967,11 @@ void server_options(char **args,int *arg
- */
- args[ac++] = link_dest ? "--link-dest" : "--compare-dest";
+@@ -1007,6 +1023,11 @@ void server_options(char **args,int *arg
args[ac++] = compare_dest;
-+ }
-+
+ }
+
+ if (link_by_hash_dir && am_sender) {
+ args[ac++] = "--link-by-hash";
+ args[ac++] = link_by_hash_dir;
- }
-
++ }
++
if (files_from && (!am_sender || remote_filesfrom_file)) {
---- proto.h 14 Apr 2004 23:33:30 -0000 1.188
-+++ proto.h 15 Apr 2004 19:19:00 -0000
-@@ -91,6 +91,12 @@ char *f_name(struct file_struct *f);
- void write_sum_head(int f, struct sum_struct *sum);
- void recv_generator(char *fname, struct file_struct *file, int i, int f_out);
- void generate_files(int f, struct file_list *flist, char *local_name);
-+char* make_hash_name(struct file_struct *file);
-+void kill_hashfile(struct hashfile_struct *hashfile);
-+void kill_hashfiles(struct hashfile_struct *hashfiles);
-+struct hashfile_struct *find_hashfiles(char *hashname, int64 size, long *fnbr);
-+struct hashfile_struct *compare_hashfiles(int fd,struct hashfile_struct *files);
-+int link_by_hash(char *fnametmp,char *fname,struct file_struct *file);
- void init_hard_links(struct file_list *flist);
- int hard_link_check(struct file_struct *file, int skip);
- void do_hard_links(void);
---- receiver.c 23 Mar 2004 16:50:40 -0000 1.75
-+++ receiver.c 15 Apr 2004 19:19:00 -0000
-@@ -45,6 +45,7 @@ extern int cleanup_got_literal;
- extern int module_id;
- extern int ignore_errors;
- extern int orig_umask;
+ if (remote_filesfrom_file) {
+ args[ac++] = "--files-from";
+--- orig/receiver.c 2004-07-16 20:07:22
++++ receiver.c 2004-07-16 20:16:37
+@@ -37,6 +37,7 @@ extern int cvs_exclude;
+ extern int io_error;
+ extern char *tmpdir;
+ extern char *compare_dest;
+extern char *link_by_hash_dir;
-
- static void delete_one(char *fn, int is_dir)
- {
-@@ -190,10 +191,11 @@ static int get_tmpname(char *fnametmp, c
+ extern int make_backups;
+ extern int do_progress;
+ extern char *backup_dir;
+@@ -197,10 +198,11 @@ static int get_tmpname(char *fnametmp, c
static int receive_data(int f_in,struct map_struct *mapbuf,int fd,char *fname,
unsigned int len;
OFF_T offset = 0;
OFF_T offset2;
-@@ -203,7 +205,9 @@ static int receive_data(int f_in,struct
- char *map=NULL;
+@@ -211,6 +213,9 @@ static int receive_data(int f_in,struct
read_sum_head(f_in, &sum);
--
+
+ if (md4)
+ mdfour_begin(&mdfour_data);
-+
- sum_init();
++
+ sum_init(checksum_seed);
while ((i = recv_token(f_in, &data)) != 0) {
-@@ -220,6 +224,8 @@ static int receive_data(int f_in,struct
+@@ -227,6 +232,8 @@ static int receive_data(int f_in,struct
cleanup_got_literal = 1;
sum_update(data,i);
+ mdfour_update(&mdfour_data,data,i);
if (fd != -1 && write_file(fd,data,i) != i) {
- rprintf(FERROR, "write failed on %s: %s\n",
-@@ -247,6 +253,8 @@ static int receive_data(int f_in,struct
+ rsyserr(FERROR, errno, "write failed on %s",
+@@ -254,6 +261,8 @@ static int receive_data(int f_in,struct
see_token(map, len);
sum_update(map,len);
+ mdfour_update(&mdfour_data,map,len);
}
- if (fd != -1 && write_file(fd,map,len) != (int) len) {
-@@ -269,6 +277,8 @@ static int receive_data(int f_in,struct
+ if (!inplace || offset != offset2) {
+@@ -290,6 +299,8 @@ static int receive_data(int f_in,struct
}
sum_end(file_sum1);
+ mdfour_result(&mdfour_data, (unsigned char*)md4);
read_buf(f_in,file_sum2,MD4_SUM_LENGTH);
- if (verbose > 2) {
-@@ -372,7 +382,7 @@ int recv_files(int f_in,struct file_list
+ if (verbose > 2)
+@@ -389,7 +400,7 @@ int recv_files(int f_in, struct file_lis
if (fd1 != -1 && do_fstat(fd1,&st) != 0) {
- rprintf(FERROR, "fstat %s failed: %s\n",
- full_fname(fnamecmp), strerror(errno));
+ rsyserr(FERROR, errno, "fstat %s failed",
+ full_fname(fnamecmp));
- receive_data(f_in,NULL,-1,NULL,file->length);
+ receive_data(f_in,NULL,-1,NULL,file->length,NULL);
close(fd1);
continue;
}
-@@ -385,7 +395,7 @@ int recv_files(int f_in,struct file_list
+@@ -402,7 +413,7 @@ int recv_files(int f_in, struct file_lis
*/
rprintf(FERROR,"recv_files: %s is a directory\n",
full_fname(fnamecmp));
close(fd1);
continue;
}
-@@ -437,7 +447,7 @@ int recv_files(int f_in,struct file_list
- if (fd2 == -1) {
- rprintf(FERROR, "mkstemp %s failed: %s\n",
- full_fname(fnametmp), strerror(errno));
-- receive_data(f_in,mapbuf,-1,NULL,file->length);
-+ receive_data(f_in,mapbuf,-1,NULL,file->length,NULL);
- if (mapbuf) unmap_file(mapbuf);
- if (fd1 != -1) close(fd1);
- continue;
-@@ -450,7 +460,12 @@ int recv_files(int f_in,struct file_list
- }
+@@ -435,7 +446,7 @@ int recv_files(int f_in, struct file_lis
+ if (fd2 == -1) {
+ rsyserr(FERROR, errno, "open %s failed",
+ full_fname(fnamecmp));
+- receive_data(f_in,mapbuf,-1,NULL,file->length);
++ receive_data(f_in,mapbuf,-1,NULL,file->length,NULL);
+ if (mapbuf)
+ unmap_file(mapbuf);
+ if (fd1 != -1)
+@@ -472,7 +483,7 @@ int recv_files(int f_in, struct file_lis
+ if (fd2 == -1) {
+ rsyserr(FERROR, errno, "mkstemp %s failed",
+ full_fname(fnametmp));
+- receive_data(f_in,mapbuf,-1,NULL,file->length);
++ receive_data(f_in,mapbuf,-1,NULL,file->length,NULL);
+ if (mapbuf)
+ unmap_file(mapbuf);
+ if (fd1 != -1)
+@@ -487,7 +498,11 @@ int recv_files(int f_in, struct file_lis
+ rprintf(FINFO, "%s\n", fname);
/* recv file data */
- recv_ok = receive_data(f_in,mapbuf,fd2,fname,file->length);
+#ifdef HAVE_LINK
-+ if (link_by_hash_dir) {
-+ file->u.sum = (char*)malloc (MD4_SUM_LENGTH);
-+ }
++ if (link_by_hash_dir)
++ file->u.sum = (char*)malloc(MD4_SUM_LENGTH);
+#endif
+ recv_ok = receive_data(f_in,mapbuf,fd2,fname,file->length,file->u.sum);
log_recv(file, &initial_stats);
---- rsync.c 23 Mar 2004 16:16:15 -0000 1.135
-+++ rsync.c 15 Apr 2004 19:19:00 -0000
-@@ -33,6 +33,7 @@ extern int preserve_uid;
- extern int preserve_gid;
- extern int preserve_perms;
+--- orig/rsync.c 2004-07-16 20:07:23
++++ rsync.c 2004-07-16 20:16:53
+@@ -33,6 +33,7 @@ extern int preserve_gid;
+ extern int force_delete;
+ extern int recurse;
extern int make_backups;
+extern char *link_by_hash_dir;
+ extern char *backup_dir;
+ extern int inplace;
-
- /*
-@@ -235,8 +236,12 @@ void finish_transfer(char *fname, char *
- if (make_backups && !make_backup(fname))
- return;
-
-- /* move tmp file over real file */
-- ret = robust_rename(fnametmp, fname, file->mode & INITACCESSPERMS);
+@@ -250,6 +251,12 @@ void finish_transfer(char *fname, char *
+ /* move tmp file over real file */
+ if (verbose > 2)
+ rprintf(FINFO, "renaming %s to %s\n", fnametmp, fname);
+#ifdef HAVE_LINK
+ if (link_by_hash_dir)
-+ ret = link_by_hash(fnametmp,fname,file);
++ ret = link_by_hash(fnametmp, fname, file);
+ else
+#endif
+ ret = robust_rename(fnametmp, fname, file->mode & INITACCESSPERMS);
+ ret = robust_rename(fnametmp, fname, file->mode & INITACCESSPERMS);
if (ret < 0) {
- rprintf(FERROR, "%s %s -> \"%s\": %s\n",
- ret == -2 ? "copy" : "rename",
---- rsync.h 14 Apr 2004 23:33:37 -0000 1.196
-+++ rsync.h 15 Apr 2004 19:19:00 -0000
-@@ -518,6 +518,14 @@ struct stats {
+ rsyserr(FERROR, errno, "%s %s -> \"%s\"",
+--- orig/rsync.h 2004-07-16 20:07:23
++++ rsync.h 2004-07-03 20:20:15
+@@ -524,6 +524,14 @@ struct stats {
int current_file_index;
};