Limited benchmarking suggests that this approach slows down local copies,
so the old algorithm is still used when copying locally.
---- orig/flist.c 2005-11-10 16:58:36
-+++ flist.c 2005-09-16 16:41:25
-@@ -36,6 +36,7 @@ extern int am_daemon;
+To use this patch, run these commands for a successful build:
+
+ patch -p1 <patches/early-checksum.diff
+ ./configure (optional if already run)
+ make
+
+--- old/flist.c
++++ new/flist.c
+@@ -31,6 +31,7 @@ extern int am_daemon;
extern int am_sender;
extern int do_progress;
extern int always_checksum;
extern int module_id;
extern int ignore_errors;
extern int numeric_ids;
-@@ -707,6 +708,16 @@ static struct file_struct *receive_file_
+@@ -697,6 +698,16 @@ static struct file_struct *receive_file_
sum = empty_sum;
}
read_buf(f, sum, checksum_len);
+ if (pre_checksum) {
+ char sum2[MD4_SUM_LENGTH];
+ STRUCT_STAT st;
-+ char *fname = f_name(file);
++ char *fname = f_name(file, NULL);
+ if (stat(fname, &st) == 0 && st.st_size == file_length) {
+ file_checksum(fname, sum2, st.st_size);
+ if (memcmp(sum, sum2, checksum_len) != 0)
+ }
}
- if (!preserve_perms) {
---- orig/generator.c 2005-11-10 16:58:36
-+++ generator.c 2005-11-10 03:16:39
-@@ -69,6 +69,7 @@ extern int ignore_timeout;
+ return file;
+--- old/generator.c
++++ new/generator.c
+@@ -70,6 +70,7 @@ extern int ignore_timeout;
extern int protocol_version;
extern int fuzzy_basis;
extern int always_checksum;
extern int checksum_len;
extern char *partial_dir;
extern char *basis_dir[];
-@@ -375,7 +376,8 @@ void itemize(struct file_struct *file, i
+@@ -376,7 +377,8 @@ void itemize(struct file_struct *file, i
/* Perform our quick-check heuristic for determining if a file is unchanged. */
--static int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
-+static int unchanged_file(char *fn, int fnamecmp_type, struct file_struct *file,
-+ STRUCT_STAT *st)
+-int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
++int unchanged_file(char *fn, int fnamecmp_type, struct file_struct *file,
++ STRUCT_STAT *st)
{
if (st->st_size != file->length)
return 0;
-@@ -384,6 +386,8 @@ static int unchanged_file(char *fn, stru
+@@ -385,6 +387,8 @@ int unchanged_file(char *fn, struct file
of the file time to determine whether to sync */
if (always_checksum && S_ISREG(st->st_mode)) {
char sum[MD4_SUM_LENGTH];
file_checksum(fn, sum, st->st_size);
return memcmp(sum, file->u.sum, checksum_len) == 0;
}
-@@ -940,7 +944,7 @@ static void recv_generator(char *fname,
- match_level = 1;
- /* FALL THROUGH */
- case 1:
-- if (!unchanged_file(fnamecmpbuf, file, &st))
-+ if (!unchanged_file(fnamecmpbuf, 0, file, &st))
- continue;
- best_match = i;
- match_level = 2;
-@@ -1068,7 +1072,7 @@ static void recv_generator(char *fname,
+@@ -622,7 +626,7 @@ static int try_dests_reg(struct file_str
+ match_level = 1;
+ /* FALL THROUGH */
+ case 1:
+- if (!unchanged_file(cmpbuf, file, stp))
++ if (!unchanged_file(cmpbuf, 0, file, stp))
+ continue;
+ best_match = j;
+ match_level = 2;
+@@ -1215,7 +1219,7 @@ static void recv_generator(char *fname,
;
else if (fnamecmp_type == FNAMECMP_FUZZY)
;
if (partialptr) {
do_unlink(partialptr);
handle_partial_dir(partialptr, PDIR_DELETE);
---- orig/main.c 2005-11-10 16:58:36
-+++ main.c 2005-11-07 05:00:49
-@@ -45,6 +45,7 @@ extern int copy_links;
+--- old/hlink.c
++++ new/hlink.c
+@@ -224,7 +224,7 @@ int hard_link_check(struct file_struct *
+ }
+ break;
+ }
+- if (!unchanged_file(cmpbuf, file, &st3))
++ if (!unchanged_file(cmpbuf, 0, file, &st3))
+ continue;
+ statret = 1;
+ st = &st3;
+--- old/main.c
++++ new/main.c
+@@ -47,6 +47,7 @@ extern int copy_dirlinks;
extern int keep_dirlinks;
extern int preserve_hard_links;
extern int protocol_version;
+extern int always_checksum;
extern int recurse;
extern int relative_paths;
- extern int rsync_port;
-@@ -60,8 +61,10 @@ extern char *filesfrom_host;
- extern char *rsync_path;
- extern char *shell_cmd;
- extern char *batch_name;
-+extern char curr_dir[MAXPATHLEN];
+ extern int sanitize_paths;
+@@ -71,6 +72,9 @@ extern char *batch_name;
+ extern char curr_dir[MAXPATHLEN];
+ extern struct filter_list_struct server_filter_list;
- int local_server = 0;
++extern char curr_dir[MAXPATHLEN];
++
+int pre_checksum = 0;
- struct file_list *the_file_list;
-
- /* There's probably never more than at most 2 outstanding child processes,
-@@ -633,6 +636,7 @@ static void do_server_recv(int f_in, int
+ int local_server = 0;
+ int new_root_dir = 0;
+ mode_t orig_umask = 0;
+@@ -784,6 +788,7 @@ static void do_server_recv(int f_in, int
struct file_list *flist;
char *local_name = NULL;
char *dir = NULL;
int save_verbose = verbose;
if (filesfrom_fd >= 0) {
-@@ -676,6 +680,10 @@ static void do_server_recv(int f_in, int
+@@ -827,6 +832,10 @@ static void do_server_recv(int f_in, int
filesfrom_fd = -1;
}
+ strlcpy(olddir, curr_dir, sizeof olddir);
+ if (always_checksum && !local_server && argc > 0)
-+ pre_checksum = push_dir(argv[0]);
++ pre_checksum = push_dir(argv[0], 0);
+
flist = recv_file_list(f_in);
verbose = save_verbose;
if (!flist) {
-@@ -684,6 +692,9 @@ static void do_server_recv(int f_in, int
+@@ -835,6 +844,9 @@ static void do_server_recv(int f_in, int
}
the_file_list = flist;
if (argc > 0)
local_name = get_local_name(flist,argv[0]);
-@@ -732,6 +743,7 @@ int client_run(int f_in, int f_out, pid_
+@@ -916,6 +928,7 @@ int client_run(int f_in, int f_out, pid_
{
struct file_list *flist = NULL;
int exit_code = 0, exit_code2 = 0;
char *local_name = NULL;
cleanup_child_pid = pid;
-@@ -803,11 +815,18 @@ int client_run(int f_in, int f_out, pid_
+@@ -990,11 +1003,18 @@ int client_run(int f_in, int f_out, pid_
filesfrom_fd = -1;
}
+ strlcpy(olddir, curr_dir, sizeof olddir);
+ if (always_checksum && !local_server)
-+ pre_checksum = push_dir(argv[0]);
++ pre_checksum = push_dir(argv[0], 0);
+
if (write_batch && !am_server)
start_write_batch(f_in);
if (flist && flist->count > 0) {
local_name = get_local_name(flist, argv[0]);
---- orig/rsync.h 2005-10-14 18:45:50
-+++ rsync.h 2005-09-16 16:41:26
-@@ -64,6 +64,7 @@
- #define FLAG_DEL_HERE (1<<3) /* receiver/generator */
- #define FLAG_SENT (1<<3) /* sender */
+--- old/rsync.h
++++ new/rsync.h
+@@ -65,6 +65,7 @@
#define FLAG_HLINK_TOL (1<<4) /* receiver/generator */
-+#define FLAG_SUM_DIFFERS (1<<5) /* receiver/generator */
+ #define FLAG_NO_FUZZY (1<<5) /* generator */
+ #define FLAG_MISSING (1<<6) /* generator */
++#define FLAG_SUM_DIFFERS (1<<7) /* receiver/generator */
/* update this if you make incompatible changes */
#define PROTOCOL_VERSION 29