I have done some light benchmarking, and this change appears to slow down
local copies, so the old algorithm is still used for local copies.
+To use this patch, run these commands for a successful build:
+
+ patch -p1 <patches/early-checksum.diff
+ ./configure (optional if already run)
+ make
+
--- old/flist.c
+++ new/flist.c
-@@ -36,6 +36,7 @@ extern int am_daemon;
- extern int am_sender;
+@@ -33,6 +33,7 @@ extern int am_sender;
+ extern int inc_recurse;
extern int do_progress;
extern int always_checksum;
+extern int pre_checksum;
extern int module_id;
extern int ignore_errors;
extern int numeric_ids;
-@@ -711,6 +712,16 @@ static struct file_struct *receive_file_
- sum = empty_sum;
- }
- read_buf(f, sum, checksum_len);
-+ if (pre_checksum) {
-+ char sum2[MD4_SUM_LENGTH];
+@@ -882,6 +883,15 @@ static struct file_struct *recv_file_ent
+ memcpy(bp, F_SUM(first), checksum_len);
+ } else
+ read_buf(f, bp, checksum_len);
++ if (pre_checksum && bp != tmp_sum) {
+ STRUCT_STAT st;
+ char *fname = f_name(file, NULL);
+ if (stat(fname, &st) == 0 && st.st_size == file_length) {
-+ file_checksum(fname, sum2, st.st_size);
-+ if (memcmp(sum, sum2, checksum_len) != 0)
++ file_checksum(fname, tmp_sum, st.st_size);
++ if (memcmp(bp, tmp_sum, checksum_len) != 0)
+ file->flags |= FLAG_SUM_DIFFERS;
+ }
+ }
}
- return file;
+ #ifdef SUPPORT_ACLS
--- old/generator.c
+++ new/generator.c
-@@ -72,6 +72,7 @@ extern int ignore_timeout;
- extern int protocol_version;
+@@ -73,6 +73,7 @@ extern int protocol_version;
+ extern int file_total;
extern int fuzzy_basis;
extern int always_checksum;
+extern int pre_checksum;
extern int checksum_len;
extern char *partial_dir;
extern char *basis_dir[];
-@@ -377,7 +378,8 @@ void itemize(struct file_struct *file, i
+@@ -590,7 +591,8 @@ void itemize(const char *fname, struct f
/* Perform our quick-check heuristic for determining if a file is unchanged. */
+int unchanged_file(char *fn, int fnamecmp_type, struct file_struct *file,
+ STRUCT_STAT *st)
{
- if (st->st_size != file->length)
+ if (st->st_size != F_LENGTH(file))
return 0;
-@@ -386,6 +388,8 @@ int unchanged_file(char *fn, struct file
+@@ -599,6 +601,8 @@ int unchanged_file(char *fn, struct file
of the file time to determine whether to sync */
- if (always_checksum && S_ISREG(st->st_mode)) {
- char sum[MD4_SUM_LENGTH];
+ if (always_checksum > 0 && S_ISREG(st->st_mode)) {
+ char sum[MAX_DIGEST_LEN];
+ if (pre_checksum && fnamecmp_type == FNAMECMP_FNAME)
+ return !(file->flags & FLAG_SUM_DIFFERS);
file_checksum(fn, sum, st->st_size);
- return memcmp(sum, file->u.sum, checksum_len) == 0;
+ return memcmp(sum, F_SUM(file), checksum_len) == 0;
}
-@@ -623,7 +627,7 @@ static int try_dests_reg(struct file_str
+@@ -819,7 +823,7 @@ static int try_dests_reg(struct file_str
match_level = 1;
/* FALL THROUGH */
case 1:
-- if (!unchanged_file(cmpbuf, file, stp))
-+ if (!unchanged_file(cmpbuf, 0, file, stp))
+- if (!unchanged_file(cmpbuf, file, &sxp->st))
++ if (!unchanged_file(cmpbuf, 0, file, &sxp->st))
continue;
best_match = j;
match_level = 2;
-@@ -1159,7 +1163,7 @@ static void recv_generator(char *fname,
+@@ -1524,7 +1528,7 @@ static void recv_generator(char *fname,
;
else if (fnamecmp_type == FNAMECMP_FUZZY)
;
-- else if (unchanged_file(fnamecmp, file, &st)) {
-+ else if (unchanged_file(fnamecmp, fnamecmp_type, file, &st)) {
+- else if (unchanged_file(fnamecmp, file, &sx.st)) {
++ else if (unchanged_file(fnamecmp, fnamecmp_type, file, &sx.st)) {
if (partialptr) {
do_unlink(partialptr);
handle_partial_dir(partialptr, PDIR_DELETE);
--- old/hlink.c
+++ new/hlink.c
-@@ -210,7 +210,7 @@ int hard_link_check(struct file_struct *
- itemizing = code = 0;
- break;
- }
-- if (!unchanged_file(cmpbuf, file, &st3))
-+ if (!unchanged_file(cmpbuf, 0, file, &st3))
- continue;
- statret = 1;
- st = &st3;
+@@ -384,7 +384,7 @@ int hard_link_check(struct file_struct *
+ }
+ break;
+ }
+- if (!unchanged_file(cmpbuf, file, &alt_sx.st))
++ if (!unchanged_file(cmpbuf, 0, file, &alt_sx.st))
+ continue;
+ statret = 1;
+ if (unchanged_attrs(cmpbuf, file, &alt_sx))
--- old/main.c
+++ new/main.c
-@@ -44,6 +44,7 @@ extern int copy_links;
+@@ -45,6 +45,7 @@ extern int module_id;
+ extern int copy_links;
+ extern int copy_dirlinks;
extern int keep_dirlinks;
++extern int always_checksum;
extern int preserve_hard_links;
extern int protocol_version;
-+extern int always_checksum;
- extern int recurse;
- extern int relative_paths;
- extern int rsync_port;
-@@ -59,7 +60,9 @@ extern char *filesfrom_host;
- extern char *rsync_path;
- extern char *shell_cmd;
- extern char *batch_name;
-+extern char curr_dir[MAXPATHLEN];
+ extern int file_total;
+@@ -72,6 +73,9 @@ extern char *password_file;
+ extern char curr_dir[MAXPATHLEN];
+ extern struct filter_list_struct server_filter_list;
++extern char curr_dir[MAXPATHLEN];
++
+int pre_checksum = 0;
int local_server = 0;
+ int new_root_dir = 0;
mode_t orig_umask = 0;
- struct file_list *the_file_list;
-@@ -717,6 +720,7 @@ static void do_server_recv(int f_in, int
+@@ -792,6 +796,7 @@ static void do_server_recv(int f_in, int
struct file_list *flist;
char *local_name = NULL;
char *dir = NULL;
int save_verbose = verbose;
if (filesfrom_fd >= 0) {
-@@ -760,6 +764,10 @@ static void do_server_recv(int f_in, int
+@@ -838,6 +843,10 @@ static void do_server_recv(int f_in, int
filesfrom_fd = -1;
}
+ strlcpy(olddir, curr_dir, sizeof olddir);
+ if (always_checksum && !local_server && argc > 0)
-+ pre_checksum = push_dir(argv[0]);
++ pre_checksum = push_dir(argv[0], 0);
+
flist = recv_file_list(f_in);
- verbose = save_verbose;
if (!flist) {
-@@ -768,6 +776,9 @@ static void do_server_recv(int f_in, int
- }
- the_file_list = flist;
+ rprintf(FERROR,"server_recv: recv_file_list error\n");
+@@ -847,6 +856,9 @@ static void do_server_recv(int f_in, int
+ recv_additional_file_list(f_in);
+ verbose = save_verbose;
+ if (pre_checksum)
+ pop_dir(olddir);
if (argc > 0)
local_name = get_local_name(flist,argv[0]);
-@@ -819,6 +830,7 @@ int client_run(int f_in, int f_out, pid_
+@@ -926,6 +938,7 @@ int client_run(int f_in, int f_out, pid_
{
struct file_list *flist = NULL;
int exit_code = 0, exit_code2 = 0;
char *local_name = NULL;
cleanup_child_pid = pid;
-@@ -893,11 +905,18 @@ int client_run(int f_in, int f_out, pid_
+@@ -1006,12 +1019,19 @@ int client_run(int f_in, int f_out, pid_
filesfrom_fd = -1;
}
+ strlcpy(olddir, curr_dir, sizeof olddir);
+ if (always_checksum && !local_server)
-+ pre_checksum = push_dir(argv[0]);
++ pre_checksum = push_dir(argv[0], 0);
+
if (write_batch && !am_server)
start_write_batch(f_in);
flist = recv_file_list(f_in);
- the_file_list = flist;
+ if (inc_recurse && file_total == 1)
+ recv_additional_file_list(f_in);
+ if (pre_checksum)
+ pop_dir(olddir);
--- old/rsync.h
+++ new/rsync.h
-@@ -64,6 +64,7 @@
- #define FLAG_DEL_HERE (1<<3) /* receiver/generator */
- #define FLAG_HLINK_TOL (1<<4) /* receiver/generator */
- #define FLAG_NO_FUZZY (1<<5) /* generator */
-+#define FLAG_SUM_DIFFERS (1<<6) /* receiver/generator */
+@@ -70,6 +70,7 @@
+ #define FLAG_HLINK_LAST (1<<7) /* receiver/generator */
+ #define FLAG_HLINK_DONE (1<<8) /* receiver/generator */
+ #define FLAG_LENGTH64 (1<<9) /* sender/receiver/generator */
++#define FLAG_SUM_DIFFERS (1<<10)/* receiver/generator */
+
+ /* These flags are passed to functions but not stored. */
- /* update this if you make incompatible changes */
- #define PROTOCOL_VERSION 29