Tweaked the patch to only use the new early-checksum algorithm
[rsync/rsync-patches.git] / early-checksum.diff
CommitLineData
2d2150f6
WD
1This patch changes the way the --checksum option works by having the
2receiving side perform a checksum-read of every file in the file list
d47d38c7 3as the list is received (if the sizes are equal), marking non-matching
2d2150f6
WD
4items with a flag. The idea is that the checksum pass on the sender and
5the receiver can then happen in parallel instead of having the receiver
6do its checksum pass during its normal find-the-different-files pass.
7
d47d38c7
WD
8I have benchmarked this a little, and it appears to slow things down
9for a local copy, so the old algorithm is used for local copies.
2d2150f6 10
d47d38c7 11--- orig/flist.c 2005-11-07 04:29:01
2d2150f6
WD
12+++ flist.c 2005-09-16 16:41:25
13@@ -36,6 +36,7 @@ extern int am_daemon;
14 extern int am_sender;
15 extern int do_progress;
16 extern int always_checksum;
17+extern int pre_checksum;
18 extern int module_id;
19 extern int ignore_errors;
20 extern int numeric_ids;
d47d38c7 21@@ -707,6 +708,16 @@ static struct file_struct *receive_file_
2d2150f6
WD
22 sum = empty_sum;
23 }
24 read_buf(f, sum, slen);
25+ if (pre_checksum) {
26+ char sum2[MD4_SUM_LENGTH];
27+ STRUCT_STAT st;
28+ char *fname = f_name(file);
29+ if (stat(fname, &st) == 0 && st.st_size == file_length) {
30+ file_checksum(fname, sum2, st.st_size);
31+ if (memcmp(sum, sum2, slen) != 0)
32+ file->flags |= FLAG_SUM_DIFFERS;
33+ }
34+ }
35 }
36
37 if (!preserve_perms) {
38--- orig/generator.c 2005-10-30 22:30:28
39+++ generator.c 2005-09-16 16:41:25
40@@ -69,6 +69,7 @@ extern int ignore_timeout;
41 extern int protocol_version;
42 extern int fuzzy_basis;
43 extern int always_checksum;
44+extern int pre_checksum;
45 extern char *partial_dir;
46 extern char *basis_dir[];
47 extern int compare_dest;
48@@ -359,7 +360,8 @@ void itemize(struct file_struct *file, i
49
50
51 /* Perform our quick-check heuristic for determining if a file is unchanged. */
52-static int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
53+static int unchanged_file(char *fn, int fnamecmp_type, struct file_struct *file,
54+ STRUCT_STAT *st)
55 {
56 if (st->st_size != file->length)
57 return 0;
58@@ -368,6 +370,8 @@ static int unchanged_file(char *fn, stru
59 of the file time to determine whether to sync */
60 if (always_checksum && S_ISREG(st->st_mode)) {
61 char sum[MD4_SUM_LENGTH];
62+ if (pre_checksum && fnamecmp_type == FNAMECMP_FNAME)
63+ return !(file->flags & FLAG_SUM_DIFFERS);
64 file_checksum(fn, sum, st->st_size);
65 return memcmp(sum, file->u.sum, protocol_version < 21 ? 2
66 : MD4_SUM_LENGTH) == 0;
67@@ -923,7 +927,7 @@ static void recv_generator(char *fname,
68 match_level = 1;
69 /* FALL THROUGH */
70 case 1:
71- if (!unchanged_file(fnamecmpbuf, file, &st))
72+ if (!unchanged_file(fnamecmpbuf, 0, file, &st))
73 continue;
74 best_match = i;
75 match_level = 2;
76@@ -1051,7 +1055,7 @@ static void recv_generator(char *fname,
77 ;
78 else if (fnamecmp_type == FNAMECMP_FUZZY)
79 ;
80- else if (unchanged_file(fnamecmp, file, &st)) {
81+ else if (unchanged_file(fnamecmp, fnamecmp_type, file, &st)) {
82 if (fnamecmp_type == FNAMECMP_FNAME) {
83 if (itemizing) {
84 itemize(file, ndx, real_ret, &real_st,
85--- orig/main.c 2005-11-02 07:22:12
d47d38c7 86+++ main.c 2005-11-07 05:00:49
2d2150f6
WD
87@@ -45,6 +45,7 @@ extern int copy_links;
88 extern int keep_dirlinks;
89 extern int preserve_hard_links;
90 extern int protocol_version;
91+extern int always_checksum;
92 extern int recurse;
93 extern int relative_paths;
94 extern int rsync_port;
95@@ -60,8 +61,10 @@ extern char *filesfrom_host;
96 extern char *rsync_path;
97 extern char *shell_cmd;
98 extern char *batch_name;
99+extern char curr_dir[MAXPATHLEN];
100
101 int local_server = 0;
102+int pre_checksum = 0;
103 struct file_list *the_file_list;
104
105 /* There's probably never more than at most 2 outstanding child processes,
106@@ -633,6 +636,7 @@ static void do_server_recv(int f_in, int
107 struct file_list *flist;
108 char *local_name = NULL;
109 char *dir = NULL;
110+ char olddir[sizeof curr_dir];
111 int save_verbose = verbose;
112
113 if (filesfrom_fd >= 0) {
114@@ -677,6 +681,10 @@ static void do_server_recv(int f_in, int
115 filesfrom_fd = -1;
116 }
117
118+ strlcpy(olddir, curr_dir, sizeof olddir);
d47d38c7 119+ if (always_checksum && !local_server && argc > 0)
2d2150f6
WD
120+ pre_checksum = push_dir(argv[0]);
121+
122 flist = recv_file_list(f_in);
123 verbose = save_verbose;
124 if (!flist) {
125@@ -685,6 +693,9 @@ static void do_server_recv(int f_in, int
126 }
127 the_file_list = flist;
128
129+ if (pre_checksum)
130+ pop_dir(olddir);
131+
132 if (argc > 0)
133 local_name = get_local_name(flist,argv[0]);
134
135@@ -733,6 +744,7 @@ int client_run(int f_in, int f_out, pid_
136 {
137 struct file_list *flist = NULL;
138 int exit_code = 0, exit_code2 = 0;
139+ char olddir[sizeof curr_dir];
140 char *local_name = NULL;
141
142 cleanup_child_pid = pid;
143@@ -804,11 +816,18 @@ int client_run(int f_in, int f_out, pid_
144 filesfrom_fd = -1;
145 }
146
147+ strlcpy(olddir, curr_dir, sizeof olddir);
d47d38c7 148+ if (always_checksum && !local_server)
2d2150f6
WD
149+ pre_checksum = push_dir(argv[0]);
150+
151 if (write_batch && !am_server)
152 start_write_batch(f_in);
153 flist = recv_file_list(f_in);
154 the_file_list = flist;
155
156+ if (pre_checksum)
157+ pop_dir(olddir);
158+
159 if (flist && flist->count > 0) {
160 local_name = get_local_name(flist, argv[0]);
161
162--- orig/rsync.h 2005-10-14 18:45:50
163+++ rsync.h 2005-09-16 16:41:26
164@@ -64,6 +64,7 @@
165 #define FLAG_DEL_HERE (1<<3) /* receiver/generator */
166 #define FLAG_SENT (1<<3) /* sender */
167 #define FLAG_HLINK_TOL (1<<4) /* receiver/generator */
168+#define FLAG_SUM_DIFFERS (1<<5) /* receiver/generator */
169
170 /* update this if you make incompatible changes */
171 #define PROTOCOL_VERSION 29