Make sure that any weird mode bits (if they exist up beyond the
[rsync/rsync-patches.git] / early-checksum.diff
... / ...
CommitLineData
1This patch changes the way the --checksum option works by having the
2receiving side perform a checksum-read of every file in the file list
3as the list is received (if the sizes are equal), marking non-matching
4items with a flag. The idea is that the checksum pass on the sender and
5the receiver can then happen in parallel instead of having the receiver
6do its checksum pass during its normal find-the-different-files pass.
7
8I have benchmarked this a little, and it appears to slow things down
9for a local copy, so the old algorithm is used for local copies.
10
11To use this patch, run these commands for a successful build:
12
13 patch -p1 <patches/early-checksum.diff
14 ./configure (optional if already run)
15 make
16
17--- old/flist.c
18+++ new/flist.c
19@@ -31,6 +31,7 @@ extern int am_daemon;
20 extern int am_sender;
21 extern int do_progress;
22 extern int always_checksum;
23+extern int pre_checksum;
24 extern int module_id;
25 extern int ignore_errors;
26 extern int numeric_ids;
27@@ -697,6 +698,16 @@ static struct file_struct *receive_file_
28 sum = empty_sum;
29 }
30 read_buf(f, sum, checksum_len);
31+ if (pre_checksum) {
32+ char sum2[MD4_SUM_LENGTH];
33+ STRUCT_STAT st;
34+ char *fname = f_name(file, NULL);
35+ if (stat(fname, &st) == 0 && st.st_size == file_length) {
36+ file_checksum(fname, sum2, st.st_size);
37+ if (memcmp(sum, sum2, checksum_len) != 0)
38+ file->flags |= FLAG_SUM_DIFFERS;
39+ }
40+ }
41 }
42
43 return file;
44--- old/generator.c
45+++ new/generator.c
46@@ -70,6 +70,7 @@ extern int ignore_timeout;
47 extern int protocol_version;
48 extern int fuzzy_basis;
49 extern int always_checksum;
50+extern int pre_checksum;
51 extern int checksum_len;
52 extern char *partial_dir;
53 extern char *basis_dir[];
54@@ -376,7 +377,8 @@ void itemize(struct file_struct *file, i
55
56
57 /* Perform our quick-check heuristic for determining if a file is unchanged. */
58-int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
59+int unchanged_file(char *fn, int fnamecmp_type, struct file_struct *file,
60+ STRUCT_STAT *st)
61 {
62 if (st->st_size != file->length)
63 return 0;
64@@ -385,6 +387,8 @@ int unchanged_file(char *fn, struct file
65 of the file time to determine whether to sync */
66 if (always_checksum && S_ISREG(st->st_mode)) {
67 char sum[MD4_SUM_LENGTH];
68+ if (pre_checksum && fnamecmp_type == FNAMECMP_FNAME)
69+ return !(file->flags & FLAG_SUM_DIFFERS);
70 file_checksum(fn, sum, st->st_size);
71 return memcmp(sum, file->u.sum, checksum_len) == 0;
72 }
73@@ -622,7 +626,7 @@ static int try_dests_reg(struct file_str
74 match_level = 1;
75 /* FALL THROUGH */
76 case 1:
77- if (!unchanged_file(cmpbuf, file, stp))
78+ if (!unchanged_file(cmpbuf, 0, file, stp))
79 continue;
80 best_match = j;
81 match_level = 2;
82@@ -1215,7 +1219,7 @@ static void recv_generator(char *fname,
83 ;
84 else if (fnamecmp_type == FNAMECMP_FUZZY)
85 ;
86- else if (unchanged_file(fnamecmp, file, &st)) {
87+ else if (unchanged_file(fnamecmp, fnamecmp_type, file, &st)) {
88 if (partialptr) {
89 do_unlink(partialptr);
90 handle_partial_dir(partialptr, PDIR_DELETE);
91--- old/hlink.c
92+++ new/hlink.c
93@@ -224,7 +224,7 @@ int hard_link_check(struct file_struct *
94 }
95 break;
96 }
97- if (!unchanged_file(cmpbuf, file, &st3))
98+ if (!unchanged_file(cmpbuf, 0, file, &st3))
99 continue;
100 statret = 1;
101 st = &st3;
102--- old/main.c
103+++ new/main.c
104@@ -47,6 +47,7 @@ extern int copy_dirlinks;
105 extern int keep_dirlinks;
106 extern int preserve_hard_links;
107 extern int protocol_version;
108+extern int always_checksum;
109 extern int recurse;
110 extern int relative_paths;
111 extern int sanitize_paths;
112@@ -71,6 +72,9 @@ extern char *batch_name;
113 extern char curr_dir[MAXPATHLEN];
114 extern struct filter_list_struct server_filter_list;
115
116+extern char curr_dir[MAXPATHLEN];
117+
118+int pre_checksum = 0;
119 int local_server = 0;
120 int new_root_dir = 0;
121 mode_t orig_umask = 0;
122@@ -784,6 +788,7 @@ static void do_server_recv(int f_in, int
123 struct file_list *flist;
124 char *local_name = NULL;
125 char *dir = NULL;
126+ char olddir[sizeof curr_dir];
127 int save_verbose = verbose;
128
129 if (filesfrom_fd >= 0) {
130@@ -827,6 +832,10 @@ static void do_server_recv(int f_in, int
131 filesfrom_fd = -1;
132 }
133
134+ strlcpy(olddir, curr_dir, sizeof olddir);
135+ if (always_checksum && !local_server && argc > 0)
136+ pre_checksum = push_dir(argv[0], 0);
137+
138 flist = recv_file_list(f_in);
139 verbose = save_verbose;
140 if (!flist) {
141@@ -835,6 +844,9 @@ static void do_server_recv(int f_in, int
142 }
143 the_file_list = flist;
144
145+ if (pre_checksum)
146+ pop_dir(olddir);
147+
148 if (argc > 0)
149 local_name = get_local_name(flist,argv[0]);
150
151@@ -916,6 +928,7 @@ int client_run(int f_in, int f_out, pid_
152 {
153 struct file_list *flist = NULL;
154 int exit_code = 0, exit_code2 = 0;
155+ char olddir[sizeof curr_dir];
156 char *local_name = NULL;
157
158 cleanup_child_pid = pid;
159@@ -990,11 +1003,18 @@ int client_run(int f_in, int f_out, pid_
160 filesfrom_fd = -1;
161 }
162
163+ strlcpy(olddir, curr_dir, sizeof olddir);
164+ if (always_checksum && !local_server)
165+ pre_checksum = push_dir(argv[0], 0);
166+
167 if (write_batch && !am_server)
168 start_write_batch(f_in);
169 flist = recv_file_list(f_in);
170 the_file_list = flist;
171
172+ if (pre_checksum)
173+ pop_dir(olddir);
174+
175 if (flist && flist->count > 0) {
176 local_name = get_local_name(flist, argv[0]);
177
178--- old/rsync.h
179+++ new/rsync.h
180@@ -65,6 +65,7 @@
181 #define FLAG_HLINK_TOL (1<<4) /* receiver/generator */
182 #define FLAG_NO_FUZZY (1<<5) /* generator */
183 #define FLAG_MISSING (1<<6) /* generator */
184+#define FLAG_SUM_DIFFERS (1<<7) /* receiver/generator */
185
186 /* update this if you make incompatible changes */
187 #define PROTOCOL_VERSION 29