Commit | Line | Data |
---|---|---|
2d2150f6 WD |
1 | This patch changes the way the --checksum option works by having the |
2 | receiving side perform a checksum-read of every file in the file list | |
3 | (if the sizes are equal) as the list is received, marking non-matching | |
4 | items with a flag. The idea is that the checksum pass on the sender and | |
5 | the receiver can then happen in parallel instead of having the receiver | |
6 | do its checksum pass during its normal find-the-different-files pass. | |
7 | ||
8 | I have benchmarked this a little, and it appears to slow things down. | |
9 | ||
10 | --- orig/flist.c 2005-11-01 20:09:55 | |
11 | +++ flist.c 2005-09-16 16:41:25 | |
12 | @@ -36,6 +36,7 @@ extern int am_daemon; | |
13 | extern int am_sender; | |
14 | extern int do_progress; | |
15 | extern int always_checksum; | |
16 | +extern int pre_checksum; | |
17 | extern int module_id; | |
18 | extern int ignore_errors; | |
19 | extern int numeric_ids; | |
20 | @@ -705,6 +706,16 @@ static struct file_struct *receive_file_ | |
21 | sum = empty_sum; | |
22 | } | |
23 | read_buf(f, sum, slen); | |
24 | + if (pre_checksum) { | |
25 | + char sum2[MD4_SUM_LENGTH]; | |
26 | + STRUCT_STAT st; | |
27 | + char *fname = f_name(file); | |
28 | + if (stat(fname, &st) == 0 && st.st_size == file_length) { | |
29 | + file_checksum(fname, sum2, st.st_size); | |
30 | + if (memcmp(sum, sum2, slen) != 0) | |
31 | + file->flags |= FLAG_SUM_DIFFERS; | |
32 | + } | |
33 | + } | |
34 | } | |
35 | ||
36 | if (!preserve_perms) { | |
37 | --- orig/generator.c 2005-10-30 22:30:28 | |
38 | +++ generator.c 2005-09-16 16:41:25 | |
39 | @@ -69,6 +69,7 @@ extern int ignore_timeout; | |
40 | extern int protocol_version; | |
41 | extern int fuzzy_basis; | |
42 | extern int always_checksum; | |
43 | +extern int pre_checksum; | |
44 | extern char *partial_dir; | |
45 | extern char *basis_dir[]; | |
46 | extern int compare_dest; | |
47 | @@ -359,7 +360,8 @@ void itemize(struct file_struct *file, i | |
48 | ||
49 | ||
50 | /* Perform our quick-check heuristic for determining if a file is unchanged. */ | |
51 | -static int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st) | |
52 | +static int unchanged_file(char *fn, int fnamecmp_type, struct file_struct *file, | |
53 | + STRUCT_STAT *st) | |
54 | { | |
55 | if (st->st_size != file->length) | |
56 | return 0; | |
57 | @@ -368,6 +370,8 @@ static int unchanged_file(char *fn, stru | |
58 | of the file time to determine whether to sync */ | |
59 | if (always_checksum && S_ISREG(st->st_mode)) { | |
60 | char sum[MD4_SUM_LENGTH]; | |
61 | + if (pre_checksum && fnamecmp_type == FNAMECMP_FNAME) | |
62 | + return !(file->flags & FLAG_SUM_DIFFERS); | |
63 | file_checksum(fn, sum, st->st_size); | |
64 | return memcmp(sum, file->u.sum, protocol_version < 21 ? 2 | |
65 | : MD4_SUM_LENGTH) == 0; | |
66 | @@ -923,7 +927,7 @@ static void recv_generator(char *fname, | |
67 | match_level = 1; | |
68 | /* FALL THROUGH */ | |
69 | case 1: | |
70 | - if (!unchanged_file(fnamecmpbuf, file, &st)) | |
71 | + if (!unchanged_file(fnamecmpbuf, 0, file, &st)) | |
72 | continue; | |
73 | best_match = i; | |
74 | match_level = 2; | |
75 | @@ -1051,7 +1055,7 @@ static void recv_generator(char *fname, | |
76 | ; | |
77 | else if (fnamecmp_type == FNAMECMP_FUZZY) | |
78 | ; | |
79 | - else if (unchanged_file(fnamecmp, file, &st)) { | |
80 | + else if (unchanged_file(fnamecmp, fnamecmp_type, file, &st)) { | |
81 | if (fnamecmp_type == FNAMECMP_FNAME) { | |
82 | if (itemizing) { | |
83 | itemize(file, ndx, real_ret, &real_st, | |
84 | --- orig/main.c 2005-11-02 07:22:12 | |
85 | +++ main.c 2005-09-16 16:41:25 | |
86 | @@ -45,6 +45,7 @@ extern int copy_links; | |
87 | extern int keep_dirlinks; | |
88 | extern int preserve_hard_links; | |
89 | extern int protocol_version; | |
90 | +extern int always_checksum; | |
91 | extern int recurse; | |
92 | extern int relative_paths; | |
93 | extern int rsync_port; | |
94 | @@ -60,8 +61,10 @@ extern char *filesfrom_host; | |
95 | extern char *rsync_path; | |
96 | extern char *shell_cmd; | |
97 | extern char *batch_name; | |
98 | +extern char curr_dir[MAXPATHLEN]; | |
99 | ||
100 | int local_server = 0; | |
101 | +int pre_checksum = 0; | |
102 | struct file_list *the_file_list; | |
103 | ||
104 | /* There's probably never more than at most 2 outstanding child processes, | |
105 | @@ -633,6 +636,7 @@ static void do_server_recv(int f_in, int | |
106 | struct file_list *flist; | |
107 | char *local_name = NULL; | |
108 | char *dir = NULL; | |
109 | + char olddir[sizeof curr_dir]; | |
110 | int save_verbose = verbose; | |
111 | ||
112 | if (filesfrom_fd >= 0) { | |
113 | @@ -677,6 +681,10 @@ static void do_server_recv(int f_in, int | |
114 | filesfrom_fd = -1; | |
115 | } | |
116 | ||
117 | + strlcpy(olddir, curr_dir, sizeof olddir); | |
118 | + if (always_checksum && argc > 0) | |
119 | + pre_checksum = push_dir(argv[0]); | |
120 | + | |
121 | flist = recv_file_list(f_in); | |
122 | verbose = save_verbose; | |
123 | if (!flist) { | |
124 | @@ -685,6 +693,9 @@ static void do_server_recv(int f_in, int | |
125 | } | |
126 | the_file_list = flist; | |
127 | ||
128 | + if (pre_checksum) | |
129 | + pop_dir(olddir); | |
130 | + | |
131 | if (argc > 0) | |
132 | local_name = get_local_name(flist,argv[0]); | |
133 | ||
134 | @@ -733,6 +744,7 @@ int client_run(int f_in, int f_out, pid_ | |
135 | { | |
136 | struct file_list *flist = NULL; | |
137 | int exit_code = 0, exit_code2 = 0; | |
138 | + char olddir[sizeof curr_dir]; | |
139 | char *local_name = NULL; | |
140 | ||
141 | cleanup_child_pid = pid; | |
142 | @@ -804,11 +816,18 @@ int client_run(int f_in, int f_out, pid_ | |
143 | filesfrom_fd = -1; | |
144 | } | |
145 | ||
146 | + strlcpy(olddir, curr_dir, sizeof olddir); | |
147 | + if (always_checksum) | |
148 | + pre_checksum = push_dir(argv[0]); | |
149 | + | |
150 | if (write_batch && !am_server) | |
151 | start_write_batch(f_in); | |
152 | flist = recv_file_list(f_in); | |
153 | the_file_list = flist; | |
154 | ||
155 | + if (pre_checksum) | |
156 | + pop_dir(olddir); | |
157 | + | |
158 | if (flist && flist->count > 0) { | |
159 | local_name = get_local_name(flist, argv[0]); | |
160 | ||
161 | --- orig/rsync.h 2005-10-14 18:45:50 | |
162 | +++ rsync.h 2005-09-16 16:41:26 | |
163 | @@ -64,6 +64,7 @@ | |
164 | #define FLAG_DEL_HERE (1<<3) /* receiver/generator */ | |
165 | #define FLAG_SENT (1<<3) /* sender */ | |
166 | #define FLAG_HLINK_TOL (1<<4) /* receiver/generator */ | |
167 | +#define FLAG_SUM_DIFFERS (1<<5) /* receiver/generator */ | |
168 | ||
169 | /* update this if you make incompatible changes */ | |
170 | #define PROTOCOL_VERSION 29 |