| 1 | This patch to rsync adds a %C log escape that, for protocol 30 or above, |
| 2 | expands to the sender's full-file MD5 checksum of each transferred file |
| 3 | (and of every regular file when --checksum is enabled); otherwise it |
| 4 | expands to blanks. This way, if you need the MD5 checksums of the files, |
| 5 | rsync can log them without a separate command recomputing them. |
| 6 | |
| 7 | -- Matt McCutchen <hashproduct@gmail.com> |
| 8 | |
| 9 | To use this patch, run these commands for a successful build: |
| 10 | |
| 11 | patch -p1 <patches/log-checksum.diff |
| 12 | ./configure (optional if already run) |
| 13 | make |
| 14 | |
| 15 | diff --git a/flist.c b/flist.c |
| 16 | --- a/flist.c |
| 17 | +++ b/flist.c |
| 18 | @@ -68,6 +68,7 @@ extern int sanitize_paths; |
| 19 | extern int munge_symlinks; |
| 20 | extern int need_unsorted_flist; |
| 21 | extern int sender_symlink_iconv; |
| 22 | +extern int sender_keeps_checksum; |
| 23 | extern int unsort_ndx; |
| 24 | extern struct stats stats; |
| 25 | extern char *filesfrom_host; |
| 26 | @@ -1223,6 +1224,12 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, |
| 27 | extra_len += EXTRA_LEN; |
| 28 | #endif |
| 29 | |
| 30 | + if (always_checksum && am_sender && S_ISREG(st.st_mode)) { |
| 31 | + file_checksum(thisname, tmp_sum, st.st_size); |
| 32 | + if (sender_keeps_checksum) |
| 33 | + extra_len += SUM_EXTRA_CNT * EXTRA_LEN; |
| 34 | + } |
| 35 | + |
| 36 | #if EXTRA_ROUNDING > 0 |
| 37 | if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN)) |
| 38 | extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN; |
| 39 | @@ -1286,9 +1293,6 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, |
| 40 | memcpy(bp + basename_len, linkname, linkname_len); |
| 41 | #endif |
| 42 | |
| 43 | - if (always_checksum && am_sender && S_ISREG(st.st_mode)) |
| 44 | - file_checksum(thisname, tmp_sum, st.st_size); |
| 45 | - |
| 46 | if (am_sender) |
| 47 | F_PATHNAME(file) = pathname; |
| 48 | else if (!pool) |
| 49 | @@ -1300,6 +1304,9 @@ struct file_struct *make_file(const char *fname, struct file_list *flist, |
| 50 | return NULL; |
| 51 | } |
| 52 | |
| 53 | + if (sender_keeps_checksum && S_ISREG(st.st_mode)) |
| 54 | + memcpy(F_SUM(file), tmp_sum, checksum_len); |
| 55 | + |
| 56 | if (unsort_ndx) |
| 57 | F_NDX(file) = dir_count; |
| 58 | |
| 59 | diff --git a/log.c b/log.c |
| 60 | --- a/log.c |
| 61 | +++ b/log.c |
| 62 | @@ -32,8 +32,10 @@ extern int local_server; |
| 63 | extern int quiet; |
| 64 | extern int module_id; |
| 65 | extern int msg_fd_out; |
| 66 | +extern int checksum_len; |
| 67 | extern int allow_8bit_chars; |
| 68 | extern int protocol_version; |
| 69 | +extern int always_checksum; |
| 70 | extern int preserve_times; |
| 71 | extern int uid_ndx; |
| 72 | extern int gid_ndx; |
| 73 | @@ -57,6 +59,7 @@ extern iconv_t ic_send, ic_recv; |
| 74 | extern char curr_dir[]; |
| 75 | extern char *full_module_path; |
| 76 | extern unsigned int module_dirlen; |
| 77 | +extern char sender_file_sum[MAX_DIGEST_LEN]; |
| 78 | |
| 79 | static int log_initialised; |
| 80 | static int logfile_was_closed; |
| 81 | @@ -632,6 +635,28 @@ static void log_formatted(enum logcode code, const char *format, const char *op, |
| 82 | snprintf(buf2, sizeof buf2, fmt, (double)b); |
| 83 | n = buf2; |
| 84 | break; |
| 85 | + case 'C': |
| 86 | + if (protocol_version >= 30 |
| 87 | + && (iflags & ITEM_TRANSFER |
| 88 | + || (always_checksum && S_ISREG(file->mode)))) { |
| 89 | + int i, x1, x2; |
| 90 | + const char *sum = iflags & ITEM_TRANSFER |
| 91 | + ? sender_file_sum : F_SUM(file); |
| 92 | + c = buf2 + checksum_len*2; |
| 93 | + *c = '\0'; |
| 94 | + for (i = checksum_len; --i >= 0; ) { |
| 95 | + x1 = CVAL(sum, i); |
| 96 | + x2 = x1 >> 4; |
| 97 | + x1 &= 0xF; |
| 98 | + *--c = x1 <= 9 ? x1 + '0' : x1 + 'a' - 10; |
| 99 | + *--c = x2 <= 9 ? x2 + '0' : x2 + 'a' - 10; |
| 100 | + } |
| 101 | + } else { |
| 102 | + memset(buf2, ' ', checksum_len*2); |
| 103 | + buf2[checksum_len*2] = '\0'; |
| 104 | + } |
| 105 | + n = buf2; |
| 106 | + break; |
| 107 | case 'i': |
| 108 | if (iflags & ITEM_DELETED) { |
| 109 | n = "*deleting "; |
| 110 | diff --git a/main.c b/main.c |
| 111 | --- a/main.c |
| 112 | +++ b/main.c |
| 113 | @@ -37,6 +37,7 @@ extern int am_generator; |
| 114 | extern int am_daemon; |
| 115 | extern int inc_recurse; |
| 116 | extern int blocking_io; |
| 117 | +extern int always_checksum; |
| 118 | extern int remove_source_files; |
| 119 | extern int need_messages_from_generator; |
| 120 | extern int kluge_around_eof; |
| 121 | @@ -68,6 +69,8 @@ extern int connect_timeout; |
| 122 | extern pid_t cleanup_child_pid; |
| 123 | extern unsigned int module_dirlen; |
| 124 | extern struct stats stats; |
| 125 | +extern char *stdout_format; |
| 126 | +extern char *logfile_format; |
| 127 | extern char *filesfrom_host; |
| 128 | extern char *partial_dir; |
| 129 | extern char *dest_option; |
| 130 | @@ -85,6 +88,7 @@ int local_server = 0; |
| 131 | int daemon_over_rsh = 0; |
| 132 | mode_t orig_umask = 0; |
| 133 | int batch_gen_fd = -1; |
| 134 | +int sender_keeps_checksum = 0; |
| 135 | |
| 136 | /* There's probably never more than at most 2 outstanding child processes, |
| 137 | * but set it higher, just in case. */ |
| 138 | @@ -1003,6 +1007,12 @@ int client_run(int f_in, int f_out, pid_t pid, int argc, char *argv[]) |
| 139 | |
| 140 | if (am_sender) { |
| 141 | keep_dirlinks = 0; /* Must be disabled on the sender. */ |
| 142 | + |
| 143 | + if (always_checksum |
| 144 | + && (log_format_has(stdout_format, 'C') |
| 145 | + || log_format_has(logfile_format, 'C'))) |
| 146 | + sender_keeps_checksum = 1; |
| 147 | + |
| 148 | if (protocol_version >= 30) |
| 149 | io_start_multiplex_out(); |
| 150 | else |
| 151 | diff --git a/match.c b/match.c |
| 152 | --- a/match.c |
| 153 | +++ b/match.c |
| 154 | @@ -25,8 +25,10 @@ extern int verbose; |
| 155 | extern int do_progress; |
| 156 | extern int checksum_seed; |
| 157 | extern int append_mode; |
| 158 | +extern int checksum_len; |
| 159 | |
| 160 | int updating_basis_file; |
| 161 | +char sender_file_sum[MAX_DIGEST_LEN]; |
| 162 | |
| 163 | static int false_alarms; |
| 164 | static int hash_hits; |
| 165 | @@ -329,9 +331,6 @@ static void hash_search(int f,struct sum_struct *s, |
| 166 | **/ |
| 167 | void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len) |
| 168 | { |
| 169 | - char file_sum[MAX_DIGEST_LEN]; |
| 170 | - int sum_len; |
| 171 | - |
| 172 | last_match = 0; |
| 173 | false_alarms = 0; |
| 174 | hash_hits = 0; |
| 175 | @@ -379,18 +378,28 @@ void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len) |
| 176 | matched(f, s, buf, len, -1); |
| 177 | } |
| 178 | |
| 179 | - sum_len = sum_end(file_sum); |
| 180 | - /* If we had a read error, send a bad checksum. */ |
| 181 | - if (buf && buf->status != 0) |
| 182 | - file_sum[0]++; |
| 183 | + if (sum_end(sender_file_sum) != checksum_len) |
| 184 | + overflow_exit("checksum_len"); /* Impossible... */ |
| 185 | + |
| 186 | + /* If we had a read error, send a bad checksum. We send all-zero |
| 187 | + * bytes unless the real checksum happens to be all zeros, in which |
| 188 | + * case we send zeros with the last byte set to 1 instead. */ |
| 189 | + if (buf && buf->status != 0) { |
| 190 | + int i; |
| 191 | + for (i = 0; i < checksum_len && sender_file_sum[i] == 0; i++) {} |
| 192 | + memset(sender_file_sum, 0, checksum_len); |
| 193 | + if (i == checksum_len) |
| 194 | + sender_file_sum[i-1]++; |
| 195 | + } |
| 196 | |
| 197 | if (verbose > 2) |
| 198 | rprintf(FINFO,"sending file_sum\n"); |
| 199 | - write_buf(f, file_sum, sum_len); |
| 200 | + write_buf(f, sender_file_sum, checksum_len); |
| 201 | |
| 202 | - if (verbose > 2) |
| 203 | + if (verbose > 2) { |
| 204 | rprintf(FINFO, "false_alarms=%d hash_hits=%d matches=%d\n", |
| 205 | false_alarms, hash_hits, matches); |
| 206 | + } |
| 207 | |
| 208 | total_hash_hits += hash_hits; |
| 209 | total_false_alarms += false_alarms; |
| 210 | diff --git a/options.c b/options.c |
| 211 | --- a/options.c |
| 212 | +++ b/options.c |
| 213 | @@ -1525,7 +1525,8 @@ int parse_arguments(int *argc_p, const char ***argv_p) |
| 214 | else if (log_format_has(stdout_format, 'i')) |
| 215 | stdout_format_has_i = itemize_changes | 1; |
| 216 | if (!log_format_has(stdout_format, 'b') |
| 217 | - && !log_format_has(stdout_format, 'c')) |
| 218 | + && !log_format_has(stdout_format, 'c') |
| 219 | + && !log_format_has(stdout_format, 'C')) |
| 220 | log_before_transfer = !am_server; |
| 221 | } else if (itemize_changes) { |
| 222 | stdout_format = "%i %n%L"; |
| 223 | diff --git a/receiver.c b/receiver.c |
| 224 | --- a/receiver.c |
| 225 | +++ b/receiver.c |
| 226 | @@ -46,6 +46,7 @@ extern int remove_source_files; |
| 227 | extern int append_mode; |
| 228 | extern int sparse_files; |
| 229 | extern int keep_partial; |
| 230 | +extern int checksum_len; |
| 231 | extern int checksum_seed; |
| 232 | extern int inplace; |
| 233 | extern int delay_updates; |
| 234 | @@ -54,6 +55,7 @@ extern struct stats stats; |
| 235 | extern char *tmpdir; |
| 236 | extern char *partial_dir; |
| 237 | extern char *basis_dir[]; |
| 238 | +extern char sender_file_sum[MAX_DIGEST_LEN]; |
| 239 | extern struct file_list *cur_flist, *first_flist, *dir_flist; |
| 240 | extern struct filter_list_struct daemon_filter_list; |
| 241 | |
| 242 | @@ -166,10 +168,9 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, |
| 243 | const char *fname, int fd, OFF_T total_size) |
| 244 | { |
| 245 | static char file_sum1[MAX_DIGEST_LEN]; |
| 246 | - static char file_sum2[MAX_DIGEST_LEN]; |
| 247 | struct map_struct *mapbuf; |
| 248 | struct sum_struct sum; |
| 249 | - int32 len, sum_len; |
| 250 | + int32 len; |
| 251 | OFF_T offset = 0; |
| 252 | OFF_T offset2; |
| 253 | char *data; |
| 254 | @@ -302,15 +303,16 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, |
| 255 | exit_cleanup(RERR_FILEIO); |
| 256 | } |
| 257 | |
| 258 | - sum_len = sum_end(file_sum1); |
| 259 | + if (sum_end(file_sum1) != checksum_len) |
| 260 | + overflow_exit("checksum_len"); /* Impossible... */ |
| 261 | |
| 262 | if (mapbuf) |
| 263 | unmap_file(mapbuf); |
| 264 | |
| 265 | - read_buf(f_in, file_sum2, sum_len); |
| 266 | + read_buf(f_in, sender_file_sum, checksum_len); |
| 267 | if (verbose > 2) |
| 268 | rprintf(FINFO,"got file_sum\n"); |
| 269 | - if (fd != -1 && memcmp(file_sum1, file_sum2, sum_len) != 0) |
| 270 | + if (fd != -1 && memcmp(file_sum1, sender_file_sum, checksum_len) != 0) |
| 271 | return 0; |
| 272 | return 1; |
| 273 | } |
| 274 | diff --git a/rsyncd.conf.yo b/rsyncd.conf.yo |
| 275 | --- a/rsyncd.conf.yo |
| 276 | +++ b/rsyncd.conf.yo |
| 277 | @@ -510,6 +510,7 @@ quote(itemization( |
| 278 | it() %b the number of bytes actually transferred |
| 279 | it() %B the permission bits of the file (e.g. rwxrwxrwt) |
| 280 | it() %c the total size of the block checksums received for the basis file (only when sending) |
| 281 | + it() %C the full-file MD5 checksum if bf(--checksum) is enabled or a file was transferred (only for protocol 30 or above) |
| 282 | it() %f the filename (long form on sender; no trailing "/") |
| 283 | it() %G the gid of the file (decimal) or "DEFAULT" |
| 284 | it() %h the remote host name |