| 1 | This patch to rsync adds a %C log escape that expands to the sender's |
| 2 | post-transfer full-file checksum in hex for protocol 30 or above; for |
| 3 | older protocols or items that were not transferred it prints '?' filler. |
| 4 | If you need the MD5 checksums of transferred files, you can have rsync log |
| 5 | them instead of spending extra processor time on a separate command. |
| 6 | |
| 7 | -- Matt McCutchen <hashproduct@gmail.com> |
| 8 | |
| 9 | diff --git a/log.c b/log.c |
| 10 | --- a/log.c |
| 11 | +++ b/log.c |
| 12 | @@ -57,6 +57,9 @@ extern char curr_dir[]; |
| 13 | extern char *module_dir; |
| 14 | extern unsigned int module_dirlen; |
| 15 | |
| 16 | +extern char sender_file_sum[MAX_DIGEST_LEN]; |
| 17 | +extern int file_sum_len; |
| 18 | + |
| 19 | static int log_initialised; |
| 20 | static int logfile_was_closed; |
| 21 | static FILE *logfile_fp; |
| 22 | @@ -622,6 +625,19 @@ static void log_formatted(enum logcode code, const char *format, const char *op, |
| 23 | snprintf(buf2, sizeof buf2, fmt, (double)b); |
| 24 | n = buf2; |
| 25 | break; |
| 26 | + case 'C': |
| 27 | + if (iflags & ITEM_TRANSFER && protocol_version >= 30) { |
| 28 | + int i; |
| 29 | + for (i = 0; i < file_sum_len; i++) |
| 30 | + snprintf(buf2 + i*2, 3, "%02x", (int)CVAL(sender_file_sum,i)); |
| 31 | + } else { |
| 32 | + int i; |
| 33 | + for (i = 0; i < file_sum_len*2; i++) |
| 34 | + buf2[i] = '?'; |
| 35 | + buf2[i] = '\0'; |
| 36 | + } |
| 37 | + n = buf2; |
| 38 | + break; |
| 39 | case 'i': |
| 40 | if (iflags & ITEM_DELETED) { |
| 41 | n = "*deleting "; |
| 42 | diff --git a/match.c b/match.c |
| 43 | --- a/match.c |
| 44 | +++ b/match.c |
| 45 | @@ -312,6 +312,10 @@ static void hash_search(int f,struct sum_struct *s, |
| 46 | map_ptr(buf, len-1, 1); |
| 47 | } |
| 48 | |
| 49 | +/* The sender's checksum of a transferred file, kept global for log escape |
| 50 | + * %C: set by match_sums() when sending, receive_data() when receiving. */ |
| 51 | +char sender_file_sum[MAX_DIGEST_LEN]; |
| 52 | +int file_sum_len = MD5_DIGEST_LEN; |
| 53 | |
| 54 | /** |
| 55 | * Scan through a origin file, looking for sections that match |
| 56 | @@ -329,9 +333,6 @@ static void hash_search(int f,struct sum_struct *s, |
| 57 | **/ |
| 58 | void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len) |
| 59 | { |
| 60 | - char file_sum[MAX_DIGEST_LEN]; |
| 61 | - int sum_len; |
| 62 | - |
| 63 | last_match = 0; |
| 64 | false_alarms = 0; |
| 65 | hash_hits = 0; |
| 66 | @@ -379,14 +380,26 @@ void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len) |
| 67 | matched(f, s, buf, len, -1); |
| 68 | } |
| 69 | |
| 70 | - sum_len = sum_end(file_sum); |
| 71 | - /* If we had a read error, send a bad checksum. */ |
| 72 | - if (buf && buf->status != 0) |
| 73 | - file_sum[0]++; |
| 74 | + file_sum_len = sum_end(sender_file_sum); |
| 75 | + |
| 76 | + /* If we had a read error, send a bad checksum: all bits off, or all |
| 77 | + * bits on if the real sum is itself all zeros, so that a user logging |
| 78 | + * checksums with %C can recognize a bad checksum. */ |
| 79 | + if (buf && buf->status != 0) { |
| 80 | + int i; |
| 81 | + for (i = 0; i < file_sum_len; i++) { |
| 82 | + if (sender_file_sum[i]) |
| 83 | + break; |
| 84 | + } |
| 85 | + if (i < file_sum_len) |
| 86 | + memset(sender_file_sum, 0, file_sum_len); |
| 87 | + else |
| 88 | + memset(sender_file_sum, 0xFF, file_sum_len); |
| 89 | + } |
| 90 | |
| 91 | if (verbose > 2) |
| 92 | rprintf(FINFO,"sending file_sum\n"); |
| 93 | - write_buf(f, file_sum, sum_len); |
| 94 | + write_buf(f, sender_file_sum, file_sum_len); |
| 95 | |
| 96 | if (verbose > 2) |
| 97 | rprintf(FINFO, "false_alarms=%d hash_hits=%d matches=%d\n", |
| 98 | diff --git a/options.c b/options.c |
| 99 | --- a/options.c |
| 100 | +++ b/options.c |
| 101 | @@ -1498,7 +1498,8 @@ int parse_arguments(int *argc_p, const char ***argv_p, int frommain) |
| 102 | else if (log_format_has(stdout_format, 'i')) |
| 103 | stdout_format_has_i = itemize_changes | 1; |
| 104 | if (!log_format_has(stdout_format, 'b') |
| 105 | - && !log_format_has(stdout_format, 'c')) |
| 106 | + && !log_format_has(stdout_format, 'c') |
| 107 | + && !log_format_has(stdout_format, 'C')) |
| 108 | log_before_transfer = !am_server; |
| 109 | } else if (itemize_changes) { |
| 110 | stdout_format = "%i %n%L"; |
| 111 | diff --git a/receiver.c b/receiver.c |
| 112 | --- a/receiver.c |
| 113 | +++ b/receiver.c |
| 114 | @@ -62,6 +62,9 @@ static int phase = 0, redoing = 0; |
| 115 | /* We're either updating the basis file or an identical copy: */ |
| 116 | static int updating_basis_or_equiv; |
| 117 | |
| 118 | +extern char sender_file_sum[MAX_DIGEST_LEN]; |
| 119 | +extern int file_sum_len; |
| 120 | + |
| 121 | /* |
| 122 | * get_tmpname() - create a tmp filename for a given filename |
| 123 | * |
| 124 | @@ -165,10 +168,9 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, |
| 125 | const char *fname, int fd, OFF_T total_size) |
| 126 | { |
| 127 | static char file_sum1[MAX_DIGEST_LEN]; |
| 128 | - static char file_sum2[MAX_DIGEST_LEN]; |
| 129 | struct map_struct *mapbuf; |
| 130 | struct sum_struct sum; |
| 131 | - int32 len, sum_len; |
| 132 | + int32 len; |
| 133 | OFF_T offset = 0; |
| 134 | OFF_T offset2; |
| 135 | char *data; |
| 136 | @@ -298,15 +300,15 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, |
| 137 | exit_cleanup(RERR_FILEIO); |
| 138 | } |
| 139 | |
| 140 | - sum_len = sum_end(file_sum1); |
| 141 | + file_sum_len = sum_end(file_sum1); |
| 142 | |
| 143 | if (mapbuf) |
| 144 | unmap_file(mapbuf); |
| 145 | |
| 146 | - read_buf(f_in, file_sum2, sum_len); |
| 147 | + read_buf(f_in, sender_file_sum, file_sum_len); |
| 148 | if (verbose > 2) |
| 149 | rprintf(FINFO,"got file_sum\n"); |
| 150 | - if (fd != -1 && memcmp(file_sum1, file_sum2, sum_len) != 0) |
| 151 | + if (fd != -1 && memcmp(file_sum1, sender_file_sum, file_sum_len) != 0) |
| 152 | return 0; |
| 153 | return 1; |
| 154 | } |
| 155 | diff --git a/rsync.yo b/rsync.yo |
| 156 | --- a/rsync.yo |
| 157 | +++ b/rsync.yo |
| 158 | @@ -2075,7 +2075,7 @@ by the server and defaults to the current code(time()). This option |
| 159 | is used to set a specific checksum seed, which is useful for |
| 160 | applications that want repeatable block and file checksums, or |
| 161 | in the case where the user wants a more random checksum seed. |
| 162 | -Note that setting NUM to 0 causes rsync to use the default of code(time()) |
| 163 | +Setting NUM to 0 causes rsync to use the default of code(time()) |
| 164 | for checksum seed. |
| 165 | enddit() |
| 166 | |
| 167 | diff --git a/rsyncd.conf.yo b/rsyncd.conf.yo |
| 168 | --- a/rsyncd.conf.yo |
| 169 | +++ b/rsyncd.conf.yo |
| 170 | @@ -511,7 +511,8 @@ quote(itemization( |
| 171 | it() %a the remote IP address |
| 172 | it() %b the number of bytes actually transferred |
| 173 | it() %B the permission bits of the file (e.g. rwxrwxrwt) |
| 174 | - it() %c the checksum bytes received for this file (only when sending) |
| 175 | + it() %c the total size of the block checksums received for the basis file (only when sending) |
| 176 | + it() %C the full-file MD5 checksum, in hex, of a transferred file (only for protocol 30 or above) |
| 177 | it() %f the filename (long form on sender; no trailing "/") |
| 178 | it() %G the gid of the file (decimal) or "DEFAULT" |
| 179 | it() %h the remote host name |