| 1 | This patch changes the way the --checksum option works by having the |
| 2 | receiving side perform a checksum-read of every file in the file list |
| 3 | as the list is received (if the sizes are equal), marking non-matching |
| 4 | items with a flag. The idea is that the checksum pass on the sender and |
| 5 | the receiver can then happen in parallel instead of having the receiver |
| 6 | do its checksum pass during its normal find-the-different-files pass. |
| 7 | |
| 8 | I have benchmarked this a little, and it appears to slow things down |
| 9 | for a local copy, so the old algorithm is used for local copies. |
| 10 | |
| 11 | To use this patch, run these commands for a successful build: |
| 12 | |
| 13 | patch -p1 <patches/early-checksum.diff |
| 14 | ./configure (optional if already run) |
| 15 | make |
| 16 | |
| 17 | --- old/flist.c |
| 18 | +++ new/flist.c |
| 19 | @@ -33,6 +33,7 @@ extern int am_sender; |
| 20 | extern int inc_recurse; |
| 21 | extern int do_progress; |
| 22 | extern int always_checksum; |
| 23 | +extern int pre_checksum; |
| 24 | extern int module_id; |
| 25 | extern int ignore_errors; |
| 26 | extern int numeric_ids; |
| 27 | @@ -850,6 +851,15 @@ static struct file_struct *recv_file_ent |
| 28 | memcpy(bp, F_SUM(first), checksum_len); |
| 29 | } else |
| 30 | read_buf(f, bp, checksum_len); |
| 31 | + if (pre_checksum && sum_len) { |
| 32 | + STRUCT_STAT st; |
| 33 | + char *fname = f_name(file, NULL); |
| 34 | + if (stat(fname, &st) == 0 && st.st_size == file_length) { |
| 35 | + file_checksum(fname, tmp_sum, st.st_size); |
| 36 | + if (memcmp(bp, tmp_sum, checksum_len) != 0) |
| 37 | + file->flags |= FLAG_SUM_DIFFERS; |
| 38 | + } |
| 39 | + } |
| 40 | } |
| 41 | |
| 42 | if (S_ISREG(mode) || S_ISLNK(mode)) |
| 43 | --- old/generator.c |
| 44 | +++ new/generator.c |
| 45 | @@ -73,6 +73,7 @@ extern int protocol_version; |
| 46 | extern int file_total; |
| 47 | extern int fuzzy_basis; |
| 48 | extern int always_checksum; |
| 49 | +extern int pre_checksum; |
| 50 | extern int checksum_len; |
| 51 | extern char *partial_dir; |
| 52 | extern char *basis_dir[]; |
| 53 | @@ -569,7 +570,8 @@ void itemize(struct file_struct *file, i |
| 54 | |
| 55 | |
| 56 | /* Perform our quick-check heuristic for determining if a file is unchanged. */ |
| 57 | -int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st) |
| 58 | +int unchanged_file(char *fn, int fnamecmp_type, struct file_struct *file, |
| 59 | + STRUCT_STAT *st) |
| 60 | { |
| 61 | if (st->st_size != F_LENGTH(file)) |
| 62 | return 0; |
| 63 | @@ -578,6 +580,8 @@ int unchanged_file(char *fn, struct file |
| 64 | of the file time to determine whether to sync */ |
| 65 | if (always_checksum > 0 && S_ISREG(st->st_mode)) { |
| 66 | char sum[MD4_SUM_LENGTH]; |
| 67 | + if (pre_checksum && fnamecmp_type == FNAMECMP_FNAME) |
| 68 | + return !(file->flags & FLAG_SUM_DIFFERS); |
| 69 | file_checksum(fn, sum, st->st_size); |
| 70 | return memcmp(sum, F_SUM(file), checksum_len) == 0; |
| 71 | } |
| 72 | @@ -816,7 +820,7 @@ static int try_dests_reg(struct file_str |
| 73 | match_level = 1; |
| 74 | /* FALL THROUGH */ |
| 75 | case 1: |
| 76 | - if (!unchanged_file(cmpbuf, file, stp)) |
| 77 | + if (!unchanged_file(cmpbuf, 0, file, stp)) |
| 78 | continue; |
| 79 | best_match = j; |
| 80 | match_level = 2; |
| 81 | @@ -1508,7 +1512,7 @@ static void recv_generator(char *fname, |
| 82 | ; |
| 83 | else if (fnamecmp_type == FNAMECMP_FUZZY) |
| 84 | ; |
| 85 | - else if (unchanged_file(fnamecmp, file, &st)) { |
| 86 | + else if (unchanged_file(fnamecmp, fnamecmp_type, file, &st)) { |
| 87 | if (partialptr) { |
| 88 | do_unlink(partialptr); |
| 89 | handle_partial_dir(partialptr, PDIR_DELETE); |
| 90 | --- old/hlink.c |
| 91 | +++ new/hlink.c |
| 92 | @@ -382,7 +382,7 @@ int hard_link_check(struct file_struct * |
| 93 | } |
| 94 | break; |
| 95 | } |
| 96 | - if (!unchanged_file(cmpbuf, file, &alt_st)) |
| 97 | + if (!unchanged_file(cmpbuf, 0, file, &alt_st)) |
| 98 | continue; |
| 99 | statret = 1; |
| 100 | *stp = alt_st; |
| 101 | --- old/main.c |
| 102 | +++ new/main.c |
| 103 | @@ -46,6 +46,7 @@ extern int module_id; |
| 104 | extern int copy_links; |
| 105 | extern int copy_dirlinks; |
| 106 | extern int keep_dirlinks; |
| 107 | +extern int always_checksum; |
| 108 | extern int preserve_hard_links; |
| 109 | extern int protocol_version; |
| 110 | extern int file_total; |
| 111 | @@ -73,6 +74,9 @@ extern char *password_file; |
| 112 | extern char curr_dir[MAXPATHLEN]; |
| 113 | extern struct filter_list_struct server_filter_list; |
| 114 | |
| 115 | +extern char curr_dir[MAXPATHLEN]; |
| 116 | + |
| 117 | +int pre_checksum = 0; |
| 118 | int local_server = 0; |
| 119 | int new_root_dir = 0; |
| 120 | mode_t orig_umask = 0; |
| 121 | @@ -793,6 +797,7 @@ static void do_server_recv(int f_in, int |
| 122 | struct file_list *flist; |
| 123 | char *local_name = NULL; |
| 124 | char *dir = NULL; |
| 125 | + char olddir[sizeof curr_dir]; |
| 126 | int save_verbose = verbose; |
| 127 | |
| 128 | if (filesfrom_fd >= 0) { |
| 129 | @@ -839,6 +844,10 @@ static void do_server_recv(int f_in, int |
| 130 | filesfrom_fd = -1; |
| 131 | } |
| 132 | |
| 133 | + strlcpy(olddir, curr_dir, sizeof olddir); |
| 134 | + if (always_checksum && !local_server && argc > 0) |
| 135 | + pre_checksum = push_dir(argv[0], 0); |
| 136 | + |
| 137 | flist = recv_file_list(f_in); |
| 138 | if (!flist) { |
| 139 | rprintf(FERROR,"server_recv: recv_file_list error\n"); |
| 140 | @@ -848,6 +857,9 @@ static void do_server_recv(int f_in, int |
| 141 | recv_additional_file_list(f_in); |
| 142 | verbose = save_verbose; |
| 143 | |
| 144 | + if (pre_checksum) |
| 145 | + pop_dir(olddir); |
| 146 | + |
| 147 | if (argc > 0) |
| 148 | local_name = get_local_name(flist,argv[0]); |
| 149 | |
| 150 | @@ -927,6 +939,7 @@ int client_run(int f_in, int f_out, pid_ |
| 151 | { |
| 152 | struct file_list *flist = NULL; |
| 153 | int exit_code = 0, exit_code2 = 0; |
| 154 | + char olddir[sizeof curr_dir]; |
| 155 | char *local_name = NULL; |
| 156 | |
| 157 | cleanup_child_pid = pid; |
| 158 | @@ -1007,12 +1020,19 @@ int client_run(int f_in, int f_out, pid_ |
| 159 | filesfrom_fd = -1; |
| 160 | } |
| 161 | |
| 162 | + strlcpy(olddir, curr_dir, sizeof olddir); |
| 163 | + if (always_checksum && !local_server) |
| 164 | + pre_checksum = push_dir(argv[0], 0); |
| 165 | + |
| 166 | if (write_batch && !am_server) |
| 167 | start_write_batch(f_in); |
| 168 | flist = recv_file_list(f_in); |
| 169 | if (inc_recurse && file_total == 1) |
| 170 | recv_additional_file_list(f_in); |
| 171 | |
| 172 | + if (pre_checksum) |
| 173 | + pop_dir(olddir); |
| 174 | + |
| 175 | if (flist && flist->count > 0) { |
| 176 | local_name = get_local_name(flist, argv[0]); |
| 177 | |
| 178 | --- old/rsync.h |
| 179 | +++ new/rsync.h |
| 180 | @@ -71,6 +71,7 @@ |
| 181 | #define FLAG_HLINK_LAST (1<<7) /* receiver/generator */ |
| 182 | #define FLAG_HLINK_DONE (1<<8) /* receiver/generator */ |
| 183 | #define FLAG_LENGTH64 (1<<9) /* sender/receiver/generator */ |
| 184 | +#define FLAG_SUM_DIFFERS (1<<10)/* receiver/generator */ |
| 185 | |
| 186 | /* These flags are passed to functions but not stored. */ |
| 187 | |