| 1 | CAUTION: This patch compiles, but is otherwise totally untested! |
| 2 | |
| 3 | This patch also implements --times-only. |
| 4 | |
| 5 | Implementation details for the --source-filter and --dest-filter options: |
| 6 | |
| 7 | - These options open a *HUGE* security hole in daemon mode unless they |
| 8 | are refused in your rsyncd.conf! |
| 9 | |
| 10 | - Filtering disables the rsync algorithm. (This should be fixed.) |
| 11 | |
| 12 | - Source filter makes temporary files in /tmp. (Should be overridable.) |
| 13 | |
| 14 | - If the source filter fails, data is sent unfiltered. (Should be changed |
| 15 | to abort.) |
| 16 | |
| 17 | - Failure of the destination filter causes data loss!!! (Should be changed |
| 18 | to abort.) |
| 19 | |
| 20 | - If the filter changes the size of a file, you should use the --times-only option to |
| 21 | prevent repeated transfers of unchanged files. |
| 22 | |
| 23 | - If the COMMAND contains single quotes, option-passing breaks. (Needs |
| 24 | to be fixed.) |
| 25 | |
| 26 | To use this patch, run these commands for a successful build: |
| 27 | |
| 28 | patch -p1 <patches/source-filter_dest-filter.diff |
| 29 | ./prepare-source |
| 30 | ./configure (optional if already run) |
| 31 | make |
| 32 | |
| 33 | --- old/generator.c |
| 34 | +++ new/generator.c |
| 35 | @@ -61,6 +61,7 @@ extern int append_mode; |
| 36 | extern int make_backups; |
| 37 | extern int csum_length; |
| 38 | extern int ignore_times; |
| 39 | +extern int times_only; |
| 40 | extern int size_only; |
| 41 | extern OFF_T max_size; |
| 42 | extern OFF_T min_size; |
| 43 | @@ -571,7 +572,7 @@ void itemize(struct file_struct *file, i |
| 44 | /* Perform our quick-check heuristic for determining if a file is unchanged. */ |
| 45 | int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st) |
| 46 | { |
| 47 | - if (st->st_size != F_LENGTH(file)) |
| 48 | + if (!times_only && st->st_size != F_LENGTH(file)) |
| 49 | return 0; |
| 50 | |
| 51 | /* if always checksum is set then we use the checksum instead |
| 52 | --- old/main.c |
| 53 | +++ new/main.c |
| 54 | @@ -128,7 +128,7 @@ pid_t wait_process(pid_t pid, int *statu |
| 55 | } |
| 56 | |
| 57 | /* Wait for a process to exit, calling io_flush while waiting. */ |
| 58 | -static void wait_process_with_flush(pid_t pid, int *exit_code_ptr) |
| 59 | +void wait_process_with_flush(pid_t pid, int *exit_code_ptr) |
| 60 | { |
| 61 | pid_t waited_pid; |
| 62 | int status; |
| 63 | --- old/options.c |
| 64 | +++ new/options.c |
| 65 | @@ -99,6 +99,7 @@ int keep_partial = 0; |
| 66 | int safe_symlinks = 0; |
| 67 | int copy_unsafe_links = 0; |
| 68 | int size_only = 0; |
| 69 | +int times_only = 0; |
| 70 | int daemon_bwlimit = 0; |
| 71 | int bwlimit = 0; |
| 72 | int fuzzy_basis = 0; |
| 73 | @@ -150,6 +151,8 @@ char *logfile_name = NULL; |
| 74 | char *logfile_format = NULL; |
| 75 | char *stdout_format = NULL; |
| 76 | char *password_file = NULL; |
| 77 | +char *source_filter = NULL; |
| 78 | +char *dest_filter = NULL; |
| 79 | char *rsync_path = RSYNC_PATH; |
| 80 | char *backup_dir = NULL; |
| 81 | char backup_dir_buf[MAXPATHLEN]; |
| 82 | @@ -340,6 +343,7 @@ void usage(enum logcode F) |
| 83 | rprintf(F," --timeout=TIME set I/O timeout in seconds\n"); |
| 84 | rprintf(F," -I, --ignore-times don't skip files that match in size and mod-time\n"); |
| 85 | rprintf(F," --size-only skip files that match in size\n"); |
| 86 | + rprintf(F," --times-only skip files that match in mod-time\n"); |
| 87 | rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n"); |
| 88 | rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n"); |
| 89 | rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n"); |
| 90 | @@ -377,6 +381,8 @@ void usage(enum logcode F) |
| 91 | rprintf(F," --write-batch=FILE write a batched update to FILE\n"); |
| 92 | rprintf(F," --only-write-batch=FILE like --write-batch but w/o updating destination\n"); |
| 93 | rprintf(F," --read-batch=FILE read a batched update from FILE\n"); |
| 94 | + rprintf(F," --source-filter=COMMAND filter file through COMMAND at source\n"); |
| 95 | + rprintf(F," --dest-filter=COMMAND filter file through COMMAND at destination\n"); |
| 96 | rprintf(F," --protocol=NUM force an older protocol version to be used\n"); |
| 97 | #ifdef INET6 |
| 98 | rprintf(F," -4, --ipv4 prefer IPv4\n"); |
| 99 | @@ -460,6 +466,7 @@ static struct poptOption long_options[] |
| 100 | {"chmod", 0, POPT_ARG_STRING, 0, OPT_CHMOD, 0, 0 }, |
| 101 | {"ignore-times", 'I', POPT_ARG_NONE, &ignore_times, 0, 0, 0 }, |
| 102 | {"size-only", 0, POPT_ARG_NONE, &size_only, 0, 0, 0 }, |
| 103 | + {"times-only", 0, POPT_ARG_NONE, &times_only , 0, 0, 0 }, |
| 104 | {"one-file-system", 'x', POPT_ARG_NONE, 0, 'x', 0, 0 }, |
| 105 | {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 }, |
| 106 | {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 }, |
| 107 | @@ -539,6 +546,8 @@ static struct poptOption long_options[] |
| 108 | {"password-file", 0, POPT_ARG_STRING, &password_file, 0, 0, 0 }, |
| 109 | {"blocking-io", 0, POPT_ARG_VAL, &blocking_io, 1, 0, 0 }, |
| 110 | {"no-blocking-io", 0, POPT_ARG_VAL, &blocking_io, 0, 0, 0 }, |
| 111 | + {"source-filter", 0, POPT_ARG_STRING, &source_filter, 0, 0, 0 }, |
| 112 | + {"dest-filter", 0, POPT_ARG_STRING, &dest_filter, 0, 0, 0 }, |
| 113 | {"protocol", 0, POPT_ARG_INT, &protocol_version, 0, 0, 0 }, |
| 114 | {"checksum-seed", 0, POPT_ARG_INT, &checksum_seed, 0, 0, 0 }, |
| 115 | {"server", 0, POPT_ARG_NONE, 0, OPT_SERVER, 0, 0 }, |
| 116 | @@ -1416,6 +1425,16 @@ int parse_arguments(int *argc, const cha |
| 117 | } |
| 118 | } |
| 119 | |
| 120 | + if (source_filter || dest_filter) { |
| 121 | + if (whole_file == 0) { |
| 122 | + snprintf(err_buf, sizeof err_buf, |
| 123 | + "--no-whole-file cannot be used with --%s-filter\n", |
| 124 | + source_filter ? "source" : "dest"); |
| 125 | + return 0; |
| 126 | + } |
| 127 | + whole_file = 1; |
| 128 | + } |
| 129 | + |
| 130 | if (files_from) { |
| 131 | char *h, *p; |
| 132 | int q; |
| 133 | @@ -1692,6 +1711,25 @@ void server_options(char **args,int *arg |
| 134 | args[ac++] = "--size-only"; |
| 135 | } |
| 136 | |
| 137 | + if (times_only && am_sender) |
| 138 | + args[ac++] = "--times-only"; |
| 139 | + |
| 140 | + if (source_filter && !am_sender) { |
| 141 | + /* Need to single quote the arg to keep the remote shell |
| 142 | + * from splitting it. FIXME: breaks if command has single quotes. */ |
| 143 | + if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0) |
| 144 | + goto oom; |
| 145 | + args[ac++] = arg; |
| 146 | + } |
| 147 | + |
| 148 | + if (dest_filter && am_sender) { |
| 149 | + /* Need to single quote the arg to keep the remote shell |
| 150 | + * from splitting it. FIXME: breaks if command has single quotes. */ |
| 151 | + if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0) |
| 152 | + goto oom; |
| 153 | + args[ac++] = arg; |
| 154 | + } |
| 155 | + |
| 156 | if (modify_window_set) { |
| 157 | if (asprintf(&arg, "--modify-window=%d", modify_window) < 0) |
| 158 | goto oom; |
| 159 | --- old/pipe.c |
| 160 | +++ new/pipe.c |
| 161 | @@ -165,3 +165,77 @@ pid_t local_child(int argc, char **argv, |
| 162 | |
| 163 | return pid; |
| 164 | } |
| 165 | +/* Start COMMAND as a child process that reads its stdin from a newly created pipe and writes its stdout to OUT. The pipe's write end is stored in *pipe_to_filter and the child's pid is returned. Any pipe/fork/dup/close/exec failure is logged and ends in exit_cleanup(RERR_IPC). */ |
| 166 | +pid_t run_filter(char *command[], int out, int *pipe_to_filter) |
| 167 | +{ |
| 168 | + pid_t pid; |
| 169 | + int pipefds[2]; |
| 170 | +/* At verbose >= 2 the filter's argv is printed before it is started. */ |
| 171 | + if (verbose >= 2) |
| 172 | + print_child_argv(command); |
| 173 | + |
| 174 | + if (pipe(pipefds) < 0) { |
| 175 | + rsyserr(FERROR, errno, "pipe"); |
| 176 | + exit_cleanup(RERR_IPC); |
| 177 | + } |
| 178 | + |
| 179 | + pid = do_fork(); |
| 180 | + if (pid == -1) { |
| 181 | + rsyserr(FERROR, errno, "fork"); |
| 182 | + exit_cleanup(RERR_IPC); |
| 183 | + } |
| 184 | +/* Child: the pipe's read end becomes stdin, the unused write end is closed, OUT becomes stdout; then orig_umask is restored, stdin is set blocking (stdout too when blocking_io is set) and the filter is exec'd. */ |
| 185 | + if (pid == 0) { |
| 186 | + if (dup2(pipefds[0], STDIN_FILENO) < 0 |
| 187 | + || close(pipefds[1]) < 0 |
| 188 | + || dup2(out, STDOUT_FILENO) < 0) { |
| 189 | + rsyserr(FERROR, errno, "Failed dup/close"); |
| 190 | + exit_cleanup(RERR_IPC); |
| 191 | + } |
| 192 | + umask(orig_umask); |
| 193 | + set_blocking(STDIN_FILENO); |
| 194 | + if (blocking_io) |
| 195 | + set_blocking(STDOUT_FILENO); |
| 196 | + execvp(command[0], command); |
| 197 | + rsyserr(FERROR, errno, "Failed to exec %s", command[0]); |
| 198 | + exit_cleanup(RERR_IPC); |
| 199 | + } |
| 200 | +/* Parent: close the read end (only the child uses it) and hand the write end back to the caller. OUT is not closed here. */ |
| 201 | + if (close(pipefds[0]) < 0) { |
| 202 | + rsyserr(FERROR, errno, "Failed to close"); |
| 203 | + exit_cleanup(RERR_IPC); |
| 204 | + } |
| 205 | + |
| 206 | + *pipe_to_filter = pipefds[1]; |
| 207 | + |
| 208 | + return pid; |
| 209 | +} |
| 210 | +/* Start COMMAND as a child process whose stdin is the descriptor IN and whose stdout is the descriptor OUT, and return the child's pid. No pipe is created, and the parent's copies of IN and OUT are not closed here. A fork/dup2/exec failure is logged and ends in exit_cleanup(RERR_IPC). */ |
| 211 | +pid_t run_filter_on_file(char *command[], int out, int in) |
| 212 | +{ |
| 213 | + pid_t pid; |
| 214 | +/* At verbose >= 2 the filter's argv is printed before it is started. */ |
| 215 | + if (verbose >= 2) |
| 216 | + print_child_argv(command); |
| 217 | + |
| 218 | + pid = do_fork(); |
| 219 | + if (pid == -1) { |
| 220 | + rsyserr(FERROR, errno, "fork"); |
| 221 | + exit_cleanup(RERR_IPC); |
| 222 | + } |
| 223 | +/* Child: IN becomes stdin and OUT becomes stdout, stdout is set blocking when blocking_io is set, then the filter is exec'd. */ |
| 224 | + if (pid == 0) { |
| 225 | + if (dup2(in, STDIN_FILENO) < 0 |
| 226 | + || dup2(out, STDOUT_FILENO) < 0) { |
| 227 | + rsyserr(FERROR, errno, "Failed to dup2"); |
| 228 | + exit_cleanup(RERR_IPC); |
| 229 | + } |
| 230 | + if (blocking_io) |
| 231 | + set_blocking(STDOUT_FILENO); |
| 232 | + execvp(command[0], command); |
| 233 | + rsyserr(FERROR, errno, "Failed to exec %s", command[0]); |
| 234 | + exit_cleanup(RERR_IPC); |
| 235 | + } |
| 236 | + |
| 237 | + return pid; |
| 238 | +} |
| 239 | --- old/receiver.c |
| 240 | +++ new/receiver.c |
| 241 | @@ -52,6 +52,7 @@ extern struct stats stats; |
| 242 | extern char *tmpdir; |
| 243 | extern char *partial_dir; |
| 244 | extern char *basis_dir[]; |
| 245 | +extern char *dest_filter; |
| 246 | extern struct file_list *cur_flist, *first_flist; |
| 247 | extern struct filter_list_struct server_filter_list; |
| 248 | |
| 249 | @@ -349,6 +350,8 @@ int recv_files(int f_in, char *local_nam |
| 250 | enum logcode log_code = log_before_transfer ? FLOG : FINFO; |
| 251 | int max_phase = protocol_version >= 29 ? 2 : 1; |
| 252 | int ndx, recv_ok; |
| 253 | + pid_t pid = 0; |
| 254 | + char *filter_argv[MAX_FILTER_ARGS + 1]; |
| 255 | |
| 256 | if (verbose > 2) |
| 257 | rprintf(FINFO, "recv_files(%d) starting\n", cur_flist->count); |
| 258 | @@ -358,6 +361,23 @@ int recv_files(int f_in, char *local_nam |
| 259 | |
| 260 | updating_basis = inplace; |
| 261 | |
| 262 | + if (dest_filter) { |
| 263 | + char *p; |
| 264 | + char *sep = " \t"; |
| 265 | + int i; |
| 266 | + for (p = strtok(dest_filter, sep), i = 0; |
| 267 | + p && i < MAX_FILTER_ARGS; |
| 268 | + p = strtok(0, sep)) |
| 269 | + filter_argv[i++] = p; |
| 270 | + filter_argv[i] = NULL; |
| 271 | + if (p) { |
| 272 | + rprintf(FERROR, |
| 273 | + "Too many arguments to dest-filter (> %d)\n", |
| 274 | + MAX_FILTER_ARGS); |
| 275 | + exit_cleanup(RERR_SYNTAX); |
| 276 | + } |
| 277 | + } |
| 278 | + |
| 279 | while (1) { |
| 280 | cleanup_disable(); |
| 281 | |
| 282 | @@ -620,6 +640,9 @@ int recv_files(int f_in, char *local_nam |
| 283 | else if (!am_server && verbose && do_progress) |
| 284 | rprintf(FINFO, "%s\n", fname); |
| 285 | |
| 286 | + if (dest_filter) |
| 287 | + pid = run_filter(filter_argv, fd2, &fd2); |
| 288 | + |
| 289 | /* recv file data */ |
| 290 | recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size, |
| 291 | fname, fd2, F_LENGTH(file)); |
| 292 | @@ -634,6 +657,16 @@ int recv_files(int f_in, char *local_nam |
| 293 | exit_cleanup(RERR_FILEIO); |
| 294 | } |
| 295 | |
| 296 | + if (dest_filter) { |
| 297 | + int status; |
| 298 | + wait_process_with_flush(pid, &status); |
| 299 | + if (status != 0) { |
| 300 | + rprintf(FERROR, "filter %s exited code: %d\n", |
| 301 | + dest_filter, status); |
| 302 | + continue; |
| 303 | + } |
| 304 | + } |
| 305 | + |
| 306 | if ((recv_ok && (!delay_updates || !partialptr)) || inplace) { |
| 307 | char *temp_copy_name; |
| 308 | if (partialptr == fname) |
| 309 | --- old/rsync.h |
| 310 | +++ new/rsync.h |
| 311 | @@ -119,6 +119,7 @@ |
| 312 | #define IOERR_DEL_LIMIT (1<<2) |
| 313 | |
| 314 | #define MAX_ARGS 1000 |
| 315 | +#define MAX_FILTER_ARGS 100 |
| 316 | #define MAX_BASIS_DIRS 20 |
| 317 | #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100) |
| 318 | |
| 319 | --- old/rsync.yo |
| 320 | +++ new/rsync.yo |
| 321 | @@ -361,6 +361,7 @@ to the detailed description below for a |
| 322 | --timeout=TIME set I/O timeout in seconds |
| 323 | -I, --ignore-times don't skip files that match size and time |
| 324 | --size-only skip files that match in size |
| 325 | + --times-only skip files that match in mod-time |
| 326 | --modify-window=NUM compare mod-times with reduced accuracy |
| 327 | -T, --temp-dir=DIR create temporary files in directory DIR |
| 328 | -y, --fuzzy find similar file for basis if no dest file |
| 329 | @@ -398,6 +399,8 @@ to the detailed description below for a |
| 330 | --write-batch=FILE write a batched update to FILE |
| 331 | --only-write-batch=FILE like --write-batch but w/o updating dest |
| 332 | --read-batch=FILE read a batched update from FILE |
| 333 | + --source-filter=COMMAND filter file through COMMAND at source |
| 334 | + --dest-filter=COMMAND filter file through COMMAND at destination |
| 335 | --protocol=NUM force an older protocol version to be used |
| 336 | --checksum-seed=NUM set block/file checksum seed (advanced) |
| 337 | -4, --ipv4 prefer IPv4 |
| 338 | @@ -1804,6 +1807,33 @@ file previously generated by bf(--write- |
| 339 | If em(FILE) is bf(-), the batch data will be read from standard input. |
| 340 | See the "BATCH MODE" section for details. |
| 341 | |
| 342 | +dit(bf(--source-filter=COMMAND)) This option allows the user to specify a |
| 343 | +filter program that will be applied to the contents of all transferred |
| 344 | +regular files before the data is sent to destination. COMMAND will receive |
| 345 | +the data on its standard input and it should write the filtered data to |
| 346 | +standard output. COMMAND should exit non-zero if it cannot process the |
| 347 | +data or if it encounters an error when writing the data to stdout. |
| 348 | + |
| 349 | +Example: --source-filter="gzip -9" will cause remote files to be |
| 350 | +compressed. |
| 351 | +Use of --source-filter automatically enables --whole-file. |
| 352 | +If your filter does not output the same number of bytes that it received on |
| 353 | +input, you should use --times-only to disable size and content checks on |
| 354 | +subsequent rsync runs. |
| 355 | + |
| 356 | +dit(bf(--dest-filter=COMMAND)) This option allows you to specify a filter |
| 357 | +program that will be applied to the contents of all transferred regular |
| 358 | +files before the data is written to disk. COMMAND will receive the data on |
| 359 | +its standard input and it should write the filtered data to standard |
| 360 | +output. COMMAND should exit non-zero if it cannot process the data or if |
| 361 | +it encounters an error when writing the data to stdout. |
| 362 | + |
| 363 | +Example: --dest-filter="gzip -9" will cause remote files to be compressed. |
| 364 | +Use of --dest-filter automatically enables --whole-file. |
| 365 | +If your filter does not output the same number of bytes that it |
| 366 | +received on input, you should use --times-only to disable size and |
| 367 | +content checks on subsequent rsync runs. |
| 368 | + |
| 369 | dit(bf(--protocol=NUM)) Force an older protocol version to be used. This |
| 370 | is useful for creating a batch file that is compatible with an older |
| 371 | version of rsync. For instance, if rsync 2.6.4 is being used with the |
| 372 | --- old/sender.c |
| 373 | +++ new/sender.c |
| 374 | @@ -42,6 +42,7 @@ extern int do_progress; |
| 375 | extern int inplace; |
| 376 | extern int batch_fd; |
| 377 | extern int write_batch; |
| 378 | +extern char *source_filter; |
| 379 | extern struct stats stats; |
| 380 | extern struct file_list *cur_flist, *first_flist; |
| 381 | |
| 382 | @@ -175,6 +176,26 @@ void send_files(int f_in, int f_out) |
| 383 | enum logcode log_code = log_before_transfer ? FLOG : FINFO; |
| 384 | int f_xfer = write_batch < 0 ? batch_fd : f_out; |
| 385 | int ndx, j; |
| 386 | + char *filter_argv[MAX_FILTER_ARGS + 1]; |
| 387 | + char *tmp = 0; |
| 388 | + int unlink_tmp = 0; |
| 389 | + |
| 390 | + if (source_filter) { |
| 391 | + char *p; |
| 392 | + char *sep = " \t"; |
| 393 | + int i; |
| 394 | + for (p = strtok(source_filter, sep), i = 0; |
| 395 | + p && i < MAX_FILTER_ARGS; |
| 396 | + p = strtok(0, sep)) |
| 397 | + filter_argv[i++] = p; |
| 398 | + filter_argv[i] = NULL; |
| 399 | + if (p) { |
| 400 | + rprintf(FERROR, |
| 401 | + "Too many arguments to source-filter (> %d)\n", |
| 402 | + MAX_FILTER_ARGS); |
| 403 | + exit_cleanup(RERR_SYNTAX); |
| 404 | + } |
| 405 | + } |
| 406 | |
| 407 | if (verbose > 2) |
| 408 | rprintf(FINFO, "send_files starting\n"); |
| 409 | @@ -265,6 +286,7 @@ void send_files(int f_in, int f_out) |
| 410 | exit_cleanup(RERR_PROTOCOL); |
| 411 | } |
| 412 | |
| 413 | + unlink_tmp = 0; |
| 414 | fd = do_open(fname, O_RDONLY, 0); |
| 415 | if (fd == -1) { |
| 416 | if (errno == ENOENT) { |
| 417 | @@ -295,6 +317,33 @@ void send_files(int f_in, int f_out) |
| 418 | exit_cleanup(RERR_PROTOCOL); |
| 419 | } |
| 420 | |
| 421 | + if (source_filter) { |
| 422 | + int fd2; |
| 423 | + char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX"; |
| 424 | + |
| 425 | + tmp = strdup(tmpl); |
| 426 | + fd2 = mkstemp(tmp); |
| 427 | + if (fd2 == -1) { |
| 428 | + rprintf(FERROR, "mkstemp %s failed: %s\n", |
| 429 | + tmp, strerror(errno)); |
| 430 | + } else { |
| 431 | + int status; |
| 432 | + pid_t pid = run_filter_on_file(filter_argv, fd2, fd); |
| 433 | + close(fd); |
| 434 | + close(fd2); |
| 435 | + wait_process_with_flush(pid, &status); |
| 436 | + if (status != 0) { |
| 437 | + rprintf(FERROR, |
| 438 | + "bypassing source filter %s; exited with code: %d\n", |
| 439 | + source_filter, status); |
| 440 | + fd = do_open(fname, O_RDONLY, 0); |
| 441 | + } else { |
| 442 | + fd = do_open(tmp, O_RDONLY, 0); |
| 443 | + unlink_tmp = 1; |
| 444 | + } |
| 445 | + } |
| 446 | + } |
| 447 | + |
| 448 | if (st.st_size) { |
| 449 | int32 read_size = MAX(s->blength * 3, MAX_MAP_SIZE); |
| 450 | mbuf = map_file(fd, st.st_size, read_size, s->blength); |
| 451 | @@ -336,6 +385,8 @@ void send_files(int f_in, int f_out) |
| 452 | } |
| 453 | } |
| 454 | close(fd); |
| 455 | + if (unlink_tmp) |
| 456 | + unlink(tmp); |
| 457 | |
| 458 | free_sums(s); |
| 459 | |