Updated patches to work with the current trunk.
[rsync/rsync-patches.git] / source-filter_dest-filter.diff
CommitLineData
33d38bc8
WD
1CAUTION: This patch compiles, but is otherwise totally untested!
2
3This patch also implements --times-only.
4
5Implementation details for the --source-filter and --dest-filter options:
6
7 - These options open a *HUGE* security hole in daemon mode unless they
8 are refused in your rsyncd.conf!
9
10 - Filtering disables the rsync algorithm. (This should be fixed.)
11
12 - Source filter makes temporary files in /tmp. (Should be overridable.)
13
14 - If the source filter fails, data is sent unfiltered. (Should be changed
15 to abort.)
16
17 - Failure of the destination filter causes data loss!!! (Should be changed
18 to abort.)
19
20 - If the filter changes the size of a file, you should use the --times-only option to
21 prevent repeated transfers of unchanged files.
22
23 - If the COMMAND contains single quotes, option-passing breaks. (Needs
24 to be fixed.)
25
03019e41 26To use this patch, run these commands for a successful build:
33d38bc8 27
03019e41 28 patch -p1 <patches/source-filter_dest-filter.diff
27e96866 29 ./prepare-source
03019e41 30 ./configure (optional if already run)
27e96866
WD
31 make
32
c1ff70aa 33based-on: a01e3b490eb36ccf9e704840e1b6683dab867550
cc3e685d
WD
34diff --git a/generator.c b/generator.c
35--- a/generator.c
36+++ b/generator.c
fc557362 37@@ -64,6 +64,7 @@ extern int append_mode;
33d38bc8
WD
38 extern int make_backups;
39 extern int csum_length;
40 extern int ignore_times;
41+extern int times_only;
42 extern int size_only;
43 extern OFF_T max_size;
93ca4d27 44 extern OFF_T min_size;
c1ff70aa 45@@ -523,7 +524,7 @@ void itemize(const char *fnamecmp, struct file_struct *file, int ndx, int statre
33d38bc8 46 /* Perform our quick-check heuristic for determining if a file is unchanged. */
93ca4d27 47 int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
33d38bc8 48 {
1aa236e1
WD
49- if (st->st_size != F_LENGTH(file))
50+ if (!times_only && st->st_size != F_LENGTH(file))
33d38bc8
WD
51 return 0;
52
53 /* if always checksum is set then we use the checksum instead
cc3e685d
WD
54diff --git a/main.c b/main.c
55--- a/main.c
56+++ b/main.c
c1ff70aa 57@@ -153,7 +153,7 @@ pid_t wait_process(pid_t pid, int *status_ptr, int flags)
614291be
WD
58 }
59
60 /* Wait for a process to exit, calling io_flush while waiting. */
61-static void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
62+void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
63 {
64 pid_t waited_pid;
65 int status;
cc3e685d
WD
66diff --git a/options.c b/options.c
67--- a/options.c
68+++ b/options.c
fc557362 69@@ -105,6 +105,7 @@ int safe_symlinks = 0;
33d38bc8 70 int copy_unsafe_links = 0;
fc557362 71 int munge_symlinks = 0;
33d38bc8
WD
72 int size_only = 0;
73+int times_only = 0;
74 int daemon_bwlimit = 0;
75 int bwlimit = 0;
76 int fuzzy_basis = 0;
fc557362 77@@ -164,6 +165,8 @@ char *logfile_name = NULL;
55c1a3b7 78 char *logfile_format = NULL;
a859733e 79 char *stdout_format = NULL;
55c1a3b7 80 char *password_file = NULL;
33d38bc8
WD
81+char *source_filter = NULL;
82+char *dest_filter = NULL;
33d38bc8
WD
83 char *rsync_path = RSYNC_PATH;
84 char *backup_dir = NULL;
55c1a3b7 85 char backup_dir_buf[MAXPATHLEN];
72e5645e 86@@ -740,6 +743,7 @@ void usage(enum logcode F)
33d38bc8 87 rprintf(F," -I, --ignore-times don't skip files that match in size and mod-time\n");
fc557362 88 rprintf(F," -M, --remote-option=OPTION send OPTION to the remote side only\n");
33d38bc8
WD
89 rprintf(F," --size-only skip files that match in size\n");
90+ rprintf(F," --times-only skip files that match in mod-time\n");
91 rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n");
92 rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n");
93 rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n");
72e5645e 94@@ -779,6 +783,8 @@ void usage(enum logcode F)
33d38bc8
WD
95 rprintf(F," --write-batch=FILE write a batched update to FILE\n");
96 rprintf(F," --only-write-batch=FILE like --write-batch but w/o updating destination\n");
97 rprintf(F," --read-batch=FILE read a batched update from FILE\n");
98+ rprintf(F," --source-filter=COMMAND filter file through COMMAND at source\n");
99+ rprintf(F," --dest-filter=COMMAND filter file through COMMAND at destination\n");
100 rprintf(F," --protocol=NUM force an older protocol version to be used\n");
9c85142a 101 #ifdef ICONV_OPTION
cc3e685d 102 rprintf(F," --iconv=CONVERT_SPEC request charset conversion of filenames\n");
72e5645e 103@@ -888,6 +894,7 @@ static struct poptOption long_options[] = {
27e96866 104 {"chmod", 0, POPT_ARG_STRING, 0, OPT_CHMOD, 0, 0 },
33d38bc8
WD
105 {"ignore-times", 'I', POPT_ARG_NONE, &ignore_times, 0, 0, 0 },
106 {"size-only", 0, POPT_ARG_NONE, &size_only, 0, 0, 0 },
107+ {"times-only", 0, POPT_ARG_NONE, &times_only , 0, 0, 0 },
e0e47893 108 {"one-file-system", 'x', POPT_ARG_NONE, 0, 'x', 0, 0 },
85096e5e
WD
109 {"no-one-file-system",'x',POPT_ARG_VAL, &one_file_system, 0, 0, 0 },
110 {"no-x", 'x', POPT_ARG_VAL, &one_file_system, 0, 0, 0 },
72e5645e 111@@ -1008,6 +1015,8 @@ static struct poptOption long_options[] = {
489b0a72
WD
112 {"password-file", 0, POPT_ARG_STRING, &password_file, 0, 0, 0 },
113 {"blocking-io", 0, POPT_ARG_VAL, &blocking_io, 1, 0, 0 },
114 {"no-blocking-io", 0, POPT_ARG_VAL, &blocking_io, 0, 0, 0 },
33d38bc8
WD
115+ {"source-filter", 0, POPT_ARG_STRING, &source_filter, 0, 0, 0 },
116+ {"dest-filter", 0, POPT_ARG_STRING, &dest_filter, 0, 0, 0 },
fc557362 117 {"remote-option", 'M', POPT_ARG_STRING, 0, 'M', 0, 0 },
33d38bc8
WD
118 {"protocol", 0, POPT_ARG_INT, &protocol_version, 0, 0, 0 },
119 {"checksum-seed", 0, POPT_ARG_INT, &checksum_seed, 0, 0, 0 },
c1ff70aa 120@@ -2201,6 +2210,16 @@ int parse_arguments(int *argc_p, const char ***argv_p)
33d38bc8
WD
121 }
122 }
123
124+ if (source_filter || dest_filter) {
125+ if (whole_file == 0) {
126+ snprintf(err_buf, sizeof err_buf,
127+ "--no-whole-file cannot be used with --%s-filter\n",
128+ source_filter ? "source" : "dest");
129+ return 0;
130+ }
131+ whole_file = 1;
132+ }
133+
134 if (files_from) {
135 char *h, *p;
136 int q;
c1ff70aa 137@@ -2545,6 +2564,25 @@ void server_options(char **args, int *argc_p)
fc557362
WD
138 else if (missing_args == 1 && !am_sender)
139 args[ac++] = "--ignore-missing-args";
33d38bc8
WD
140
141+ if (times_only && am_sender)
142+ args[ac++] = "--times-only";
143+
144+ if (source_filter && !am_sender) {
145+ /* Need to single quote the arg to keep the remote shell
146+ * from splitting it. FIXME: breaks if command has single quotes. */
147+ if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0)
148+ goto oom;
149+ args[ac++] = arg;
150+ }
151+
152+ if (dest_filter && am_sender) {
153+ /* Need to single quote the arg to keep the remote shell
154+ * from splitting it. FIXME: breaks if command has single quotes. */
155+ if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0)
156+ goto oom;
157+ args[ac++] = arg;
158+ }
159+
ccc3a12c
WD
160 if (modify_window_set) {
161 if (asprintf(&arg, "--modify-window=%d", modify_window) < 0)
162 goto oom;
cc3e685d
WD
163diff --git a/pipe.c b/pipe.c
164--- a/pipe.c
165+++ b/pipe.c
fc557362 166@@ -180,3 +180,77 @@ pid_t local_child(int argc, char **argv, int *f_in, int *f_out,
33d38bc8
WD
167
168 return pid;
169 }
170+
171+pid_t run_filter(char *command[], int out, int *pipe_to_filter)
172+{
173+ pid_t pid;
174+ int pipefds[2];
e2e42a01 175+
fc557362 176+ if (DEBUG_GTE(CMD, 1))
7bfcb297 177+ print_child_argv("opening connection using:", command);
33d38bc8
WD
178+
179+ if (pipe(pipefds) < 0) {
180+ rsyserr(FERROR, errno, "pipe");
181+ exit_cleanup(RERR_IPC);
182+ }
183+
184+ pid = do_fork();
185+ if (pid == -1) {
186+ rsyserr(FERROR, errno, "fork");
187+ exit_cleanup(RERR_IPC);
188+ }
189+
190+ if (pid == 0) {
191+ if (dup2(pipefds[0], STDIN_FILENO) < 0
192+ || close(pipefds[1]) < 0
193+ || dup2(out, STDOUT_FILENO) < 0) {
194+ rsyserr(FERROR, errno, "Failed dup/close");
195+ exit_cleanup(RERR_IPC);
196+ }
197+ umask(orig_umask);
198+ set_blocking(STDIN_FILENO);
199+ if (blocking_io)
200+ set_blocking(STDOUT_FILENO);
201+ execvp(command[0], command);
93ca4d27 202+ rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
33d38bc8
WD
203+ exit_cleanup(RERR_IPC);
204+ }
205+
206+ if (close(pipefds[0]) < 0) {
207+ rsyserr(FERROR, errno, "Failed to close");
208+ exit_cleanup(RERR_IPC);
209+ }
210+
211+ *pipe_to_filter = pipefds[1];
212+
213+ return pid;
214+}
215+
216+pid_t run_filter_on_file(char *command[], int out, int in)
217+{
218+ pid_t pid;
e2e42a01 219+
fc557362 220+ if (DEBUG_GTE(CMD, 1))
7bfcb297 221+ print_child_argv("opening connection using:", command);
33d38bc8
WD
222+
223+ pid = do_fork();
224+ if (pid == -1) {
225+ rsyserr(FERROR, errno, "fork");
226+ exit_cleanup(RERR_IPC);
227+ }
228+
229+ if (pid == 0) {
230+ if (dup2(in, STDIN_FILENO) < 0
231+ || dup2(out, STDOUT_FILENO) < 0) {
232+ rsyserr(FERROR, errno, "Failed to dup2");
233+ exit_cleanup(RERR_IPC);
234+ }
235+ if (blocking_io)
236+ set_blocking(STDOUT_FILENO);
237+ execvp(command[0], command);
93ca4d27 238+ rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
33d38bc8
WD
239+ exit_cleanup(RERR_IPC);
240+ }
241+
242+ return pid;
243+}
cc3e685d
WD
244diff --git a/receiver.c b/receiver.c
245--- a/receiver.c
246+++ b/receiver.c
7bfcb297 247@@ -52,6 +52,7 @@ extern int delay_updates;
9c85142a
WD
248 extern mode_t orig_umask;
249 extern struct stats stats;
33d38bc8 250 extern char *tmpdir;
9c85142a 251+extern char *dest_filter;
33d38bc8 252 extern char *partial_dir;
fc557362
WD
253 extern char *basis_dir[MAX_BASIS_DIRS+1];
254 extern char sender_file_sum[MAX_DIGEST_LEN];
5214a41b 255@@ -475,6 +476,8 @@ int recv_files(int f_in, int f_out, char *local_name)
5ff5e82f
WD
256 const char *parent_dirname = "";
257 #endif
81ecd8e0 258 int ndx, recv_ok;
33d38bc8
WD
259+ pid_t pid = 0;
260+ char *filter_argv[MAX_FILTER_ARGS + 1];
261
fc557362 262 if (DEBUG_GTE(RECV, 1))
9c85142a 263 rprintf(FINFO, "recv_files(%d) starting\n", cur_flist->used);
5214a41b 264@@ -482,6 +485,23 @@ int recv_files(int f_in, int f_out, char *local_name)
76c553f7
WD
265 if (delay_updates)
266 delayed_bits = bitbag_create(cur_flist->used + 1);
33d38bc8
WD
267
268+ if (dest_filter) {
269+ char *p;
270+ char *sep = " \t";
271+ int i;
272+ for (p = strtok(dest_filter, sep), i = 0;
273+ p && i < MAX_FILTER_ARGS;
274+ p = strtok(0, sep))
275+ filter_argv[i++] = p;
276+ filter_argv[i] = NULL;
277+ if (p) {
278+ rprintf(FERROR,
279+ "Too many arguments to dest-filter (> %d)\n",
280+ MAX_FILTER_ARGS);
281+ exit_cleanup(RERR_SYNTAX);
282+ }
283+ }
284+
285 while (1) {
286 cleanup_disable();
287
72e5645e 288@@ -777,6 +797,9 @@ int recv_files(int f_in, int f_out, char *local_name)
fc557362 289 else if (!am_server && INFO_GTE(NAME, 1) && INFO_EQ(PROGRESS, 1))
93ca4d27 290 rprintf(FINFO, "%s\n", fname);
33d38bc8
WD
291
292+ if (dest_filter)
293+ pid = run_filter(filter_argv, fd2, &fd2);
294+
295 /* recv file data */
296 recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
1aa236e1 297 fname, fd2, F_LENGTH(file));
72e5645e 298@@ -791,6 +814,16 @@ int recv_files(int f_in, int f_out, char *local_name)
33d38bc8
WD
299 exit_cleanup(RERR_FILEIO);
300 }
301
302+ if (dest_filter) {
303+ int status;
614291be 304+ wait_process_with_flush(pid, &status);
33d38bc8
WD
305+ if (status != 0) {
306+ rprintf(FERROR, "filter %s exited code: %d\n",
307+ dest_filter, status);
308+ continue;
309+ }
310+ }
311+
312 if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
55c1a3b7 313 if (partialptr == fname)
4c15e800 314 partialptr = NULL;
cc3e685d
WD
315diff --git a/rsync.h b/rsync.h
316--- a/rsync.h
317+++ b/rsync.h
5214a41b 318@@ -143,6 +143,7 @@
33d38bc8
WD
319 #define IOERR_DEL_LIMIT (1<<2)
320
321 #define MAX_ARGS 1000
322+#define MAX_FILTER_ARGS 100
323 #define MAX_BASIS_DIRS 20
324 #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100)
325
cc3e685d
WD
326diff --git a/rsync.yo b/rsync.yo
327--- a/rsync.yo
328+++ b/rsync.yo
fc557362 329@@ -394,6 +394,7 @@ to the detailed description below for a complete description. verb(
cc3e685d 330 --contimeout=SECONDS set daemon connection timeout in seconds
33d38bc8
WD
331 -I, --ignore-times don't skip files that match size and time
332 --size-only skip files that match in size
333+ --times-only skip files that match in mod-time
334 --modify-window=NUM compare mod-times with reduced accuracy
335 -T, --temp-dir=DIR create temporary files in directory DIR
336 -y, --fuzzy find similar file for basis if no dest file
fc557362 337@@ -434,6 +435,8 @@ to the detailed description below for a complete description. verb(
33d38bc8
WD
338 --write-batch=FILE write a batched update to FILE
339 --only-write-batch=FILE like --write-batch but w/o updating dest
340 --read-batch=FILE read a batched update from FILE
341+ --source-filter=COMMAND filter file through COMMAND at source
342+ --dest-filter=COMMAND filter file through COMMAND at destination
343 --protocol=NUM force an older protocol version to be used
cc3e685d 344 --iconv=CONVERT_SPEC request charset conversion of filenames
33d38bc8 345 --checksum-seed=NUM set block/file checksum seed (advanced)
c1ff70aa 346@@ -2335,6 +2338,33 @@ file previously generated by bf(--write-batch).
93ca4d27 347 If em(FILE) is bf(-), the batch data will be read from standard input.
33d38bc8
WD
348 See the "BATCH MODE" section for details.
349
350+dit(bf(--source-filter=COMMAND)) This option allows the user to specify a
351+filter program that will be applied to the contents of all transferred
352+regular files before the data is sent to destination. COMMAND will receive
353+the data on its standard input and it should write the filtered data to
354+standard output. COMMAND should exit non-zero if it cannot process the
355+data or if it encounters an error when writing the data to stdout.
356+
357+Example: --source-filter="gzip -9" will cause remote files to be
358+compressed.
359+Use of --source-filter automatically enables --whole-file.
360+If your filter does not output the same number of bytes that it received on
361+input, you should use --times-only to disable size and content checks on
362+subsequent rsync runs.
363+
364+dit(bf(--dest-filter=COMMAND)) This option allows you to specify a filter
365+program that will be applied to the contents of all transferred regular
366+files before the data is written to disk. COMMAND will receive the data on
367+its standard input and it should write the filtered data to standard
368+output. COMMAND should exit non-zero if it cannot process the data or if
369+it encounters an error when writing the data to stdout.
370+
371+Example: --dest-filter="gzip -9" will cause remote files to be compressed.
372+Use of --dest-filter automatically enables --whole-file.
373+If your filter does not output the same number of bytes that it
374+received on input, you should use --times-only to disable size and
375+content checks on subsequent rsync runs.
376+
377 dit(bf(--protocol=NUM)) Force an older protocol version to be used. This
378 is useful for creating a batch file that is compatible with an older
379 version of rsync. For instance, if rsync 2.6.4 is being used with the
cc3e685d
WD
380diff --git a/sender.c b/sender.c
381--- a/sender.c
382+++ b/sender.c
5214a41b 383@@ -43,6 +43,7 @@ extern int inplace;
dd0d95fa 384 extern int batch_fd;
a859733e 385 extern int write_batch;
5214a41b 386 extern int file_old_total;
dd0d95fa 387+extern char *source_filter;
c1ff70aa 388 extern BOOL we_send_keepalive_messages;
33d38bc8 389 extern struct stats stats;
9c85142a 390 extern struct file_list *cur_flist, *first_flist, *dir_flist;
c1ff70aa 391@@ -178,6 +179,26 @@ void send_files(int f_in, int f_out)
33d38bc8 392 int f_xfer = write_batch < 0 ? batch_fd : f_out;
72e5645e 393 int save_io_error = io_error;
fc068916 394 int ndx, j;
33d38bc8
WD
395+ char *filter_argv[MAX_FILTER_ARGS + 1];
396+ char *tmp = 0;
397+ int unlink_tmp = 0;
398+
399+ if (source_filter) {
400+ char *p;
401+ char *sep = " \t";
402+ int i;
403+ for (p = strtok(source_filter, sep), i = 0;
404+ p && i < MAX_FILTER_ARGS;
405+ p = strtok(0, sep))
406+ filter_argv[i++] = p;
407+ filter_argv[i] = NULL;
408+ if (p) {
409+ rprintf(FERROR,
410+ "Too many arguments to source-filter (> %d)\n",
411+ MAX_FILTER_ARGS);
412+ exit_cleanup(RERR_SYNTAX);
413+ }
414+ }
415
fc557362 416 if (DEBUG_GTE(SEND, 1))
33d38bc8 417 rprintf(FINFO, "send_files starting\n");
c1ff70aa 418@@ -311,6 +332,7 @@ void send_files(int f_in, int f_out)
fc068916 419 exit_cleanup(RERR_PROTOCOL);
33d38bc8
WD
420 }
421
422+ unlink_tmp = 0;
423 fd = do_open(fname, O_RDONLY, 0);
424 if (fd == -1) {
425 if (errno == ENOENT) {
c1ff70aa 426@@ -332,6 +354,33 @@ void send_files(int f_in, int f_out)
44cedd19 427 continue;
33d38bc8
WD
428 }
429
430+ if (source_filter) {
431+ int fd2;
432+ char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
433+
434+ tmp = strdup(tmpl);
435+ fd2 = mkstemp(tmp);
436+ if (fd2 == -1) {
437+ rprintf(FERROR, "mkstemp %s failed: %s\n",
438+ tmp, strerror(errno));
439+ } else {
440+ int status;
441+ pid_t pid = run_filter_on_file(filter_argv, fd2, fd);
442+ close(fd);
443+ close(fd2);
614291be 444+ wait_process_with_flush(pid, &status);
33d38bc8
WD
445+ if (status != 0) {
446+ rprintf(FERROR,
447+ "bypassing source filter %s; exited with code: %d\n",
448+ source_filter, status);
449+ fd = do_open(fname, O_RDONLY, 0);
450+ } else {
451+ fd = do_open(tmp, O_RDONLY, 0);
452+ unlink_tmp = 1;
453+ }
454+ }
455+ }
456+
44cedd19
WD
457 /* map the local file */
458 if (do_fstat(fd, &st) != 0) {
459 io_error |= IOERR_GENERAL;
c1ff70aa 460@@ -382,6 +431,8 @@ void send_files(int f_in, int f_out)
33d38bc8
WD
461 }
462 }
463 close(fd);
464+ if (unlink_tmp)
465+ unlink(tmp);
466
467 free_sums(s);
468