Use "use warnings" rather than -w on the #! line.
[rsync/rsync-patches.git] / source-filter_dest-filter.diff
CommitLineData
33d38bc8
WD
1CAUTION: This patch compiles, but is otherwise totally untested!
2
3This patch also implements --times-only.
4
5Implementation details for the --source-filter and --dest-filter options:
6
7 - These options open a *HUGE* security hole in daemon mode unless they
8 are refused in your rsyncd.conf!
9
10 - Filtering disables the rsync algorithm. (This should be fixed.)
11
12 - Source filter makes temporary files in /tmp. (Should be overridable.)
13
14 - If the source filter fails, data is sent unfiltered. (Should be changed
15 to abort.)
16
17 - Failure of the destination filter causes data loss!!! (Should be changed
18 to abort.)
19
20 - If the filter changes the size of a file, you should use the --times-only option to
21 prevent repeated transfers of unchanged files.
22
23 - If the COMMAND contains single quotes, option-passing breaks. (Needs
24 to be fixed.)
25
03019e41 26To use this patch, run these commands for a successful build:
33d38bc8 27
03019e41 28 patch -p1 <patches/source-filter_dest-filter.diff
27e96866 29 ./prepare-source
03019e41 30 ./configure (optional if already run)
27e96866
WD
31 make
32
cc3e685d
WD
33diff --git a/generator.c b/generator.c
34--- a/generator.c
35+++ b/generator.c
c0c7984e 36@@ -62,6 +62,7 @@ extern int append_mode;
33d38bc8
WD
37 extern int make_backups;
38 extern int csum_length;
39 extern int ignore_times;
40+extern int times_only;
41 extern int size_only;
42 extern OFF_T max_size;
93ca4d27 43 extern OFF_T min_size;
abd3adb8 44@@ -717,7 +718,7 @@ void itemize(const char *fnamecmp, struct file_struct *file, int ndx, int statre
33d38bc8 45 /* Perform our quick-check heuristic for determining if a file is unchanged. */
93ca4d27 46 int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
33d38bc8 47 {
1aa236e1
WD
48- if (st->st_size != F_LENGTH(file))
49+ if (!times_only && st->st_size != F_LENGTH(file))
33d38bc8
WD
50 return 0;
51
52 /* if always checksum is set then we use the checksum instead
cc3e685d
WD
53diff --git a/main.c b/main.c
54--- a/main.c
55+++ b/main.c
4c107044 56@@ -136,7 +136,7 @@ pid_t wait_process(pid_t pid, int *status_ptr, int flags)
614291be
WD
57 }
58
59 /* Wait for a process to exit, calling io_flush while waiting. */
60-static void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
61+void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
62 {
63 pid_t waited_pid;
64 int status;
cc3e685d
WD
65diff --git a/options.c b/options.c
66--- a/options.c
67+++ b/options.c
c0c7984e 68@@ -106,6 +106,7 @@ int keep_partial = 0;
33d38bc8
WD
69 int safe_symlinks = 0;
70 int copy_unsafe_links = 0;
71 int size_only = 0;
72+int times_only = 0;
73 int daemon_bwlimit = 0;
74 int bwlimit = 0;
75 int fuzzy_basis = 0;
c0c7984e 76@@ -163,6 +164,8 @@ char *logfile_name = NULL;
55c1a3b7 77 char *logfile_format = NULL;
a859733e 78 char *stdout_format = NULL;
55c1a3b7 79 char *password_file = NULL;
33d38bc8
WD
80+char *source_filter = NULL;
81+char *dest_filter = NULL;
33d38bc8
WD
82 char *rsync_path = RSYNC_PATH;
83 char *backup_dir = NULL;
55c1a3b7 84 char backup_dir_buf[MAXPATHLEN];
abd3adb8 85@@ -389,6 +392,7 @@ void usage(enum logcode F)
cc3e685d 86 rprintf(F," --contimeout=SECONDS set daemon connection timeout in seconds\n");
33d38bc8
WD
87 rprintf(F," -I, --ignore-times don't skip files that match in size and mod-time\n");
88 rprintf(F," --size-only skip files that match in size\n");
89+ rprintf(F," --times-only skip files that match in mod-time\n");
90 rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n");
91 rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n");
92 rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n");
abd3adb8 93@@ -428,6 +432,8 @@ void usage(enum logcode F)
33d38bc8
WD
94 rprintf(F," --write-batch=FILE write a batched update to FILE\n");
95 rprintf(F," --only-write-batch=FILE like --write-batch but w/o updating destination\n");
96 rprintf(F," --read-batch=FILE read a batched update from FILE\n");
97+ rprintf(F," --source-filter=COMMAND filter file through COMMAND at source\n");
98+ rprintf(F," --dest-filter=COMMAND filter file through COMMAND at destination\n");
99 rprintf(F," --protocol=NUM force an older protocol version to be used\n");
9c85142a 100 #ifdef ICONV_OPTION
cc3e685d 101 rprintf(F," --iconv=CONVERT_SPEC request charset conversion of filenames\n");
abd3adb8 102@@ -531,6 +537,7 @@ static struct poptOption long_options[] = {
27e96866 103 {"chmod", 0, POPT_ARG_STRING, 0, OPT_CHMOD, 0, 0 },
33d38bc8
WD
104 {"ignore-times", 'I', POPT_ARG_NONE, &ignore_times, 0, 0, 0 },
105 {"size-only", 0, POPT_ARG_NONE, &size_only, 0, 0, 0 },
106+ {"times-only", 0, POPT_ARG_NONE, &times_only , 0, 0, 0 },
e0e47893 107 {"one-file-system", 'x', POPT_ARG_NONE, 0, 'x', 0, 0 },
85096e5e
WD
108 {"no-one-file-system",'x',POPT_ARG_VAL, &one_file_system, 0, 0, 0 },
109 {"no-x", 'x', POPT_ARG_VAL, &one_file_system, 0, 0, 0 },
abd3adb8 110@@ -646,6 +653,8 @@ static struct poptOption long_options[] = {
489b0a72
WD
111 {"password-file", 0, POPT_ARG_STRING, &password_file, 0, 0, 0 },
112 {"blocking-io", 0, POPT_ARG_VAL, &blocking_io, 1, 0, 0 },
113 {"no-blocking-io", 0, POPT_ARG_VAL, &blocking_io, 0, 0, 0 },
33d38bc8
WD
114+ {"source-filter", 0, POPT_ARG_STRING, &source_filter, 0, 0, 0 },
115+ {"dest-filter", 0, POPT_ARG_STRING, &dest_filter, 0, 0, 0 },
116 {"protocol", 0, POPT_ARG_INT, &protocol_version, 0, 0, 0 },
117 {"checksum-seed", 0, POPT_ARG_INT, &checksum_seed, 0, 0, 0 },
27e96866 118 {"server", 0, POPT_ARG_NONE, 0, OPT_SERVER, 0, 0 },
abd3adb8 119@@ -1635,6 +1644,16 @@ int parse_arguments(int *argc_p, const char ***argv_p)
33d38bc8
WD
120 }
121 }
122
123+ if (source_filter || dest_filter) {
124+ if (whole_file == 0) {
125+ snprintf(err_buf, sizeof err_buf,
126+ "--no-whole-file cannot be used with --%s-filter\n",
127+ source_filter ? "source" : "dest");
128+ return 0;
129+ }
130+ whole_file = 1;
131+ }
132+
133 if (files_from) {
134 char *h, *p;
135 int q;
abd3adb8 136@@ -1969,6 +1988,25 @@ void server_options(char **args, int *argc_p)
7bfcb297 137 }
33d38bc8
WD
138 }
139
140+ if (times_only && am_sender)
141+ args[ac++] = "--times-only";
142+
143+ if (source_filter && !am_sender) {
144+ /* Need to single quote the arg to keep the remote shell
145+ * from splitting it. FIXME: breaks if command has single quotes. */
146+ if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0)
147+ goto oom;
148+ args[ac++] = arg;
149+ }
150+
151+ if (dest_filter && am_sender) {
152+ /* Need to single quote the arg to keep the remote shell
153+ * from splitting it. FIXME: breaks if command has single quotes. */
154+ if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0)
155+ goto oom;
156+ args[ac++] = arg;
157+ }
158+
ccc3a12c
WD
159 if (modify_window_set) {
160 if (asprintf(&arg, "--modify-window=%d", modify_window) < 0)
161 goto oom;
cc3e685d
WD
162diff --git a/pipe.c b/pipe.c
163--- a/pipe.c
164+++ b/pipe.c
165@@ -167,3 +167,77 @@ pid_t local_child(int argc, char **argv, int *f_in, int *f_out,
33d38bc8
WD
166
167 return pid;
168 }
169+
170+pid_t run_filter(char *command[], int out, int *pipe_to_filter)
171+{
172+ pid_t pid;
173+ int pipefds[2];
e2e42a01 174+
33d38bc8 175+ if (verbose >= 2)
7bfcb297 176+ print_child_argv("opening connection using:", command);
33d38bc8
WD
177+
178+ if (pipe(pipefds) < 0) {
179+ rsyserr(FERROR, errno, "pipe");
180+ exit_cleanup(RERR_IPC);
181+ }
182+
183+ pid = do_fork();
184+ if (pid == -1) {
185+ rsyserr(FERROR, errno, "fork");
186+ exit_cleanup(RERR_IPC);
187+ }
188+
189+ if (pid == 0) {
190+ if (dup2(pipefds[0], STDIN_FILENO) < 0
191+ || close(pipefds[1]) < 0
192+ || dup2(out, STDOUT_FILENO) < 0) {
193+ rsyserr(FERROR, errno, "Failed dup/close");
194+ exit_cleanup(RERR_IPC);
195+ }
196+ umask(orig_umask);
197+ set_blocking(STDIN_FILENO);
198+ if (blocking_io)
199+ set_blocking(STDOUT_FILENO);
200+ execvp(command[0], command);
93ca4d27 201+ rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
33d38bc8
WD
202+ exit_cleanup(RERR_IPC);
203+ }
204+
205+ if (close(pipefds[0]) < 0) {
206+ rsyserr(FERROR, errno, "Failed to close");
207+ exit_cleanup(RERR_IPC);
208+ }
209+
210+ *pipe_to_filter = pipefds[1];
211+
212+ return pid;
213+}
214+
215+pid_t run_filter_on_file(char *command[], int out, int in)
216+{
217+ pid_t pid;
e2e42a01 218+
33d38bc8 219+ if (verbose >= 2)
7bfcb297 220+ print_child_argv("opening connection using:", command);
33d38bc8
WD
221+
222+ pid = do_fork();
223+ if (pid == -1) {
224+ rsyserr(FERROR, errno, "fork");
225+ exit_cleanup(RERR_IPC);
226+ }
227+
228+ if (pid == 0) {
229+ if (dup2(in, STDIN_FILENO) < 0
230+ || dup2(out, STDOUT_FILENO) < 0) {
231+ rsyserr(FERROR, errno, "Failed to dup2");
232+ exit_cleanup(RERR_IPC);
233+ }
234+ if (blocking_io)
235+ set_blocking(STDOUT_FILENO);
236+ execvp(command[0], command);
93ca4d27 237+ rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
33d38bc8
WD
238+ exit_cleanup(RERR_IPC);
239+ }
240+
241+ return pid;
242+}
cc3e685d
WD
243diff --git a/receiver.c b/receiver.c
244--- a/receiver.c
245+++ b/receiver.c
7bfcb297 246@@ -52,6 +52,7 @@ extern int delay_updates;
9c85142a
WD
247 extern mode_t orig_umask;
248 extern struct stats stats;
33d38bc8 249 extern char *tmpdir;
9c85142a 250+extern char *dest_filter;
33d38bc8 251 extern char *partial_dir;
dd0d95fa 252 extern char *basis_dir[];
9c85142a 253 extern struct file_list *cur_flist, *first_flist, *dir_flist;
abd3adb8 254@@ -433,6 +434,8 @@ int recv_files(int f_in, char *local_name)
5ff5e82f
WD
255 const char *parent_dirname = "";
256 #endif
81ecd8e0 257 int ndx, recv_ok;
33d38bc8
WD
258+ pid_t pid = 0;
259+ char *filter_argv[MAX_FILTER_ARGS + 1];
260
261 if (verbose > 2)
9c85142a 262 rprintf(FINFO, "recv_files(%d) starting\n", cur_flist->used);
abd3adb8 263@@ -440,6 +443,23 @@ int recv_files(int f_in, char *local_name)
76c553f7
WD
264 if (delay_updates)
265 delayed_bits = bitbag_create(cur_flist->used + 1);
33d38bc8
WD
266
267+ if (dest_filter) {
268+ char *p;
269+ char *sep = " \t";
270+ int i;
271+ for (p = strtok(dest_filter, sep), i = 0;
272+ p && i < MAX_FILTER_ARGS;
273+ p = strtok(0, sep))
274+ filter_argv[i++] = p;
275+ filter_argv[i] = NULL;
276+ if (p) {
277+ rprintf(FERROR,
278+ "Too many arguments to dest-filter (> %d)\n",
279+ MAX_FILTER_ARGS);
280+ exit_cleanup(RERR_SYNTAX);
281+ }
282+ }
283+
284 while (1) {
285 cleanup_disable();
286
abd3adb8 287@@ -706,6 +726,9 @@ int recv_files(int f_in, char *local_name)
33d38bc8 288 else if (!am_server && verbose && do_progress)
93ca4d27 289 rprintf(FINFO, "%s\n", fname);
33d38bc8
WD
290
291+ if (dest_filter)
292+ pid = run_filter(filter_argv, fd2, &fd2);
293+
294 /* recv file data */
295 recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
1aa236e1 296 fname, fd2, F_LENGTH(file));
abd3adb8 297@@ -720,6 +743,16 @@ int recv_files(int f_in, char *local_name)
33d38bc8
WD
298 exit_cleanup(RERR_FILEIO);
299 }
300
301+ if (dest_filter) {
302+ int status;
614291be 303+ wait_process_with_flush(pid, &status);
33d38bc8
WD
304+ if (status != 0) {
305+ rprintf(FERROR, "filter %s exited code: %d\n",
306+ dest_filter, status);
307+ continue;
308+ }
309+ }
310+
311 if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
55c1a3b7 312 if (partialptr == fname)
4c15e800 313 partialptr = NULL;
cc3e685d
WD
314diff --git a/rsync.h b/rsync.h
315--- a/rsync.h
316+++ b/rsync.h
e8972101 317@@ -137,6 +137,7 @@
33d38bc8
WD
318 #define IOERR_DEL_LIMIT (1<<2)
319
320 #define MAX_ARGS 1000
321+#define MAX_FILTER_ARGS 100
322 #define MAX_BASIS_DIRS 20
323 #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100)
324
cc3e685d
WD
325diff --git a/rsync.yo b/rsync.yo
326--- a/rsync.yo
327+++ b/rsync.yo
abd3adb8 328@@ -386,6 +386,7 @@ to the detailed description below for a complete description. verb(
cc3e685d 329 --contimeout=SECONDS set daemon connection timeout in seconds
33d38bc8
WD
330 -I, --ignore-times don't skip files that match size and time
331 --size-only skip files that match in size
332+ --times-only skip files that match in mod-time
333 --modify-window=NUM compare mod-times with reduced accuracy
334 -T, --temp-dir=DIR create temporary files in directory DIR
335 -y, --fuzzy find similar file for basis if no dest file
abd3adb8 336@@ -425,6 +426,8 @@ to the detailed description below for a complete description. verb(
33d38bc8
WD
337 --write-batch=FILE write a batched update to FILE
338 --only-write-batch=FILE like --write-batch but w/o updating dest
339 --read-batch=FILE read a batched update from FILE
340+ --source-filter=COMMAND filter file through COMMAND at source
341+ --dest-filter=COMMAND filter file through COMMAND at destination
342 --protocol=NUM force an older protocol version to be used
cc3e685d 343 --iconv=CONVERT_SPEC request charset conversion of filenames
33d38bc8 344 --checksum-seed=NUM set block/file checksum seed (advanced)
abd3adb8 345@@ -2055,6 +2058,33 @@ file previously generated by bf(--write-batch).
93ca4d27 346 If em(FILE) is bf(-), the batch data will be read from standard input.
33d38bc8
WD
347 See the "BATCH MODE" section for details.
348
349+dit(bf(--source-filter=COMMAND)) This option allows the user to specify a
350+filter program that will be applied to the contents of all transferred
351+regular files before the data is sent to the destination. COMMAND will receive
352+the data on its standard input and it should write the filtered data to
353+standard output. COMMAND should exit non-zero if it cannot process the
354+data or if it encounters an error when writing the data to stdout.
355+
356+Example: --source-filter="gzip -9" will cause remote files to be
357+compressed.
358+Use of --source-filter automatically enables --whole-file.
359+If your filter does not output the same number of bytes that it received on
360+input, you should use --times-only to disable size and content checks on
361+subsequent rsync runs.
362+
363+dit(bf(--dest-filter=COMMAND)) This option allows you to specify a filter
364+program that will be applied to the contents of all transferred regular
365+files before the data is written to disk. COMMAND will receive the data on
366+its standard input and it should write the filtered data to standard
367+output. COMMAND should exit non-zero if it cannot process the data or if
368+it encounters an error when writing the data to stdout.
369+
370+Example: --dest-filter="gzip -9" will cause remote files to be compressed.
371+Use of --dest-filter automatically enables --whole-file.
372+If your filter does not output the same number of bytes that it
373+received on input, you should use --times-only to disable size and
374+content checks on subsequent rsync runs.
375+
376 dit(bf(--protocol=NUM)) Force an older protocol version to be used. This
377 is useful for creating a batch file that is compatible with an older
378 version of rsync. For instance, if rsync 2.6.4 is being used with the
cc3e685d
WD
379diff --git a/sender.c b/sender.c
380--- a/sender.c
381+++ b/sender.c
7bfcb297 382@@ -43,6 +43,7 @@ extern int do_progress;
dd0d95fa
WD
383 extern int inplace;
384 extern int batch_fd;
a859733e 385 extern int write_batch;
dd0d95fa 386+extern char *source_filter;
33d38bc8 387 extern struct stats stats;
9c85142a 388 extern struct file_list *cur_flist, *first_flist, *dir_flist;
33d38bc8 389
4c107044 390@@ -175,6 +176,26 @@ void send_files(int f_in, int f_out)
a859733e 391 enum logcode log_code = log_before_transfer ? FLOG : FINFO;
33d38bc8 392 int f_xfer = write_batch < 0 ? batch_fd : f_out;
fc068916 393 int ndx, j;
33d38bc8
WD
394+ char *filter_argv[MAX_FILTER_ARGS + 1];
395+ char *tmp = 0;
396+ int unlink_tmp = 0;
397+
398+ if (source_filter) {
399+ char *p;
400+ char *sep = " \t";
401+ int i;
402+ for (p = strtok(source_filter, sep), i = 0;
403+ p && i < MAX_FILTER_ARGS;
404+ p = strtok(0, sep))
405+ filter_argv[i++] = p;
406+ filter_argv[i] = NULL;
407+ if (p) {
408+ rprintf(FERROR,
409+ "Too many arguments to source-filter (> %d)\n",
410+ MAX_FILTER_ARGS);
411+ exit_cleanup(RERR_SYNTAX);
412+ }
413+ }
414
415 if (verbose > 2)
416 rprintf(FINFO, "send_files starting\n");
4c107044 417@@ -279,6 +300,7 @@ void send_files(int f_in, int f_out)
fc068916 418 exit_cleanup(RERR_PROTOCOL);
33d38bc8
WD
419 }
420
421+ unlink_tmp = 0;
422 fd = do_open(fname, O_RDONLY, 0);
423 if (fd == -1) {
424 if (errno == ENOENT) {
4c107044 425@@ -300,6 +322,33 @@ void send_files(int f_in, int f_out)
44cedd19 426 continue;
33d38bc8
WD
427 }
428
429+ if (source_filter) {
430+ int fd2;
431+ char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
432+
433+ tmp = strdup(tmpl);
434+ fd2 = mkstemp(tmp);
435+ if (fd2 == -1) {
436+ rprintf(FERROR, "mkstemp %s failed: %s\n",
437+ tmp, strerror(errno));
438+ } else {
439+ int status;
440+ pid_t pid = run_filter_on_file(filter_argv, fd2, fd);
441+ close(fd);
442+ close(fd2);
614291be 443+ wait_process_with_flush(pid, &status);
33d38bc8
WD
444+ if (status != 0) {
445+ rprintf(FERROR,
446+ "bypassing source filter %s; exited with code: %d\n",
447+ source_filter, status);
448+ fd = do_open(fname, O_RDONLY, 0);
449+ } else {
450+ fd = do_open(tmp, O_RDONLY, 0);
451+ unlink_tmp = 1;
452+ }
453+ }
454+ }
455+
44cedd19
WD
456 /* map the local file */
457 if (do_fstat(fd, &st) != 0) {
458 io_error |= IOERR_GENERAL;
4c107044 459@@ -350,6 +399,8 @@ void send_files(int f_in, int f_out)
33d38bc8
WD
460 }
461 }
462 close(fd);
463+ if (unlink_tmp)
464+ unlink(tmp);
465
466 free_sums(s);
467