Updated patches to work with the current trunk.
[rsync/rsync-patches.git] / source-filter_dest-filter.diff
1 CAUTION:  This patch compiles, but is otherwise totally untested!
2
3 This patch also implements --times-only.
4
5 Implementation details for the --source-filter and --dest-filter options:
6
7  - These options open a *HUGE* security hole in daemon mode unless they
8    are refused in your rsyncd.conf!
9
10  - Filtering disables the rsync algorithm. (This should be fixed.)
11
12  - Source filter makes temporary files in /tmp. (Should be overridable.)
13
14  - If the source filter fails, data is sent unfiltered. (Should be changed
15    to abort.)
16
17  - Failure of the destination filter causes data loss!!! (Should be changed
18    to abort.)
19
20  - If the filter changes the size of a file, use the --times-only option to
21    prevent repeated transfers of unchanged files.
22
23  - If the COMMAND contains single quotes, option-passing breaks.  (Needs
24    to be fixed.)
25
26 To use this patch, run these commands for a successful build:
27
28     patch -p1 <patches/source-filter_dest-filter.diff
29     ./prepare-source
30     ./configure                                (optional if already run)
31     make
32
33 diff --git a/generator.c b/generator.c
34 index 12007a1..88bd5e7 100644
35 --- a/generator.c
36 +++ b/generator.c
37 @@ -64,6 +64,7 @@ extern int append_mode;
38  extern int make_backups;
39  extern int csum_length;
40  extern int ignore_times;
41 +extern int times_only;
42  extern int size_only;
43  extern OFF_T max_size;
44  extern OFF_T min_size;
45 @@ -524,7 +525,7 @@ void itemize(const char *fnamecmp, struct file_struct *file, int ndx, int statre
46  /* Perform our quick-check heuristic for determining if a file is unchanged. */
47  int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
48  {
49 -       if (st->st_size != F_LENGTH(file))
50 +       if (!times_only && st->st_size != F_LENGTH(file))
51                 return 0;
52  
53         /* if always checksum is set then we use the checksum instead
54 diff --git a/main.c b/main.c
55 index 2ef2f47..e7b4a05 100644
56 --- a/main.c
57 +++ b/main.c
58 @@ -140,7 +140,7 @@ pid_t wait_process(pid_t pid, int *status_ptr, int flags)
59  }
60  
61  /* Wait for a process to exit, calling io_flush while waiting. */
62 -static void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
63 +void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
64  {
65         pid_t waited_pid;
66         int status;
67 diff --git a/options.c b/options.c
68 index e7c6c61..059bcbf 100644
69 --- a/options.c
70 +++ b/options.c
71 @@ -105,6 +105,7 @@ int safe_symlinks = 0;
72  int copy_unsafe_links = 0;
73  int munge_symlinks = 0;
74  int size_only = 0;
75 +int times_only = 0;
76  int daemon_bwlimit = 0;
77  int bwlimit = 0;
78  int fuzzy_basis = 0;
79 @@ -164,6 +165,8 @@ char *logfile_name = NULL;
80  char *logfile_format = NULL;
81  char *stdout_format = NULL;
82  char *password_file = NULL;
83 +char *source_filter = NULL;
84 +char *dest_filter = NULL;
85  char *rsync_path = RSYNC_PATH;
86  char *backup_dir = NULL;
87  char backup_dir_buf[MAXPATHLEN];
88 @@ -739,6 +742,7 @@ void usage(enum logcode F)
89    rprintf(F," -I, --ignore-times          don't skip files that match in size and mod-time\n");
90    rprintf(F," -M, --remote-option=OPTION  send OPTION to the remote side only\n");
91    rprintf(F,"     --size-only             skip files that match in size\n");
92 +  rprintf(F,"     --times-only            skip files that match in mod-time\n");
93    rprintf(F,"     --modify-window=NUM     compare mod-times with reduced accuracy\n");
94    rprintf(F," -T, --temp-dir=DIR          create temporary files in directory DIR\n");
95    rprintf(F," -y, --fuzzy                 find similar file for basis if no dest file\n");
96 @@ -778,6 +782,8 @@ void usage(enum logcode F)
97    rprintf(F,"     --write-batch=FILE      write a batched update to FILE\n");
98    rprintf(F,"     --only-write-batch=FILE like --write-batch but w/o updating destination\n");
99    rprintf(F,"     --read-batch=FILE       read a batched update from FILE\n");
100 +  rprintf(F,"     --source-filter=COMMAND filter file through COMMAND at source\n");
101 +  rprintf(F,"     --dest-filter=COMMAND   filter file through COMMAND at destination\n");
102    rprintf(F,"     --protocol=NUM          force an older protocol version to be used\n");
103  #ifdef ICONV_OPTION
104    rprintf(F,"     --iconv=CONVERT_SPEC    request charset conversion of filenames\n");
105 @@ -887,6 +893,7 @@ static struct poptOption long_options[] = {
106    {"chmod",            0,  POPT_ARG_STRING, 0, OPT_CHMOD, 0, 0 },
107    {"ignore-times",    'I', POPT_ARG_NONE,   &ignore_times, 0, 0, 0 },
108    {"size-only",        0,  POPT_ARG_NONE,   &size_only, 0, 0, 0 },
109 +  {"times-only",       0,  POPT_ARG_NONE,   &times_only , 0, 0, 0 },
110    {"one-file-system", 'x', POPT_ARG_NONE,   0, 'x', 0, 0 },
111    {"no-one-file-system",'x',POPT_ARG_VAL,   &one_file_system, 0, 0, 0 },
112    {"no-x",            'x', POPT_ARG_VAL,    &one_file_system, 0, 0, 0 },
113 @@ -1007,6 +1014,8 @@ static struct poptOption long_options[] = {
114    {"password-file",    0,  POPT_ARG_STRING, &password_file, 0, 0, 0 },
115    {"blocking-io",      0,  POPT_ARG_VAL,    &blocking_io, 1, 0, 0 },
116    {"no-blocking-io",   0,  POPT_ARG_VAL,    &blocking_io, 0, 0, 0 },
117 +  {"source-filter",    0,  POPT_ARG_STRING, &source_filter, 0, 0, 0 },
118 +  {"dest-filter",      0,  POPT_ARG_STRING, &dest_filter, 0, 0, 0 },
119    {"remote-option",   'M', POPT_ARG_STRING, 0, 'M', 0, 0 },
120    {"protocol",         0,  POPT_ARG_INT,    &protocol_version, 0, 0, 0 },
121    {"checksum-seed",    0,  POPT_ARG_INT,    &checksum_seed, 0, 0, 0 },
122 @@ -2149,6 +2158,16 @@ int parse_arguments(int *argc_p, const char ***argv_p)
123                 }
124         }
125  
126 +       if (source_filter || dest_filter) {
127 +               if (whole_file == 0) {
128 +                       snprintf(err_buf, sizeof err_buf,
129 +                                "--no-whole-file cannot be used with --%s-filter\n",
130 +                                source_filter ? "source" : "dest");
131 +                       return 0;
132 +               }
133 +               whole_file = 1;
134 +       }
135 +
136         if (files_from) {
137                 char *h, *p;
138                 int q;
139 @@ -2493,6 +2512,25 @@ void server_options(char **args, int *argc_p)
140         else if (missing_args == 1 && !am_sender)
141                 args[ac++] = "--ignore-missing-args";
142  
143 +       if (times_only && am_sender)
144 +               args[ac++] = "--times-only";
145 +
146 +       if (source_filter && !am_sender) {
147 +               /* Need to single quote the arg to keep the remote shell
148 +                * from splitting it.  FIXME: breaks if command has single quotes. */
149 +               if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0)
150 +                       goto oom;
151 +               args[ac++] = arg;
152 +       }
153 +
154 +       if (dest_filter && am_sender) {
155 +               /* Need to single quote the arg to keep the remote shell
156 +                * from splitting it.  FIXME: breaks if command has single quotes. */
157 +               if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0)
158 +                       goto oom;
159 +               args[ac++] = arg;
160 +       }
161 +
162         if (modify_window_set) {
163                 if (asprintf(&arg, "--modify-window=%d", modify_window) < 0)
164                         goto oom;
165 diff --git a/pipe.c b/pipe.c
166 index a33117c..43eea31 100644
167 --- a/pipe.c
168 +++ b/pipe.c
169 @@ -180,3 +180,77 @@ pid_t local_child(int argc, char **argv, int *f_in, int *f_out,
170  
171         return pid;
172  }
173 +
174 +pid_t run_filter(char *command[], int out, int *pipe_to_filter)
175 +{
176 +       pid_t pid;
177 +       int pipefds[2];
178 +
179 +       if (DEBUG_GTE(CMD, 1))
180 +               print_child_argv("opening connection using:", command);
181 +
182 +       if (pipe(pipefds) < 0) {
183 +               rsyserr(FERROR, errno, "pipe");
184 +               exit_cleanup(RERR_IPC);
185 +       }
186 +
187 +       pid = do_fork();
188 +       if (pid == -1) {
189 +               rsyserr(FERROR, errno, "fork");
190 +               exit_cleanup(RERR_IPC);
191 +       }
192 +
193 +       if (pid == 0) {
194 +               if (dup2(pipefds[0], STDIN_FILENO) < 0
195 +                || close(pipefds[1]) < 0
196 +                || dup2(out, STDOUT_FILENO) < 0) {
197 +                       rsyserr(FERROR, errno, "Failed dup/close");
198 +                       exit_cleanup(RERR_IPC);
199 +               }
200 +               umask(orig_umask);
201 +               set_blocking(STDIN_FILENO);
202 +               if (blocking_io)
203 +                       set_blocking(STDOUT_FILENO);
204 +               execvp(command[0], command);
205 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
206 +               exit_cleanup(RERR_IPC);
207 +       }
208 +
209 +       if (close(pipefds[0]) < 0) {
210 +               rsyserr(FERROR, errno, "Failed to close");
211 +               exit_cleanup(RERR_IPC);
212 +       }
213 +
214 +       *pipe_to_filter = pipefds[1];
215 +
216 +       return pid;
217 +}
218 +
219 +pid_t run_filter_on_file(char *command[], int out, int in)
220 +{
221 +       pid_t pid;
222 +
223 +       if (DEBUG_GTE(CMD, 1))
224 +               print_child_argv("opening connection using:", command);
225 +
226 +       pid = do_fork();
227 +       if (pid == -1) {
228 +               rsyserr(FERROR, errno, "fork");
229 +               exit_cleanup(RERR_IPC);
230 +       }
231 +
232 +       if (pid == 0) {
233 +               if (dup2(in, STDIN_FILENO) < 0
234 +                || dup2(out, STDOUT_FILENO) < 0) {
235 +                       rsyserr(FERROR, errno, "Failed to dup2");
236 +                       exit_cleanup(RERR_IPC);
237 +               }
238 +               if (blocking_io)
239 +                       set_blocking(STDOUT_FILENO);
240 +               execvp(command[0], command);
241 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
242 +               exit_cleanup(RERR_IPC);
243 +       }
244 +
245 +       return pid;
246 +}
247 diff --git a/receiver.c b/receiver.c
248 index 4325e30..e5ede73 100644
249 --- a/receiver.c
250 +++ b/receiver.c
251 @@ -52,6 +52,7 @@ extern int delay_updates;
252  extern mode_t orig_umask;
253  extern struct stats stats;
254  extern char *tmpdir;
255 +extern char *dest_filter;
256  extern char *partial_dir;
257  extern char *basis_dir[MAX_BASIS_DIRS+1];
258  extern char sender_file_sum[MAX_DIGEST_LEN];
259 @@ -441,6 +442,8 @@ int recv_files(int f_in, char *local_name)
260         const char *parent_dirname = "";
261  #endif
262         int ndx, recv_ok;
263 +       pid_t pid = 0;
264 +       char *filter_argv[MAX_FILTER_ARGS + 1];
265  
266         if (DEBUG_GTE(RECV, 1))
267                 rprintf(FINFO, "recv_files(%d) starting\n", cur_flist->used);
268 @@ -448,6 +451,23 @@ int recv_files(int f_in, char *local_name)
269         if (delay_updates)
270                 delayed_bits = bitbag_create(cur_flist->used + 1);
271  
272 +       if (dest_filter) {
273 +               char *p;
274 +               char *sep = " \t";
275 +               int i;
276 +               for (p = strtok(dest_filter, sep), i = 0;
277 +                    p && i < MAX_FILTER_ARGS;
278 +                    p = strtok(0, sep))
279 +                       filter_argv[i++] = p;
280 +               filter_argv[i] = NULL;
281 +               if (p) {
282 +                       rprintf(FERROR,
283 +                               "Too many arguments to dest-filter (> %d)\n",
284 +                               MAX_FILTER_ARGS);
285 +                       exit_cleanup(RERR_SYNTAX);
286 +               }
287 +       }
288 +
289         while (1) {
290                 cleanup_disable();
291  
292 @@ -742,6 +762,9 @@ int recv_files(int f_in, char *local_name)
293                 else if (!am_server && INFO_GTE(NAME, 1) && INFO_EQ(PROGRESS, 1))
294                         rprintf(FINFO, "%s\n", fname);
295  
296 +               if (dest_filter)
297 +                       pid = run_filter(filter_argv, fd2, &fd2);
298 +
299                 /* recv file data */
300                 recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
301                                        fname, fd2, F_LENGTH(file));
302 @@ -756,6 +779,16 @@ int recv_files(int f_in, char *local_name)
303                         exit_cleanup(RERR_FILEIO);
304                 }
305  
306 +               if (dest_filter) {
307 +                       int status;
308 +                       wait_process_with_flush(pid, &status);
309 +                       if (status != 0) {
310 +                               rprintf(FERROR, "filter %s exited code: %d\n",
311 +                                       dest_filter, status);
312 +                               continue;
313 +                       }
314 +               }
315 +
316                 if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
317                         if (partialptr == fname)
318                                 partialptr = NULL;
319 diff --git a/rsync.h b/rsync.h
320 index be7cf8a..8d78818 100644
321 --- a/rsync.h
322 +++ b/rsync.h
323 @@ -138,6 +138,7 @@
324  #define IOERR_DEL_LIMIT (1<<2)
325  
326  #define MAX_ARGS 1000
327 +#define MAX_FILTER_ARGS 100
328  #define MAX_BASIS_DIRS 20
329  #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100)
330  
331 diff --git a/rsync.yo b/rsync.yo
332 index 941f7a5..edfad5e 100644
333 --- a/rsync.yo
334 +++ b/rsync.yo
335 @@ -394,6 +394,7 @@ to the detailed description below for a complete description.  verb(
336       --contimeout=SECONDS    set daemon connection timeout in seconds
337   -I, --ignore-times          don't skip files that match size and time
338       --size-only             skip files that match in size
339 +     --times-only            skip files that match in mod-time
340       --modify-window=NUM     compare mod-times with reduced accuracy
341   -T, --temp-dir=DIR          create temporary files in directory DIR
342   -y, --fuzzy                 find similar file for basis if no dest file
343 @@ -434,6 +435,8 @@ to the detailed description below for a complete description.  verb(
344       --write-batch=FILE      write a batched update to FILE
345       --only-write-batch=FILE like --write-batch but w/o updating dest
346       --read-batch=FILE       read a batched update from FILE
347 +     --source-filter=COMMAND filter file through COMMAND at source
348 +     --dest-filter=COMMAND   filter file through COMMAND at destination
349       --protocol=NUM          force an older protocol version to be used
350       --iconv=CONVERT_SPEC    request charset conversion of filenames
351       --checksum-seed=NUM     set block/file checksum seed (advanced)
352 @@ -2256,6 +2259,33 @@ file previously generated by bf(--write-batch).
353  If em(FILE) is bf(-), the batch data will be read from standard input.
354  See the "BATCH MODE" section for details.
355  
356 +dit(bf(--source-filter=COMMAND)) This option allows the user to specify a
357 +filter program that will be applied to the contents of all transferred
358 +regular files before the data is sent to destination.  COMMAND will receive
359 +the data on its standard input and it should write the filtered data to
360 +standard output.  COMMAND should exit non-zero if it cannot process the
361 +data or if it encounters an error when writing the data to stdout.
362 +
363 +Example: --source-filter="gzip -9" will cause files to be compressed at the
364 +source before they are sent.
365 +Use of --source-filter automatically enables --whole-file.
366 +If your filter does not output the same number of bytes that it received on
367 +input, you should use --times-only to disable size and content checks on
368 +subsequent rsync runs.
369 +
370 +dit(bf(--dest-filter=COMMAND)) This option allows you to specify a filter
371 +program that will be applied to the contents of all transferred regular
372 +files before the data is written to disk.  COMMAND will receive the data on
373 +its standard input and it should write the filtered data to standard
374 +output.  COMMAND should exit non-zero if it cannot process the data or if
375 +it encounters an error when writing the data to stdout.
376 +
377 +Example: --dest-filter="gzip -9" will cause received files to be compressed.
378 +Use of --dest-filter automatically enables --whole-file.
379 +If your filter does not output the same number of bytes that it
380 +received on input, you should use --times-only to disable size and
381 +content checks on subsequent rsync runs.
382 +
383  dit(bf(--protocol=NUM)) Force an older protocol version to be used.  This
384  is useful for creating a batch file that is compatible with an older
385  version of rsync.  For instance, if rsync 2.6.4 is being used with the
386 diff --git a/sender.c b/sender.c
387 index bf8221d..f315f80 100644
388 --- a/sender.c
389 +++ b/sender.c
390 @@ -42,6 +42,7 @@ extern int make_backups;
391  extern int inplace;
392  extern int batch_fd;
393  extern int write_batch;
394 +extern char *source_filter;
395  extern struct stats stats;
396  extern struct file_list *cur_flist, *first_flist, *dir_flist;
397  
398 @@ -174,6 +175,26 @@ void send_files(int f_in, int f_out)
399         enum logcode log_code = log_before_transfer ? FLOG : FINFO;
400         int f_xfer = write_batch < 0 ? batch_fd : f_out;
401         int ndx, j;
402 +       char *filter_argv[MAX_FILTER_ARGS + 1];
403 +       char *tmp = 0;
404 +       int unlink_tmp = 0;
405 +
406 +       if (source_filter) {
407 +               char *p;
408 +               char *sep = " \t";
409 +               int i;
410 +               for (p = strtok(source_filter, sep), i = 0;
411 +                    p && i < MAX_FILTER_ARGS;
412 +                    p = strtok(0, sep))
413 +                       filter_argv[i++] = p;
414 +               filter_argv[i] = NULL;
415 +               if (p) {
416 +                       rprintf(FERROR,
417 +                               "Too many arguments to source-filter (> %d)\n",
418 +                               MAX_FILTER_ARGS);
419 +                       exit_cleanup(RERR_SYNTAX);
420 +               }
421 +       }
422  
423         if (DEBUG_GTE(SEND, 1))
424                 rprintf(FINFO, "send_files starting\n");
425 @@ -299,6 +320,7 @@ void send_files(int f_in, int f_out)
426                         exit_cleanup(RERR_PROTOCOL);
427                 }
428  
429 +               unlink_tmp = 0;
430                 fd = do_open(fname, O_RDONLY, 0);
431                 if (fd == -1) {
432                         if (errno == ENOENT) {
433 @@ -320,6 +342,33 @@ void send_files(int f_in, int f_out)
434                         continue;
435                 }
436  
437 +               if (source_filter) {
438 +                       int fd2;
439 +                       char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
440 +
441 +                       tmp = strdup(tmpl);
442 +                       fd2 = mkstemp(tmp);
443 +                       if (fd2 == -1) {
444 +                               rprintf(FERROR, "mkstemp %s failed: %s\n",
445 +                                       tmp, strerror(errno));
446 +                       } else {
447 +                               int status;
448 +                               pid_t pid = run_filter_on_file(filter_argv, fd2, fd);
449 +                               close(fd);
450 +                               close(fd2);
451 +                               wait_process_with_flush(pid, &status);
452 +                               if (status != 0) {
453 +                                       rprintf(FERROR,
454 +                                           "bypassing source filter %s; exited with code: %d\n",
455 +                                           source_filter, status);
456 +                                       fd = do_open(fname, O_RDONLY, 0);
457 +                               } else {
458 +                                       fd = do_open(tmp, O_RDONLY, 0);
459 +                                       unlink_tmp = 1;
460 +                               }
461 +                       }
462 +               }
463 +
464                 /* map the local file */
465                 if (do_fstat(fd, &st) != 0) {
466                         io_error |= IOERR_GENERAL;
467 @@ -370,6 +419,8 @@ void send_files(int f_in, int f_out)
468                         }
469                 }
470                 close(fd);
471 +               if (unlink_tmp)
472 +                       unlink(tmp);
473  
474                 free_sums(s);
475