Updated for rolled-back trunk.
[rsync/rsync-patches.git] / source-filter_dest-filter.diff
1 CAUTION:  This patch compiles, but is otherwise totally untested!
2
3 This patch also implements --times-only.
4
5 Implementation details for the --source-filter and --dest-filter options:
6
7  - These options open a *HUGE* security hole in daemon mode unless they
8    are refused in your rsyncd.conf!
9
10  - Filtering disables the rsync algorithm. (This should be fixed.)
11
12  - Source filter makes temporary files in /tmp. (Should be overridable.)
13
14  - If the source filter fails, data is sent unfiltered. (Should be changed
15    to abort.)
16
17  - Failure of the destination filter causes data loss!!! (Should be changed
18    to abort.)
19
20  - If the filter changes a file's size, you should use the --times-only option to
21    prevent repeated transfers of unchanged files.
22
23  - If the COMMAND contains single quotes, option-passing breaks.  (Needs
24    to be fixed.)
25
26 After applying this patch, run these commands for a successful build:
27
28     ./prepare-source
29     ./configure                      (optional if already run)
30     make
31
32 --- old/generator.c
33 +++ new/generator.c
34 @@ -60,6 +60,7 @@ extern int append_mode;
35  extern int make_backups;
36  extern int csum_length;
37  extern int ignore_times;
38 +extern int times_only;
39  extern int size_only;
40  extern OFF_T max_size;
41  extern OFF_T min_size;
42 @@ -378,7 +379,7 @@ void itemize(struct file_struct *file, i
43  /* Perform our quick-check heuristic for determining if a file is unchanged. */
44  int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
45  {
46 -       if (st->st_size != file->length)
47 +       if (!times_only && st->st_size != file->length)
48                 return 0;
49  
50         /* if always checksum is set then we use the checksum instead
51 --- old/main.c
52 +++ new/main.c
53 @@ -122,7 +122,7 @@ pid_t wait_process(pid_t pid, int *statu
54  }
55  
56  /* Wait for a process to exit, calling io_flush while waiting. */
57 -static void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
58 +void wait_process_with_flush(pid_t pid, int *exit_code_ptr)
59  {
60         pid_t waited_pid;
61         int status;
62 --- old/options.c
63 +++ new/options.c
64 @@ -99,6 +99,7 @@ int keep_partial = 0;
65  int safe_symlinks = 0;
66  int copy_unsafe_links = 0;
67  int size_only = 0;
68 +int times_only = 0;
69  int daemon_bwlimit = 0;
70  int bwlimit = 0;
71  int fuzzy_basis = 0;
72 @@ -151,6 +152,8 @@ char *logfile_name = NULL;
73  char *logfile_format = NULL;
74  char *stdout_format = NULL;
75  char *password_file = NULL;
76 +char *source_filter = NULL;
77 +char *dest_filter = NULL;
78  char *rsync_path = RSYNC_PATH;
79  char *backup_dir = NULL;
80  char backup_dir_buf[MAXPATHLEN];
81 @@ -341,6 +344,7 @@ void usage(enum logcode F)
82    rprintf(F,"     --timeout=TIME          set I/O timeout in seconds\n");
83    rprintf(F," -I, --ignore-times          don't skip files that match in size and mod-time\n");
84    rprintf(F,"     --size-only             skip files that match in size\n");
85 +  rprintf(F,"     --times-only            skip files that match in mod-time\n");
86    rprintf(F,"     --modify-window=NUM     compare mod-times with reduced accuracy\n");
87    rprintf(F," -T, --temp-dir=DIR          create temporary files in directory DIR\n");
88    rprintf(F," -y, --fuzzy                 find similar file for basis if no dest file\n");
89 @@ -378,6 +382,8 @@ void usage(enum logcode F)
90    rprintf(F,"     --write-batch=FILE      write a batched update to FILE\n");
91    rprintf(F,"     --only-write-batch=FILE like --write-batch but w/o updating destination\n");
92    rprintf(F,"     --read-batch=FILE       read a batched update from FILE\n");
93 +  rprintf(F,"     --source-filter=COMMAND filter file through COMMAND at source\n");
94 +  rprintf(F,"     --dest-filter=COMMAND   filter file through COMMAND at destination\n");
95    rprintf(F,"     --protocol=NUM          force an older protocol version to be used\n");
96  #ifdef INET6
97    rprintf(F," -4, --ipv4                  prefer IPv4\n");
98 @@ -459,6 +465,7 @@ static struct poptOption long_options[] 
99    {"chmod",            0,  POPT_ARG_STRING, 0, OPT_CHMOD, 0, 0 },
100    {"ignore-times",    'I', POPT_ARG_NONE,   &ignore_times, 0, 0, 0 },
101    {"size-only",        0,  POPT_ARG_NONE,   &size_only, 0, 0, 0 },
102 +  {"times-only",       0,  POPT_ARG_NONE,   &times_only , 0, 0, 0 },
103    {"one-file-system", 'x', POPT_ARG_NONE,   0, 'x', 0, 0 },
104    {"update",          'u', POPT_ARG_NONE,   &update_only, 0, 0, 0 },
105    {"existing",         0,  POPT_ARG_NONE,   &ignore_non_existing, 0, 0, 0 },
106 @@ -537,6 +544,8 @@ static struct poptOption long_options[] 
107    {"password-file",    0,  POPT_ARG_STRING, &password_file, 0, 0, 0 },
108    {"blocking-io",      0,  POPT_ARG_VAL,    &blocking_io, 1, 0, 0 },
109    {"no-blocking-io",   0,  POPT_ARG_VAL,    &blocking_io, 0, 0, 0 },
110 +  {"source-filter",    0,  POPT_ARG_STRING, &source_filter, 0, 0, 0 },
111 +  {"dest-filter",      0,  POPT_ARG_STRING, &dest_filter, 0, 0, 0 },
112    {"protocol",         0,  POPT_ARG_INT,    &protocol_version, 0, 0, 0 },
113    {"checksum-seed",    0,  POPT_ARG_INT,    &checksum_seed, 0, 0, 0 },
114    {"server",           0,  POPT_ARG_NONE,   0, OPT_SERVER, 0, 0 },
115 @@ -1403,6 +1412,16 @@ int parse_arguments(int *argc, const cha
116                 }
117         }
118  
119 +       if (source_filter || dest_filter) {
120 +               if (whole_file == 0) {
121 +                       snprintf(err_buf, sizeof err_buf,
122 +                                "--no-whole-file cannot be used with --%s-filter\n",
123 +                                source_filter ? "source" : "dest");
124 +                       return 0;
125 +               }
126 +               whole_file = 1;
127 +       }
128 +
129         if (files_from) {
130                 char *h, *p;
131                 int q;
132 @@ -1669,6 +1688,25 @@ void server_options(char **args,int *arg
133                         args[ac++] = "--size-only";
134         }
135  
136 +       if (times_only && am_sender)
137 +               args[ac++] = "--times-only";
138 +
139 +       if (source_filter && !am_sender) {
140 +               /* Need to single quote the arg to keep the remote shell
141 +                * from splitting it.  FIXME: breaks if command has single quotes. */
142 +               if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0)
143 +                       goto oom;
144 +               args[ac++] = arg;
145 +       }
146 +
147 +       if (dest_filter && am_sender) {
148 +               /* Need to single quote the arg to keep the remote shell
149 +                * from splitting it.  FIXME: breaks if command has single quotes. */
150 +               if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0)
151 +                       goto oom;
152 +               args[ac++] = arg;
153 +       }
154 +
155         if (modify_window_set) {
156                 if (asprintf(&arg, "--modify-window=%d", modify_window) < 0)
157                         goto oom;
158 --- old/pipe.c
159 +++ new/pipe.c
160 @@ -166,3 +166,77 @@ pid_t local_child(int argc, char **argv,
161  
162         return pid;
163  }
164 +
165 +pid_t run_filter(char *command[], int out, int *pipe_to_filter)
166 +{
167 +       pid_t pid;
168 +       int pipefds[2];
169 +       
170 +       if (verbose >= 2)
171 +               print_child_argv(command);
172 +
173 +       if (pipe(pipefds) < 0) {
174 +               rsyserr(FERROR, errno, "pipe");
175 +               exit_cleanup(RERR_IPC);
176 +       }
177 +
178 +       pid = do_fork();
179 +       if (pid == -1) {
180 +               rsyserr(FERROR, errno, "fork");
181 +               exit_cleanup(RERR_IPC);
182 +       }
183 +
184 +       if (pid == 0) {
185 +               if (dup2(pipefds[0], STDIN_FILENO) < 0
186 +                || close(pipefds[1]) < 0
187 +                || dup2(out, STDOUT_FILENO) < 0) {
188 +                       rsyserr(FERROR, errno, "Failed dup/close");
189 +                       exit_cleanup(RERR_IPC);
190 +               }
191 +               umask(orig_umask);
192 +               set_blocking(STDIN_FILENO);
193 +               if (blocking_io)
194 +                       set_blocking(STDOUT_FILENO);
195 +               execvp(command[0], command);
196 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
197 +               exit_cleanup(RERR_IPC);
198 +       }
199 +
200 +       if (close(pipefds[0]) < 0) {
201 +               rsyserr(FERROR, errno, "Failed to close");
202 +               exit_cleanup(RERR_IPC);
203 +       }
204 +
205 +       *pipe_to_filter = pipefds[1];
206 +
207 +       return pid;
208 +}
209 +
210 +pid_t run_filter_on_file(char *command[], int out, int in)
211 +{
212 +       pid_t pid;
213 +       
214 +       if (verbose >= 2)
215 +               print_child_argv(command);
216 +
217 +       pid = do_fork();
218 +       if (pid == -1) {
219 +               rsyserr(FERROR, errno, "fork");
220 +               exit_cleanup(RERR_IPC);
221 +       }
222 +
223 +       if (pid == 0) {
224 +               if (dup2(in, STDIN_FILENO) < 0
225 +                || dup2(out, STDOUT_FILENO) < 0) {
226 +                       rsyserr(FERROR, errno, "Failed to dup2");
227 +                       exit_cleanup(RERR_IPC);
228 +               }
229 +               if (blocking_io)
230 +                       set_blocking(STDOUT_FILENO);
231 +               execvp(command[0], command);
232 +               rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
233 +               exit_cleanup(RERR_IPC);
234 +       }
235 +
236 +       return pid;
237 +}
238 --- old/receiver.c
239 +++ new/receiver.c
240 @@ -48,6 +48,7 @@ extern int checksum_seed;
241  extern int inplace;
242  extern int delay_updates;
243  extern struct stats stats;
244 +extern char *dest_filter;
245  extern char *stdout_format;
246  extern char *tmpdir;
247  extern char *partial_dir;
248 @@ -350,6 +351,8 @@ int recv_files(int f_in, struct file_lis
249         enum logcode log_code = log_before_transfer ? FLOG : FINFO;
250         int max_phase = protocol_version >= 29 ? 2 : 1;
251         int i, recv_ok;
252 +       pid_t pid = 0;
253 +       char *filter_argv[MAX_FILTER_ARGS + 1];
254  
255         if (verbose > 2)
256                 rprintf(FINFO,"recv_files(%d) starting\n",flist->count);
257 @@ -364,6 +367,23 @@ int recv_files(int f_in, struct file_lis
258  
259         updating_basis = inplace;
260  
261 +       if (dest_filter) {
262 +               char *p;
263 +               char *sep = " \t";
264 +               int i;
265 +               for (p = strtok(dest_filter, sep), i = 0;
266 +                    p && i < MAX_FILTER_ARGS;
267 +                    p = strtok(0, sep))
268 +                       filter_argv[i++] = p;
269 +               filter_argv[i] = NULL;
270 +               if (p) {
271 +                       rprintf(FERROR,
272 +                               "Too many arguments to dest-filter (> %d)\n",
273 +                               MAX_FILTER_ARGS);
274 +                       exit_cleanup(RERR_SYNTAX);
275 +               }
276 +       }
277 +
278         while (1) {
279                 cleanup_disable();
280  
281 @@ -606,6 +626,9 @@ int recv_files(int f_in, struct file_lis
282                 else if (!am_server && verbose && do_progress)
283                         rprintf(FINFO, "%s\n", fname);
284  
285 +               if (dest_filter)
286 +                       pid = run_filter(filter_argv, fd2, &fd2);
287 +
288                 /* recv file data */
289                 recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
290                                        fname, fd2, file->length);
291 @@ -620,6 +643,16 @@ int recv_files(int f_in, struct file_lis
292                         exit_cleanup(RERR_FILEIO);
293                 }
294  
295 +               if (dest_filter) {
296 +                       int status;
297 +                       wait_process_with_flush(pid, &status);
298 +                       if (status != 0) {
299 +                               rprintf(FERROR, "filter %s exited code: %d\n",
300 +                                       dest_filter, status);
301 +                               continue;
302 +                       }
303 +               }
304 +
305                 if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
306                         char *temp_copy_name;
307                         if (partialptr == fname)
308 --- old/rsync.h
309 +++ new/rsync.h
310 @@ -104,6 +104,7 @@
311  #define IOERR_DEL_LIMIT (1<<2)
312  
313  #define MAX_ARGS 1000
314 +#define MAX_FILTER_ARGS 100
315  #define MAX_BASIS_DIRS 20
316  #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100)
317  
318 --- old/rsync.yo
319 +++ new/rsync.yo
320 @@ -359,6 +359,7 @@ to the detailed description below for a 
321       --timeout=TIME          set I/O timeout in seconds
322   -I, --ignore-times          don't skip files that match size and time
323       --size-only             skip files that match in size
324 +     --times-only            skip files that match in mod-time
325       --modify-window=NUM     compare mod-times with reduced accuracy
326   -T, --temp-dir=DIR          create temporary files in directory DIR
327   -y, --fuzzy                 find similar file for basis if no dest file
328 @@ -396,6 +397,8 @@ to the detailed description below for a 
329       --write-batch=FILE      write a batched update to FILE
330       --only-write-batch=FILE like --write-batch but w/o updating dest
331       --read-batch=FILE       read a batched update from FILE
332 +     --source-filter=COMMAND filter file through COMMAND at source
333 +     --dest-filter=COMMAND   filter file through COMMAND at destination
334       --protocol=NUM          force an older protocol version to be used
335       --checksum-seed=NUM     set block/file checksum seed (advanced)
336   -4, --ipv4                  prefer IPv4
337 @@ -1753,6 +1756,33 @@ file previously generated by bf(--write-
338  If em(FILE) is bf(-), the batch data will be read from standard input.
339  See the "BATCH MODE" section for details.
340  
341 +dit(bf(--source-filter=COMMAND)) This option allows the user to specify a
342 +filter program that will be applied to the contents of all transferred
343 +regular files before the data is sent to destination.  COMMAND will receive
344 +the data on its standard input and it should write the filtered data to
345 +standard output.  COMMAND should exit non-zero if it cannot process the
346 +data or if it encounters an error when writing the data to stdout.
347 +
348 +Example: --source-filter="gzip -9" will cause remote files to be
349 +compressed.
350 +Use of --source-filter automatically enables --whole-file.
351 +If your filter does not output the same number of bytes that it received on
352 +input, you should use --times-only to disable size and content checks on
353 +subsequent rsync runs.
354 +
355 +dit(bf(--dest-filter=COMMAND)) This option allows you to specify a filter
356 +program that will be applied to the contents of all transferred regular
357 +files before the data is written to disk.  COMMAND will receive the data on
358 +its standard input and it should write the filtered data to standard
359 +output.  COMMAND should exit non-zero if it cannot process the data or if
360 +it encounters an error when writing the data to stdout.
361 +
362 +Example: --dest-filter="gzip -9" will cause remote files to be compressed.
363 +Use of --dest-filter automatically enables --whole-file.
364 +If your filter does not output the same number of bytes that it
365 +received on input, you should use --times-only to disable size and
366 +content checks on subsequent rsync runs.
367 +
368  dit(bf(--protocol=NUM)) Force an older protocol version to be used.  This
369  is useful for creating a batch file that is compatible with an older
370  version of rsync.  For instance, if rsync 2.6.4 is being used with the
371 --- old/sender.c
372 +++ new/sender.c
373 @@ -43,6 +43,7 @@ extern int batch_fd;
374  extern int write_batch;
375  extern struct stats stats;
376  extern struct file_list *the_file_list;
377 +extern char *source_filter;
378  extern char *stdout_format;
379  
380  
381 @@ -224,6 +225,26 @@ void send_files(struct file_list *flist,
382         enum logcode log_code = log_before_transfer ? FLOG : FINFO;
383         int f_xfer = write_batch < 0 ? batch_fd : f_out;
384         int i, j;
385 +       char *filter_argv[MAX_FILTER_ARGS + 1];
386 +       char *tmp = 0;
387 +       int unlink_tmp = 0;
388 +
389 +       if (source_filter) {
390 +               char *p;
391 +               char *sep = " \t";
392 +               int i;
393 +               for (p = strtok(source_filter, sep), i = 0;
394 +                    p && i < MAX_FILTER_ARGS;
395 +                    p = strtok(0, sep))
396 +                       filter_argv[i++] = p;
397 +               filter_argv[i] = NULL;
398 +               if (p) {
399 +                       rprintf(FERROR,
400 +                               "Too many arguments to source-filter (> %d)\n",
401 +                               MAX_FILTER_ARGS);
402 +                       exit_cleanup(RERR_SYNTAX);
403 +               }
404 +       }
405  
406         if (verbose > 2)
407                 rprintf(FINFO, "send_files starting\n");
408 @@ -297,6 +318,7 @@ void send_files(struct file_list *flist,
409                         return;
410                 }
411  
412 +               unlink_tmp = 0;
413                 fd = do_open(fname, O_RDONLY, 0);
414                 if (fd == -1) {
415                         if (errno == ENOENT) {
416 @@ -325,6 +347,33 @@ void send_files(struct file_list *flist,
417                         return;
418                 }
419  
420 +               if (source_filter) {
421 +                       int fd2;
422 +                       char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
423 +
424 +                       tmp = strdup(tmpl);
425 +                       fd2 = mkstemp(tmp);
426 +                       if (fd2 == -1) {
427 +                               rprintf(FERROR, "mkstemp %s failed: %s\n",
428 +                                       tmp, strerror(errno));
429 +                       } else {
430 +                               int status;
431 +                               pid_t pid = run_filter_on_file(filter_argv, fd2, fd);
432 +                               close(fd);
433 +                               close(fd2);
434 +                               wait_process_with_flush(pid, &status);
435 +                               if (status != 0) {
436 +                                       rprintf(FERROR,
437 +                                           "bypassing source filter %s; exited with code: %d\n",
438 +                                           source_filter, status);
439 +                                       fd = do_open(fname, O_RDONLY, 0);
440 +                               } else {
441 +                                       fd = do_open(tmp, O_RDONLY, 0);
442 +                                       unlink_tmp = 1;
443 +                               }
444 +                       }
445 +               }
446 +
447                 if (st.st_size) {
448                         int32 read_size = MAX(s->blength * 3, MAX_MAP_SIZE);
449                         mbuf = map_file(fd, st.st_size, read_size, s->blength);
450 @@ -366,6 +415,8 @@ void send_files(struct file_list *flist,
451                         }
452                 }
453                 close(fd);
454 +               if (unlink_tmp)
455 +                       unlink(tmp);
456  
457                 free_sums(s);
458