Fixed failing hunks.
[rsync/rsync-patches.git] / source-filter_dest-filter.diff
1 CAUTION:  This patch compiles, but is otherwise totally untested!
2
3 This patch also implements --times-only.
4
5 Implementation details for the --source-filter and --dest-filter options:
6
7  - These options open a *HUGE* security hole in daemon mode unless they
8    are refused in your rsyncd.conf!
9
10  - Filtering disables the rsync algorithm. (This should be fixed.)
11
12  - Source filter makes temporary files in /tmp. (Should be overridable.)
13
14  - If the source filter fails, data is sent unfiltered. (Should be changed
15    to abort.)
16
17  - Failure of the destination filter causes data loss!!! (Should be changed
18    to abort.)
19
20  - If the filter changes the size of a file, you should use the --times-only
21    option to prevent repeated transfers of unchanged files.
22
23  - If the COMMAND contains single quotes, option-passing breaks.  (Needs
24    to be fixed.)
25
26 You should run "make proto" before running "make".
27
28 --- orig/generator.c    2005-08-17 06:45:07
29 +++ generator.c 2005-08-17 07:28:01
30 @@ -58,6 +58,7 @@ extern int append_mode;
31  extern int make_backups;
32  extern int csum_length;
33  extern int ignore_times;
34 +extern int times_only;
35  extern int size_only;
36  extern OFF_T max_size;
37  extern int io_error;
38 @@ -360,7 +361,7 @@ void itemize(struct file_struct *file, i
39  /* Perform our quick-check heuristic for determining if a file is unchanged. */
40  static int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
41  {
42 -       if (st->st_size != file->length)
43 +       if (!times_only && st->st_size != file->length)
44                 return 0;
45  
46         /* if always checksum is set then we use the checksum instead
47 --- orig/options.c      2005-08-27 21:11:26
48 +++ options.c   2005-08-27 21:27:17
49 @@ -90,6 +90,7 @@ int keep_partial = 0;
50  int safe_symlinks = 0;
51  int copy_unsafe_links = 0;
52  int size_only = 0;
53 +int times_only = 0;
54  int daemon_bwlimit = 0;
55  int bwlimit = 0;
56  int fuzzy_basis = 0;
57 @@ -138,6 +139,8 @@ char *basis_dir[MAX_BASIS_DIRS+1];
58  char *config_file = NULL;
59  char *shell_cmd = NULL;
60  char *log_format = NULL;
61 +char *source_filter = NULL;
62 +char *dest_filter = NULL;
63  char *password_file = NULL;
64  char *rsync_path = RSYNC_PATH;
65  char *backup_dir = NULL;
66 @@ -316,6 +319,7 @@ void usage(enum logcode F)
67    rprintf(F,"     --timeout=TIME          set I/O timeout in seconds\n");
68    rprintf(F," -I, --ignore-times          don't skip files that match in size and mod-time\n");
69    rprintf(F,"     --size-only             skip files that match in size\n");
70 +  rprintf(F,"     --times-only            skip files that match in mod-time\n");
71    rprintf(F,"     --modify-window=NUM     compare mod-times with reduced accuracy\n");
72    rprintf(F," -T, --temp-dir=DIR          create temporary files in directory DIR\n");
73    rprintf(F," -y, --fuzzy                 find similar file for basis if no dest file\n");
74 @@ -347,6 +351,8 @@ void usage(enum logcode F)
75    rprintf(F,"     --write-batch=FILE      write a batched update to FILE\n");
76    rprintf(F,"     --only-write-batch=FILE like --write-batch but w/o updating destination\n");
77    rprintf(F,"     --read-batch=FILE       read a batched update from FILE\n");
78 +  rprintf(F,"     --source-filter=COMMAND filter file through COMMAND at source\n");
79 +  rprintf(F,"     --dest-filter=COMMAND   filter file through COMMAND at destination\n");
80    rprintf(F,"     --protocol=NUM          force an older protocol version to be used\n");
81  #ifdef INET6
82    rprintf(F," -4, --ipv4                  prefer IPv4\n");
83 @@ -413,6 +419,7 @@ static struct poptOption long_options[] 
84    {"no-implied-dirs",  0,  POPT_ARG_VAL,    &implied_dirs, 0, 0, 0 },
85    {"ignore-times",    'I', POPT_ARG_NONE,   &ignore_times, 0, 0, 0 },
86    {"size-only",        0,  POPT_ARG_NONE,   &size_only, 0, 0, 0 },
87 +  {"times-only",       0,  POPT_ARG_NONE,   &times_only , 0, 0, 0 },
88    {"one-file-system", 'x', POPT_ARG_NONE,   &one_file_system, 0, 0, 0 },
89    {"update",          'u', POPT_ARG_NONE,   &update_only, 0, 0, 0 },
90    {"existing",         0,  POPT_ARG_NONE,   &only_existing, 0, 0, 0 },
91 @@ -481,6 +488,8 @@ static struct poptOption long_options[] 
92    {"password-file",    0,  POPT_ARG_STRING, &password_file, 0, 0, 0 },
93    {"blocking-io",      0,  POPT_ARG_VAL,    &blocking_io, 1, 0, 0 },
94    {"no-blocking-io",   0,  POPT_ARG_VAL,    &blocking_io, 0, 0, 0 },
95 +  {"source-filter",    0,  POPT_ARG_STRING, &source_filter, 0, 0, 0 },
96 +  {"dest-filter",      0,  POPT_ARG_STRING, &dest_filter, 0, 0, 0 },
97    {"protocol",         0,  POPT_ARG_INT,    &protocol_version, 0, 0, 0 },
98    {"checksum-seed",    0,  POPT_ARG_INT,    &checksum_seed, 0, 0, 0 },
99    {"server",           0,  POPT_ARG_NONE,   &am_server, 0, 0, 0 },
100 @@ -1219,6 +1228,16 @@ int parse_arguments(int *argc, const cha
101                 }
102         }
103  
104 +       if (source_filter || dest_filter) {
105 +               if (whole_file == 0) {
106 +                       snprintf(err_buf, sizeof err_buf,
107 +                                "--no-whole-file cannot be used with --%s-filter\n",
108 +                                source_filter ? "source" : "dest");
109 +                       return 0;
110 +               }
111 +               whole_file = 1;
112 +       }
113 +
114         if (files_from) {
115                 char *h, *p;
116                 int q;
117 @@ -1442,6 +1461,25 @@ void server_options(char **args,int *arg
118                         args[ac++] = "--only-write-batch=X";
119         }
120  
121 +       if (times_only && am_sender)
122 +               args[ac++] = "--times-only";
123 +
124 +       if (source_filter && !am_sender) {
125 +               /* Need to single quote the arg to keep the remote shell
126 +                * from splitting it.  FIXME: breaks if command has single quotes. */
127 +               if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0)
128 +                       goto oom;
129 +               args[ac++] = arg;
130 +       }
131 +
132 +       if (dest_filter && am_sender) {
133 +               /* Need to single quote the arg to keep the remote shell
134 +                * from splitting it.  FIXME: breaks if command has single quotes. */
135 +               if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0)
136 +                       goto oom;
137 +               args[ac++] = arg;
138 +       }
139 +
140         if (size_only)
141                 args[ac++] = "--size-only";
142  
143 --- orig/pipe.c 2005-04-09 18:00:29
144 +++ pipe.c      2005-08-17 07:24:44
145 @@ -158,3 +158,79 @@ pid_t local_child(int argc, char **argv,
146  
147         return pid;
148  }
149 +
150 +pid_t run_filter(char *command[], int out, int *pipe_to_filter) /* Fork command[] with stdin on a new pipe and stdout on fd `out`; returns the child's pid. */
151 +{
152 +       pid_t pid;
153 +       int pipefds[2]; /* [0] = read end (becomes the child's stdin), [1] = write end (handed back to the caller) */
154 +       
155 +       if (verbose >= 2)
156 +               print_child_argv(command);
157 +
158 +       if (pipe(pipefds) < 0) {
159 +               rsyserr(FERROR, errno, "pipe");
160 +               exit_cleanup(RERR_IPC);
161 +       }
162 +
163 +       pid = do_fork();
164 +       if (pid == -1) {
165 +               rsyserr(FERROR, errno, "fork");
166 +               exit_cleanup(RERR_IPC);
167 +       }
168 +
169 +       if (pid == 0) { /* child: wire up stdin/stdout, then exec the filter */
170 +               if (dup2(pipefds[0], STDIN_FILENO) < 0
171 +                || close(pipefds[1]) < 0
172 +                || dup2(out, STDOUT_FILENO) < 0) { /* NOTE(review): pipefds[0] and `out` are not closed after dup2, so the exec'd command inherits the extra descriptors */
173 +                       rsyserr(FERROR, errno, "Failed dup/close");
174 +                       exit_cleanup(RERR_IPC);
175 +               }
176 +               umask(orig_umask);
177 +               set_blocking(STDIN_FILENO);
178 +               if (blocking_io)
179 +                       set_blocking(STDOUT_FILENO);
180 +               execvp(command[0], command); /* returns only if the exec failed */
181 +               rsyserr(FERROR, errno, "Failed to exec %s",
182 +                       safe_fname(command[0]));
183 +               exit_cleanup(RERR_IPC);
184 +       }
185 +
186 +       if (close(pipefds[0]) < 0) { /* parent: drop the read end, keep only the write end */
187 +               rsyserr(FERROR, errno, "Failed to close");
188 +               exit_cleanup(RERR_IPC);
189 +       }
190 +
191 +       *pipe_to_filter = pipefds[1]; /* NOTE(review): the receiver.c hunk calls run_filter(filter_argv, fd2, &fd2), so this overwrites the caller's copy of `out` -- confirm that fd is closed elsewhere */
192 +
193 +       return pid;
194 +}
195 +
196 +pid_t run_filter_on_file(char *command[], int out, int in) /* Fork command[] with stdin on fd `in` and stdout on fd `out`; returns the child's pid. */
197 +{
198 +       pid_t pid;
199 +       
200 +       if (verbose >= 2)
201 +               print_child_argv(command);
202 +
203 +       pid = do_fork();
204 +       if (pid == -1) {
205 +               rsyserr(FERROR, errno, "fork");
206 +               exit_cleanup(RERR_IPC);
207 +       }
208 +
209 +       if (pid == 0) { /* child: redirect stdin/stdout to the given fds, then exec the filter */
210 +               if (dup2(in, STDIN_FILENO) < 0
211 +                || dup2(out, STDOUT_FILENO) < 0) { /* NOTE(review): `in` and `out` stay open in the child after dup2 and are inherited by the exec'd command */
212 +                       rsyserr(FERROR, errno, "Failed to dup2");
213 +                       exit_cleanup(RERR_IPC);
214 +               }
215 +               if (blocking_io) /* NOTE(review): unlike run_filter, no umask(orig_umask) or set_blocking(STDIN_FILENO) here -- confirm this is intentional */
216 +                       set_blocking(STDOUT_FILENO);
217 +               execvp(command[0], command); /* returns only if the exec failed */
218 +               rsyserr(FERROR, errno, "Failed to exec %s",
219 +                       safe_fname(command[0]));
220 +               exit_cleanup(RERR_IPC);
221 +       }
222 +
223 +       return pid; /* parent: `in` and `out` are left open for the caller to close */
224 +}
225 --- orig/receiver.c     2005-08-17 06:45:08
226 +++ receiver.c  2005-08-17 07:57:33
227 @@ -52,6 +52,7 @@ extern int inplace;
228  extern int delay_updates;
229  extern struct stats stats;
230  extern char *log_format;
231 +extern char *dest_filter;
232  extern char *tmpdir;
233  extern char *partial_dir;
234  extern char *basis_dir[];
235 @@ -412,6 +413,8 @@ int recv_files(int f_in, struct file_lis
236                       : !am_server && log_format_has_i;
237         int max_phase = protocol_version >= 29 ? 2 : 1;
238         int i, recv_ok;
239 +       pid_t pid = 0;
240 +       char *filter_argv[MAX_FILTER_ARGS + 1];
241  
242         if (verbose > 2)
243                 rprintf(FINFO,"recv_files(%d) starting\n",flist->count);
244 @@ -424,6 +427,23 @@ int recv_files(int f_in, struct file_lis
245         if (delay_updates)
246                 init_delayed_bits(flist->count);
247  
248 +       if (dest_filter) {
249 +               char *p;
250 +               char *sep = " \t";
251 +               int i;
252 +               for (p = strtok(dest_filter, sep), i = 0;
253 +                    p && i < MAX_FILTER_ARGS;
254 +                    p = strtok(0, sep))
255 +                       filter_argv[i++] = p;
256 +               filter_argv[i] = NULL;
257 +               if (p) {
258 +                       rprintf(FERROR,
259 +                               "Too many arguments to dest-filter (> %d)\n",
260 +                               MAX_FILTER_ARGS);
261 +                       exit_cleanup(RERR_SYNTAX);
262 +               }
263 +       }
264 +
265         while (1) {
266                 cleanup_disable();
267  
268 @@ -662,6 +682,9 @@ int recv_files(int f_in, struct file_lis
269                 else if (!am_server && verbose && do_progress)
270                         rprintf(FINFO, "%s\n", safe_fname(fname));
271  
272 +               if (dest_filter)
273 +                       pid = run_filter(filter_argv, fd2, &fd2);
274 +
275                 /* recv file data */
276                 recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
277                                        fname, fd2, file->length);
278 @@ -677,6 +700,16 @@ int recv_files(int f_in, struct file_lis
279                         exit_cleanup(RERR_FILEIO);
280                 }
281  
282 +               if (dest_filter) {
283 +                       int status;
284 +                       wait_process(pid, &status);
285 +                       if (status != 0) {
286 +                               rprintf(FERROR, "filter %s exited code: %d\n",
287 +                                       dest_filter, status);
288 +                               continue;
289 +                       }
290 +               }
291 +
292                 if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
293                         finish_transfer(fname, fnametmp, file, recv_ok, 1);
294                         if (partialptr != fname && fnamecmp == partialptr) {
295 --- orig/rsync.h        2005-08-17 06:45:08
296 +++ rsync.h     2005-08-17 07:10:11
297 @@ -103,6 +103,7 @@
298  #define IOERR_DEL_LIMIT (1<<2)
299  
300  #define MAX_ARGS 1000
301 +#define MAX_FILTER_ARGS 100
302  #define MAX_BASIS_DIRS 20
303  #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100)
304  
305 --- orig/rsync.yo       2005-08-27 21:05:12
306 +++ rsync.yo    2005-08-17 07:08:21
307 @@ -350,6 +350,7 @@ to the detailed description below for a 
308       --timeout=TIME          set I/O timeout in seconds
309   -I, --ignore-times          don't skip files that match size and time
310       --size-only             skip files that match in size
311 +     --times-only            skip files that match in mod-time
312       --modify-window=NUM     compare mod-times with reduced accuracy
313   -T, --temp-dir=DIR          create temporary files in directory DIR
314   -y, --fuzzy                 find similar file for basis if no dest file
315 @@ -381,6 +382,8 @@ to the detailed description below for a 
316       --write-batch=FILE      write a batched update to FILE
317       --only-write-batch=FILE like --write-batch but w/o updating dest
318       --read-batch=FILE       read a batched update from FILE
319 +     --source-filter=COMMAND filter file through COMMAND at source
320 +     --dest-filter=COMMAND   filter file through COMMAND at destination
321       --protocol=NUM          force an older protocol version to be used
322       --checksum-seed=NUM     set block/file checksum seed (advanced)
323   -4, --ipv4                  prefer IPv4
324 @@ -1346,6 +1349,33 @@ file previously generated by bf(--write-
325  If em(FILE) is "-" the batch data will be read from standard input.
326  See the "BATCH MODE" section for details.
327  
328 +dit(bf(--source-filter=COMMAND)) This option allows the user to specify a
329 +filter program that will be applied to the contents of all transferred
330 +regular files before the data is sent to destination.  COMMAND will receive
331 +the data on its standard input and it should write the filtered data to
332 +standard output.  COMMAND should exit non-zero if it cannot process the
333 +data or if it encounters an error when writing the data to stdout.
334 +
335 +Example: --source-filter="gzip -9" will cause files to be compressed at
336 +the source before they are sent.
337 +Use of --source-filter automatically enables --whole-file.
338 +If your filter does not output the same number of bytes that it received on
339 +input, you should use --times-only to disable size and content checks on
340 +subsequent rsync runs.
341 +
342 +dit(bf(--dest-filter=COMMAND)) This option allows you to specify a filter
343 +program that will be applied to the contents of all transferred regular
344 +files before the data is written to disk.  COMMAND will receive the data on
345 +its standard input and it should write the filtered data to standard
346 +output.  COMMAND should exit non-zero if it cannot process the data or if
347 +it encounters an error when writing the data to stdout.
348 +
349 +Example: --dest-filter="gzip -9" will cause files to be stored compressed at the destination.
350 +Use of --dest-filter automatically enables --whole-file.
351 +If your filter does not output the same number of bytes that it
352 +received on input, you should use --times-only to disable size and
353 +content checks on subsequent rsync runs.
354 +
355  dit(bf(--protocol=NUM)) Force an older protocol version to be used.  This
356  is useful for creating a batch file that is compatible with an older
357  version of rsync.  For instance, if rsync 2.6.4 is being used with the
358 --- orig/sender.c       2005-07-28 01:46:25
359 +++ sender.c    2005-08-17 07:40:49
360 @@ -41,6 +41,7 @@ extern int write_batch;
361  extern struct stats stats;
362  extern struct file_list *the_file_list;
363  extern char *log_format;
364 +extern char *source_filter;
365  
366  
367  /**
368 @@ -221,6 +222,26 @@ void send_files(struct file_list *flist,
369                       : !am_server && log_format_has_i;
370         int f_xfer = write_batch < 0 ? batch_fd : f_out;
371         int i, j;
372 +       char *filter_argv[MAX_FILTER_ARGS + 1];
373 +       char *tmp = 0;
374 +       int unlink_tmp = 0;
375 +
376 +       if (source_filter) {
377 +               char *p;
378 +               char *sep = " \t";
379 +               int i;
380 +               for (p = strtok(source_filter, sep), i = 0;
381 +                    p && i < MAX_FILTER_ARGS;
382 +                    p = strtok(0, sep))
383 +                       filter_argv[i++] = p;
384 +               filter_argv[i] = NULL;
385 +               if (p) {
386 +                       rprintf(FERROR,
387 +                               "Too many arguments to source-filter (> %d)\n",
388 +                               MAX_FILTER_ARGS);
389 +                       exit_cleanup(RERR_SYNTAX);
390 +               }
391 +       }
392  
393         if (verbose > 2)
394                 rprintf(FINFO, "send_files starting\n");
395 @@ -295,6 +316,7 @@ void send_files(struct file_list *flist,
396                         return;
397                 }
398  
399 +               unlink_tmp = 0;
400                 fd = do_open(fname, O_RDONLY, 0);
401                 if (fd == -1) {
402                         if (errno == ENOENT) {
403 @@ -323,6 +345,33 @@ void send_files(struct file_list *flist,
404                         return;
405                 }
406  
407 +               if (source_filter) {
408 +                       int fd2;
409 +                       char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
410 +
411 +                       tmp = strdup(tmpl);
412 +                       fd2 = mkstemp(tmp);
413 +                       if (fd2 == -1) {
414 +                               rprintf(FERROR, "mkstemp %s failed: %s\n",
415 +                                       tmp, strerror(errno));
416 +                       } else {
417 +                               int status;
418 +                               pid_t pid = run_filter_on_file(filter_argv, fd2, fd);
419 +                               close(fd);
420 +                               close(fd2);
421 +                               wait_process(pid, &status);
422 +                               if (status != 0) {
423 +                                       rprintf(FERROR,
424 +                                           "bypassing source filter %s; exited with code: %d\n",
425 +                                           source_filter, status);
426 +                                       fd = do_open(fname, O_RDONLY, 0);
427 +                               } else {
428 +                                       fd = do_open(tmp, O_RDONLY, 0);
429 +                                       unlink_tmp = 1;
430 +                               }
431 +                       }
432 +               }
433 +
434                 if (st.st_size) {
435                         int32 read_size = MAX(s->blength * 3, MAX_MAP_SIZE);
436                         mbuf = map_file(fd, st.st_size, read_size, s->blength);
437 @@ -367,6 +416,8 @@ void send_files(struct file_list *flist,
438                         }
439                 }
440                 close(fd);
441 +               if (unlink_tmp)
442 +                       unlink(tmp);
443  
444                 free_sums(s);
445