Made this actually work again.
[rsync/rsync-patches.git] / source-filter_dest-filter.diff
CommitLineData
33d38bc8
WD
1CAUTION: This patch compiles, but is otherwise totally untested!
2
3This patch also implements --times-only.
4
5Implementation details for the --source-filter and --dest-filter options:
6
7 - These options open a *HUGE* security hole in daemon mode unless they
8 are refused in your rsyncd.conf!
9
10 - Filtering disables the rsync algorithm. (This should be fixed.)
11
12 - Source filter makes temporary files in /tmp. (Should be overridable.)
13
14 - If the source filter fails, data is sent unfiltered. (Should be changed
15 to abort.)
16
17 - Failure of the destination filter causes data loss!!! (Should be changed
18 to abort.)
19
20 - If the filter changes the size of a file, you should use the --times-only option to
21 prevent repeated transfers of unchanged files.
22
23 - If the COMMAND contains single quotes, option-passing breaks. (Needs
24 to be fixed.)
25
26You should run "make proto" before running "make".
27
93ca4d27 28--- orig/generator.c 2006-01-14 08:14:30
33d38bc8 29+++ generator.c 2005-08-17 07:28:01
93ca4d27 30@@ -59,6 +59,7 @@ extern int append_mode;
33d38bc8
WD
31 extern int make_backups;
32 extern int csum_length;
33 extern int ignore_times;
34+extern int times_only;
35 extern int size_only;
36 extern OFF_T max_size;
93ca4d27
WD
37 extern OFF_T min_size;
38@@ -378,7 +379,7 @@ void itemize(struct file_struct *file, i
33d38bc8 39 /* Perform our quick-check heuristic for determining if a file is unchanged. */
93ca4d27 40 int unchanged_file(char *fn, struct file_struct *file, STRUCT_STAT *st)
33d38bc8
WD
41 {
42- if (st->st_size != file->length)
43+ if (!times_only && st->st_size != file->length)
44 return 0;
45
46 /* if always checksum is set then we use the checksum instead
93ca4d27 47--- orig/options.c 2006-01-14 08:14:30
489b0a72 48+++ options.c 2005-08-27 21:27:17
93ca4d27 49@@ -93,6 +93,7 @@ int keep_partial = 0;
33d38bc8
WD
50 int safe_symlinks = 0;
51 int copy_unsafe_links = 0;
52 int size_only = 0;
53+int times_only = 0;
54 int daemon_bwlimit = 0;
55 int bwlimit = 0;
56 int fuzzy_basis = 0;
93ca4d27 57@@ -142,6 +143,8 @@ char *basis_dir[MAX_BASIS_DIRS+1];
33d38bc8
WD
58 char *config_file = NULL;
59 char *shell_cmd = NULL;
60 char *log_format = NULL;
61+char *source_filter = NULL;
62+char *dest_filter = NULL;
63 char *password_file = NULL;
64 char *rsync_path = RSYNC_PATH;
65 char *backup_dir = NULL;
93ca4d27 66@@ -324,6 +327,7 @@ void usage(enum logcode F)
33d38bc8
WD
67 rprintf(F," --timeout=TIME set I/O timeout in seconds\n");
68 rprintf(F," -I, --ignore-times don't skip files that match in size and mod-time\n");
69 rprintf(F," --size-only skip files that match in size\n");
70+ rprintf(F," --times-only skip files that match in mod-time\n");
71 rprintf(F," --modify-window=NUM compare mod-times with reduced accuracy\n");
72 rprintf(F," -T, --temp-dir=DIR create temporary files in directory DIR\n");
73 rprintf(F," -y, --fuzzy find similar file for basis if no dest file\n");
93ca4d27 74@@ -358,6 +362,8 @@ void usage(enum logcode F)
33d38bc8
WD
75 rprintf(F," --write-batch=FILE write a batched update to FILE\n");
76 rprintf(F," --only-write-batch=FILE like --write-batch but w/o updating destination\n");
77 rprintf(F," --read-batch=FILE read a batched update from FILE\n");
78+ rprintf(F," --source-filter=COMMAND filter file through COMMAND at source\n");
79+ rprintf(F," --dest-filter=COMMAND filter file through COMMAND at destination\n");
80 rprintf(F," --protocol=NUM force an older protocol version to be used\n");
81 #ifdef INET6
82 rprintf(F," -4, --ipv4 prefer IPv4\n");
93ca4d27
WD
83@@ -428,6 +434,7 @@ static struct poptOption long_options[]
84 {"chmod", 0, POPT_ARG_STRING, &chmod_mode, 0, 0, 0 },
33d38bc8
WD
85 {"ignore-times", 'I', POPT_ARG_NONE, &ignore_times, 0, 0, 0 },
86 {"size-only", 0, POPT_ARG_NONE, &size_only, 0, 0, 0 },
87+ {"times-only", 0, POPT_ARG_NONE, &times_only , 0, 0, 0 },
33d38bc8 88 {"one-file-system", 'x', POPT_ARG_NONE, &one_file_system, 0, 0, 0 },
489b0a72 89 {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 },
93ca4d27
WD
90 {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 },
91@@ -499,6 +506,8 @@ static struct poptOption long_options[]
489b0a72
WD
92 {"password-file", 0, POPT_ARG_STRING, &password_file, 0, 0, 0 },
93 {"blocking-io", 0, POPT_ARG_VAL, &blocking_io, 1, 0, 0 },
94 {"no-blocking-io", 0, POPT_ARG_VAL, &blocking_io, 0, 0, 0 },
33d38bc8
WD
95+ {"source-filter", 0, POPT_ARG_STRING, &source_filter, 0, 0, 0 },
96+ {"dest-filter", 0, POPT_ARG_STRING, &dest_filter, 0, 0, 0 },
97 {"protocol", 0, POPT_ARG_INT, &protocol_version, 0, 0, 0 },
98 {"checksum-seed", 0, POPT_ARG_INT, &checksum_seed, 0, 0, 0 },
489b0a72 99 {"server", 0, POPT_ARG_NONE, &am_server, 0, 0, 0 },
93ca4d27 100@@ -1324,6 +1333,16 @@ int parse_arguments(int *argc, const cha
33d38bc8
WD
101 }
102 }
103
104+ if (source_filter || dest_filter) {
105+ if (whole_file == 0) {
106+ snprintf(err_buf, sizeof err_buf,
107+ "--no-whole-file cannot be used with --%s-filter\n",
108+ source_filter ? "source" : "dest");
109+ return 0;
110+ }
111+ whole_file = 1;
112+ }
113+
114 if (files_from) {
115 char *h, *p;
116 int q;
93ca4d27 117@@ -1558,6 +1577,25 @@ void server_options(char **args,int *arg
33d38bc8
WD
118 args[ac++] = "--only-write-batch=X";
119 }
120
121+ if (times_only && am_sender)
122+ args[ac++] = "--times-only";
123+
124+ if (source_filter && !am_sender) {
125+ /* Need to single quote the arg to keep the remote shell
126+ * from splitting it. FIXME: breaks if command has single quotes. */
127+ if (asprintf(&arg, "--source-filter='%s'", source_filter) < 0)
128+ goto oom;
129+ args[ac++] = arg;
130+ }
131+
132+ if (dest_filter && am_sender) {
133+ /* Need to single quote the arg to keep the remote shell
134+ * from splitting it. FIXME: breaks if command has single quotes. */
135+ if (asprintf(&arg, "--dest-filter='%s'", dest_filter) < 0)
136+ goto oom;
137+ args[ac++] = arg;
138+ }
139+
140 if (size_only)
141 args[ac++] = "--size-only";
142
93ca4d27
WD
143--- orig/pipe.c 2006-01-14 08:14:31
144+++ pipe.c 2006-01-14 08:34:59
145@@ -157,3 +157,77 @@ pid_t local_child(int argc, char **argv,
33d38bc8
WD
146
147 return pid;
148 }
149+
150+pid_t run_filter(char *command[], int out, int *pipe_to_filter)
151+{
152+ pid_t pid;
153+ int pipefds[2];
154+
155+ if (verbose >= 2)
156+ print_child_argv(command);
157+
158+ if (pipe(pipefds) < 0) {
159+ rsyserr(FERROR, errno, "pipe");
160+ exit_cleanup(RERR_IPC);
161+ }
162+
163+ pid = do_fork();
164+ if (pid == -1) {
165+ rsyserr(FERROR, errno, "fork");
166+ exit_cleanup(RERR_IPC);
167+ }
168+
169+ if (pid == 0) {
170+ if (dup2(pipefds[0], STDIN_FILENO) < 0
171+ || close(pipefds[1]) < 0
172+ || dup2(out, STDOUT_FILENO) < 0) {
173+ rsyserr(FERROR, errno, "Failed dup/close");
174+ exit_cleanup(RERR_IPC);
175+ }
176+ umask(orig_umask);
177+ set_blocking(STDIN_FILENO);
178+ if (blocking_io)
179+ set_blocking(STDOUT_FILENO);
180+ execvp(command[0], command);
93ca4d27 181+ rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
33d38bc8
WD
182+ exit_cleanup(RERR_IPC);
183+ }
184+
185+ if (close(pipefds[0]) < 0) {
186+ rsyserr(FERROR, errno, "Failed to close");
187+ exit_cleanup(RERR_IPC);
188+ }
189+
190+ *pipe_to_filter = pipefds[1];
191+
192+ return pid;
193+}
194+
195+pid_t run_filter_on_file(char *command[], int out, int in)
196+{
197+ pid_t pid;
198+
199+ if (verbose >= 2)
200+ print_child_argv(command);
201+
202+ pid = do_fork();
203+ if (pid == -1) {
204+ rsyserr(FERROR, errno, "fork");
205+ exit_cleanup(RERR_IPC);
206+ }
207+
208+ if (pid == 0) {
209+ if (dup2(in, STDIN_FILENO) < 0
210+ || dup2(out, STDOUT_FILENO) < 0) {
211+ rsyserr(FERROR, errno, "Failed to dup2");
212+ exit_cleanup(RERR_IPC);
213+ }
214+ if (blocking_io)
215+ set_blocking(STDOUT_FILENO);
216+ execvp(command[0], command);
93ca4d27 217+ rsyserr(FERROR, errno, "Failed to exec %s", command[0]);
33d38bc8
WD
218+ exit_cleanup(RERR_IPC);
219+ }
220+
221+ return pid;
222+}
93ca4d27 223--- orig/receiver.c 2006-01-14 08:14:31
33d38bc8 224+++ receiver.c 2005-08-17 07:57:33
93ca4d27 225@@ -53,6 +53,7 @@ extern int inplace;
33d38bc8
WD
226 extern int delay_updates;
227 extern struct stats stats;
228 extern char *log_format;
229+extern char *dest_filter;
230 extern char *tmpdir;
231 extern char *partial_dir;
232 extern char *basis_dir[];
93ca4d27 233@@ -411,6 +412,8 @@ int recv_files(int f_in, struct file_lis
33d38bc8
WD
234 : !am_server && log_format_has_i;
235 int max_phase = protocol_version >= 29 ? 2 : 1;
236 int i, recv_ok;
237+ pid_t pid = 0;
238+ char *filter_argv[MAX_FILTER_ARGS + 1];
239
240 if (verbose > 2)
241 rprintf(FINFO,"recv_files(%d) starting\n",flist->count);
93ca4d27 242@@ -423,6 +426,23 @@ int recv_files(int f_in, struct file_lis
33d38bc8
WD
243 if (delay_updates)
244 init_delayed_bits(flist->count);
245
246+ if (dest_filter) {
247+ char *p;
248+ char *sep = " \t";
249+ int i;
250+ for (p = strtok(dest_filter, sep), i = 0;
251+ p && i < MAX_FILTER_ARGS;
252+ p = strtok(0, sep))
253+ filter_argv[i++] = p;
254+ filter_argv[i] = NULL;
255+ if (p) {
256+ rprintf(FERROR,
257+ "Too many arguments to dest-filter (> %d)\n",
258+ MAX_FILTER_ARGS);
259+ exit_cleanup(RERR_SYNTAX);
260+ }
261+ }
262+
263 while (1) {
264 cleanup_disable();
265
93ca4d27 266@@ -665,6 +685,9 @@ int recv_files(int f_in, struct file_lis
33d38bc8 267 else if (!am_server && verbose && do_progress)
93ca4d27 268 rprintf(FINFO, "%s\n", fname);
33d38bc8
WD
269
270+ if (dest_filter)
271+ pid = run_filter(filter_argv, fd2, &fd2);
272+
273 /* recv file data */
274 recv_ok = receive_data(f_in, fnamecmp, fd1, st.st_size,
275 fname, fd2, file->length);
93ca4d27 276@@ -680,6 +703,16 @@ int recv_files(int f_in, struct file_lis
33d38bc8
WD
277 exit_cleanup(RERR_FILEIO);
278 }
279
280+ if (dest_filter) {
281+ int status;
282+ wait_process(pid, &status);
283+ if (status != 0) {
284+ rprintf(FERROR, "filter %s exited code: %d\n",
285+ dest_filter, status);
286+ continue;
287+ }
288+ }
289+
290 if ((recv_ok && (!delay_updates || !partialptr)) || inplace) {
291 finish_transfer(fname, fnametmp, file, recv_ok, 1);
292 if (partialptr != fname && fnamecmp == partialptr) {
93ca4d27 293--- orig/rsync.h 2006-01-14 08:14:31
33d38bc8
WD
294+++ rsync.h 2005-08-17 07:10:11
295@@ -103,6 +103,7 @@
296 #define IOERR_DEL_LIMIT (1<<2)
297
298 #define MAX_ARGS 1000
299+#define MAX_FILTER_ARGS 100
300 #define MAX_BASIS_DIRS 20
301 #define MAX_SERVER_ARGS (MAX_BASIS_DIRS*2 + 100)
302
93ca4d27 303--- orig/rsync.yo 2006-01-14 08:14:31
33d38bc8 304+++ rsync.yo 2005-08-17 07:08:21
489b0a72 305@@ -350,6 +350,7 @@ to the detailed description below for a
33d38bc8
WD
306 --timeout=TIME set I/O timeout in seconds
307 -I, --ignore-times don't skip files that match size and time
308 --size-only skip files that match in size
309+ --times-only skip files that match in mod-time
310 --modify-window=NUM compare mod-times with reduced accuracy
311 -T, --temp-dir=DIR create temporary files in directory DIR
312 -y, --fuzzy find similar file for basis if no dest file
93ca4d27 313@@ -384,6 +385,8 @@ to the detailed description below for a
33d38bc8
WD
314 --write-batch=FILE write a batched update to FILE
315 --only-write-batch=FILE like --write-batch but w/o updating dest
316 --read-batch=FILE read a batched update from FILE
317+ --source-filter=COMMAND filter file through COMMAND at source
318+ --dest-filter=COMMAND filter file through COMMAND at destination
319 --protocol=NUM force an older protocol version to be used
320 --checksum-seed=NUM set block/file checksum seed (advanced)
321 -4, --ipv4 prefer IPv4
93ca4d27
WD
322@@ -1411,6 +1414,33 @@ file previously generated by bf(--write-
323 If em(FILE) is bf(-), the batch data will be read from standard input.
33d38bc8
WD
324 See the "BATCH MODE" section for details.
325
326+dit(bf(--source-filter=COMMAND)) This option allows the user to specify a
327+filter program that will be applied to the contents of all transferred
328+regular files before the data is sent to destination. COMMAND will receive
329+the data on its standard input and it should write the filtered data to
330+standard output. COMMAND should exit non-zero if it cannot process the
331+data or if it encounters an error when writing the data to stdout.
332+
333+Example: --source-filter="gzip -9" will cause remote files to be
334+compressed.
335+Use of --source-filter automatically enables --whole-file.
336+If your filter does not output the same number of bytes that it received on
337+input, you should use --times-only to disable size and content checks on
338+subsequent rsync runs.
339+
340+dit(bf(--dest-filter=COMMAND)) This option allows you to specify a filter
341+program that will be applied to the contents of all transferred regular
342+files before the data is written to disk. COMMAND will receive the data on
343+its standard input and it should write the filtered data to standard
344+output. COMMAND should exit non-zero if it cannot process the data or if
345+it encounters an error when writing the data to stdout.
346+
347+Example: --dest-filter="gzip -9" will cause remote files to be compressed.
348+Use of --dest-filter automatically enables --whole-file.
349+If your filter does not output the same number of bytes that it
350+received on input, you should use --times-only to disable size and
351+content checks on subsequent rsync runs.
352+
353 dit(bf(--protocol=NUM)) Force an older protocol version to be used. This
354 is useful for creating a batch file that is compatible with an older
355 version of rsync. For instance, if rsync 2.6.4 is being used with the
93ca4d27 356--- orig/sender.c 2006-01-14 08:14:31
33d38bc8
WD
357+++ sender.c 2005-08-17 07:40:49
358@@ -41,6 +41,7 @@ extern int write_batch;
359 extern struct stats stats;
360 extern struct file_list *the_file_list;
361 extern char *log_format;
362+extern char *source_filter;
363
364
365 /**
93ca4d27 366@@ -219,6 +220,26 @@ void send_files(struct file_list *flist,
33d38bc8
WD
367 : !am_server && log_format_has_i;
368 int f_xfer = write_batch < 0 ? batch_fd : f_out;
369 int i, j;
370+ char *filter_argv[MAX_FILTER_ARGS + 1];
371+ char *tmp = 0;
372+ int unlink_tmp = 0;
373+
374+ if (source_filter) {
375+ char *p;
376+ char *sep = " \t";
377+ int i;
378+ for (p = strtok(source_filter, sep), i = 0;
379+ p && i < MAX_FILTER_ARGS;
380+ p = strtok(0, sep))
381+ filter_argv[i++] = p;
382+ filter_argv[i] = NULL;
383+ if (p) {
384+ rprintf(FERROR,
385+ "Too many arguments to source-filter (> %d)\n",
386+ MAX_FILTER_ARGS);
387+ exit_cleanup(RERR_SYNTAX);
388+ }
389+ }
390
391 if (verbose > 2)
392 rprintf(FINFO, "send_files starting\n");
93ca4d27 393@@ -293,6 +314,7 @@ void send_files(struct file_list *flist,
33d38bc8
WD
394 return;
395 }
396
397+ unlink_tmp = 0;
398 fd = do_open(fname, O_RDONLY, 0);
399 if (fd == -1) {
400 if (errno == ENOENT) {
93ca4d27 401@@ -321,6 +343,33 @@ void send_files(struct file_list *flist,
33d38bc8
WD
402 return;
403 }
404
405+ if (source_filter) {
406+ int fd2;
407+ char *tmpl = "/tmp/rsync-filtered_sourceXXXXXX";
408+
409+ tmp = strdup(tmpl);
410+ fd2 = mkstemp(tmp);
411+ if (fd2 == -1) {
412+ rprintf(FERROR, "mkstemp %s failed: %s\n",
413+ tmp, strerror(errno));
414+ } else {
415+ int status;
416+ pid_t pid = run_filter_on_file(filter_argv, fd2, fd);
417+ close(fd);
418+ close(fd2);
419+ wait_process(pid, &status);
420+ if (status != 0) {
421+ rprintf(FERROR,
422+ "bypassing source filter %s; exited with code: %d\n",
423+ source_filter, status);
424+ fd = do_open(fname, O_RDONLY, 0);
425+ } else {
426+ fd = do_open(tmp, O_RDONLY, 0);
427+ unlink_tmp = 1;
428+ }
429+ }
430+ }
431+
432 if (st.st_size) {
433 int32 read_size = MAX(s->blength * 3, MAX_MAP_SIZE);
434 mbuf = map_file(fd, st.st_size, read_size, s->blength);
93ca4d27 435@@ -363,6 +412,8 @@ void send_files(struct file_list *flist,
33d38bc8
WD
436 }
437 }
438 close(fd);
439+ if (unlink_tmp)
440+ unlink(tmp);
441
442 free_sums(s);
443