3 One feature missing from rsync, and requested on this list before, is
4 on-the-fly conversion of filename character encoding. For example, I
5 often need to sync files having Hebrew filenames from a UTF-8 system
6 (Linux) to an ISO8859-8 system (Cygwin on Windows 2000 using the
7 non-Unicode Win32 interface). Other circumstances surely abound.
9 Attached is a patch against rsync 2.6.2 that adds an "--fname-convert"
10 option. When the argument "--fname-convert CONV" is given, rsync pipes
11 every filename through the program CONV, and the filename presented to the
12 server will be CONV's output instead of the raw filename.
16 $ rsync --fname-convert 'tr y Y' /tmp/xyz /tmp/
20 Perhaps the most useful case is using iconv:
21 $ rsync --fname-convert 'iconv -f utf8 -t iso8859-8' ...
23 I chose to allow invocation of arbitrary programs instead of using
24 libiconv (or equivalent) in order to avoid external dependencies, and to
25 offer more flexibility. The price is that some heuristics were needed to
26 avoid the deadlock problems that tend to occur when filtering data
27 through a program that uses buffered I/O -- see the comments at the top
28 of the new file fnameconv.c. The delay you may have noticed in the above
29 artificial example using "tr" is due to these heuristics; it occurs just
30 once per rsync invocation, not for every file.
32 I believe there are no server-side security implications, since all
33 conversion is done at the client and the server is oblivious to it. On
34 the client, conversion is done before sanitize_path() and besides,
35 providing a sane converter program is the client's responsibility anyway.
37 In verbose mode the updating of non-regular files is reported via
38 rprintf() by the server, so the client will see the converted filename
39 instead of the raw filename -- see my comment in recv_generator(). Fixing
40 this requires some delicate changes so I left it as is, but it seems
43 Most of the new code is in the new file fnameconv.c. The patch lightly
44 touches some other files, mostly flist.c and the addition/extension of
45 some utility functions. I took the opportunity to fix an argument
46 parsing buffer overflow bug in main.c. Note that you'll need to run
47 autoconf and 'make proto'.
50 --- Makefile.in 15 May 2004 00:48:11 -0000 1.101
51 +++ Makefile.in 29 May 2004 21:31:45 -0000
52 @@ -35,7 +35,7 @@ OBJS1=rsync.o generator.o receiver.o cle
53 main.o checksum.o match.o syscall.o log.o backup.o
54 OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \
55 fileio.o batch.o clientname.o
56 -OBJS3=progress.o pipe.o
57 +OBJS3=progress.o pipe.o fnameconv.o
58 DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
59 popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \
60 popt/popthelp.o popt/poptparse.o
61 --- cleanup.c 13 May 2004 07:08:18 -0000 1.22
62 +++ cleanup.c 29 May 2004 21:31:45 -0000
65 extern int keep_partial;
66 extern int log_got_error;
67 +extern char *fname_convert_cmd;
70 * Close all open sockets and files, allowing a (somewhat) graceful
71 @@ -121,6 +122,8 @@ void _exit_cleanup(int code, const char
72 finish_transfer(cleanup_new_fname, fname, cleanup_file, 0);
75 + if (fname_convert_cmd)
76 + cleanup_fname_convert();
78 do_unlink(cleanup_fname);
80 --- errcode.h 15 Dec 2003 08:04:14 -0000 1.8
81 +++ errcode.h 29 May 2004 21:31:45 -0000
83 #define RERR_STREAMIO 12 /* error in rsync protocol data stream */
84 #define RERR_MESSAGEIO 13 /* errors with program diagnostics */
85 #define RERR_IPC 14 /* error in IPC code */
86 +#define RERR_FNAMECONV 15 /* error in filename conversion */
88 #define RERR_SIGNAL 20 /* status returned when sent SIGUSR1, SIGINT */
89 #define RERR_WAITCHILD 21 /* some error returned by waitpid() */
90 --- flist.c 29 May 2004 21:21:17 -0000 1.226
91 +++ flist.c 29 May 2004 21:31:46 -0000
92 @@ -43,6 +43,7 @@ extern int cvs_exclude;
95 extern char curr_dir[MAXPATHLEN];
96 +extern char *fname_convert_cmd;
97 extern char *files_from;
98 extern int filesfrom_fd;
100 @@ -346,7 +347,10 @@ void send_file_entry(struct file_struct
102 io_write_phase = "send_file_entry";
104 - f_name_to(file, fname);
105 + if (fname_convert_cmd && !am_server) /* fname conversion always done on client */
106 + convert_fname(fname, f_name(file), MAXPATHLEN);
108 + f_name_to(file, fname);
112 @@ -559,6 +563,9 @@ void receive_file_entry(struct file_stru
114 strlcpy(lastname, thisname, MAXPATHLEN);
116 + if (fname_convert_cmd && !am_server) /* fname conversion always done on client */
117 + convert_fname(thisname, lastname, MAXPATHLEN);
119 clean_fname(thisname);
122 @@ -1041,6 +1048,9 @@ struct file_list *send_file_list(int f,
124 start_write = stats.total_written;
127 + init_fname_convert();
129 flist = flist_new(f == -1 ? WITHOUT_HLINK : WITH_HLINK,
132 @@ -1215,6 +1225,9 @@ struct file_list *send_file_list(int f,
133 write_batch_flist_info(flist->count, flist->files);
136 + if (fname_convert_cmd && !am_server)
137 + cleanup_fname_convert();
142 @@ -1237,6 +1250,9 @@ struct file_list *recv_file_list(int f)
144 start_read = stats.total_read;
146 + if (fname_convert_cmd && !am_server)
147 + init_fname_convert();
149 flist = flist_new(WITH_HLINK, "recv_file_list");
152 @@ -1291,6 +1307,9 @@ struct file_list *recv_file_list(int f)
156 + if (fname_convert_cmd && !am_server)
157 + cleanup_fname_convert();
162 --- /dev/null 1 Jan 1970 00:00:00 -0000
163 +++ fnameconv.c 29 May 2004 21:31:46 -0000
165 +/* -*- c-file-style: "linux" -*-
167 + * Copyright (C) 2004 by Eran Tromer
169 + * This program is free software; you can redistribute it and/or modify
170 + * it under the terms of the GNU General Public License as published by
171 + * the Free Software Foundation; either version 2 of the License, or
172 + * (at your option) any later version.
174 + * This program is distributed in the hope that it will be useful,
175 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
176 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
177 + * GNU General Public License for more details.
179 + * You should have received a copy of the GNU General Public License
180 + * along with this program; if not, write to the Free Software
181 + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
184 +/* Handles filename conversion through an external process. Implements
185 + * two modes of operation:
186 + * In persistent mode, a single filename converter is kept running;
187 + * for each query we feed it a single line and read back a single
188 + * line. This will fail for programs that use buffered I/O, and will
189 + * get into a deadlock.
190 + * In non-persistent mode, a converter is invoked and killed for each
191 + * query. This has a very high overhead, but will work for any
193 + * We start in persistent mode, and if we suspect a deadlock (i.e.,
194 + * nothing happens for FNAME_CONV_PERSISTENCE_TIMEOUT milliseconds)
195 + * then we smoothly fall back to non-persistent mode.
197 + * Filename conversion errors are always considered fatal, since an
198 + * incorrectly named file could cause unpredictable damage.
203 +#define FNAME_CONV_PERSISTENCE_TIMEOUT 3000 /* milliseconds */
205 +static int conv_persistent = 1;
206 +static pid_t conv_pid = -1;
207 +static int conv_write_fd = -1, conv_read_fd;
208 +extern char *fname_convert_cmd;
209 +extern int blocking_io;
212 + * Splits cmd on spaces.
214 +static void split_on_spaces(char *cmd, char **parts) {
217 + char *cmd2 = strdup(cmd);
219 + rprintf(FERROR, "Out of memory while parsing filename filter %s\n", cmd);
220 + exit_cleanup(RERR_MALLOC);
223 + for (tok = strtok(cmd2, " "); tok; tok = strtok(NULL, " ")) {
224 + if (nparts >= MAX_ARGS) {
225 + rprintf(FERROR, "Filename conversion command is too long: %s\n", cmd);
226 + exit_cleanup(RERR_SYNTAX);
228 + parts[nparts++] = tok;
230 + parts[nparts] = NULL;
235 + * Runs the filename converter process. Should be called before filename
236 + * conversion begins (actually it's not necessary, but it keeps the progress report
239 +void init_fname_convert()
241 + if (fname_convert_cmd && conv_pid < 0) {
242 + char *args[MAX_ARGS];
245 + rprintf(FINFO, "Running filename converter: %s\n", fname_convert_cmd);
246 + split_on_spaces(fname_convert_cmd, args);
247 + /* Invoke child pipe with non-blocking IO and without registering it for
248 + * autocleanup (the latter may blow up the all_pids table, and is not needed
249 + * since we have our own cleanup handler). */
250 + conv_pid = piped_child(args, &conv_read_fd, &conv_write_fd, 0, 0);
251 + set_nonblocking(conv_write_fd);
252 + set_nonblocking(conv_read_fd);
257 + * Kills the filename converter process. Should be called when the file
258 + * list creation is done. We assume that the converter will terminate
259 + * soon after its standard input is closed.
261 +void cleanup_fname_convert()
263 + if (conv_pid >= 0) {
265 + if (conv_write_fd >= 0) {
266 + close(conv_write_fd);
267 + conv_write_fd = -1;
269 + close(conv_read_fd);
270 + waitpid(conv_pid, &status, 0);
276 + * Converts the filename from src into dest, using at most maxlen
277 + * characters of dest.
279 +void convert_fname(char *dest, const char *src, unsigned int maxlen)
284 + unsigned int srcrem, dstrem;
286 + init_fname_convert();
288 + /* Send and receive strings simultaneously to avoid deadlock: */
289 + srcrem = strlen(src)+1; /* chars left to send (incl. terminating LF) */
290 + dstrem = maxlen-1; /* free chars left in dest */
294 + /* Write as much as possible: */
296 + res = write(conv_write_fd, srcp, srcrem-1);
297 + if (res < 0 && errno != EAGAIN) {
298 + rprintf(FERROR, "Error writing to fname converter (filename: %s): %s\n", strerror(errno), src);
299 + exit_cleanup(RERR_FNAMECONV);
301 + if (res > 0) { /* wrote something */
306 + if (srcrem == 1) { /* final LF */
307 + res = write(conv_write_fd, "\n", 1);
308 + if (res < 0 && errno != EAGAIN) {
309 + rprintf(FERROR, "Error writing to fname converter (filename: %s): %s\n", strerror(errno), src);
310 + exit_cleanup(RERR_FNAMECONV);
312 + if (res > 0) { /* wrote final LF */
314 + if (!conv_persistent) {
315 + close(conv_write_fd);
316 + conv_write_fd = -1;
321 + /* Read as much as possible: */
322 + res = read(conv_read_fd, destp, dstrem);
323 + if (res < 0 && errno != EAGAIN) {
324 + rprintf(FERROR, "Error reading from filename converter (filename: %s):%s \n", strerror(errno), src);
325 + exit_cleanup(RERR_FNAMECONV);
327 + if (res == 0) { /* EOF */
328 + rprintf(FERROR, "EOF from filename converter (filename: %s)\n", src);
329 + exit_cleanup(RERR_FNAMECONV);
334 + if (destp[-1] == '\n' || destp[-1] == '\r')
335 + break; /* End of line. Yippy! */
337 + rprintf(FINFO, "Name converter output too long (filename: %s)\n", src);
338 + exit_cleanup(RERR_FNAMECONV);
342 + /* Await activity */
343 + if (!await_fds(conv_read_fd, !srcrem ? -1 : conv_write_fd, FNAME_CONV_PERSISTENCE_TIMEOUT)) {
344 + if (srcrem == 0 && conv_persistent) {
345 + /* We finished writing but nothing happens. It looks like the converter program
346 + * is using buffered I/O and thus wait to read more input, but we can't give it
347 + * the next filename yet. Fall back to non-persistent mode. */
349 + rprintf(FINFO, "Filename converter blocked, disabling persistence to recover.\n");
351 + conv_persistent = 0;
352 + close(conv_write_fd);
353 + conv_write_fd = -1;
358 + /* Cleanup and sanity check */
359 + if (!conv_persistent)
360 + cleanup_fname_convert();
362 + close(conv_write_fd);
363 + rprintf(FERROR, "Name converter produced output before reading all its input for file: %s\n", src);
364 + exit_cleanup(RERR_FNAMECONV);
367 + /* Chop newline chars */
369 + if (destp > dest && *destp == '\n')
371 + if (destp > dest && *destp == '\r')
373 + if (++destp == dest) {
374 + rprintf(FERROR, "Name converter output is empty (filename: %s)\n", src);
375 + exit_cleanup(RERR_FNAMECONV);
378 + /* Also, we may have a leading CR left over from a CRLF of the previous line */
380 + memmove(dest, dest+1, destp-dest-1);
383 + rprintf(FINFO, "Converted filename: %s -> %s\n", src, dest);
385 --- generator.c 18 May 2004 08:50:17 -0000 1.85
386 +++ generator.c 29 May 2004 21:31:46 -0000
387 @@ -267,6 +267,12 @@ static void generate_and_send_sums(struc
389 * @note This comment was added later by mbp who was trying to work it
390 * out. It might be wrong.
392 + * TODO: The filename seen in recv_generator is after filename
393 + * conversion. In verbose mode, directories, symlinks and device
394 + * files are printf()ed here but regular files are rprintf()ed on the
395 + * sender (unconverted). To solve the above, move all progress
396 + * reporting to the sender.
398 void recv_generator(char *fname, struct file_struct *file, int i, int f_out)
400 --- log.c 15 May 2004 19:31:16 -0000 1.73
401 +++ log.c 29 May 2004 21:31:47 -0000
402 @@ -57,6 +57,7 @@ struct {
403 { RERR_STREAMIO , "error in rsync protocol data stream" },
404 { RERR_MESSAGEIO , "errors with program diagnostics" },
405 { RERR_IPC , "error in IPC code" },
406 + { RERR_FNAMECONV , "error in filename conversion" },
407 { RERR_SIGNAL , "received SIGUSR1 or SIGINT" },
408 { RERR_WAITCHILD , "some error returned by waitpid()" },
409 { RERR_MALLOC , "error allocating core memory buffers" },
410 --- main.c 19 May 2004 22:19:19 -0000 1.195
411 +++ main.c 29 May 2004 21:31:47 -0000
412 @@ -217,7 +217,7 @@ static pid_t do_cmd(char *cmd, char *mac
413 int *f_in, int *f_out)
417 + char *args[MAX_ARGS];
419 char *tok, *dir = NULL;
421 @@ -232,8 +232,13 @@ static pid_t do_cmd(char *cmd, char *mac
425 - for (tok = strtok(cmd, " "); tok; tok = strtok(NULL, " "))
426 + for (tok = strtok(cmd, " "); tok; tok = strtok(NULL, " ")) {
427 + if (argc >= MAX_ARGS) {
428 + rprintf(FERROR, "Command is too long\n");
429 + exit_cleanup(RERR_SYNTAX);
434 /* check to see if we've already been given '-l user' in
435 * the remote-shell command */
436 @@ -296,7 +301,7 @@ static pid_t do_cmd(char *cmd, char *mac
437 create_flist_from_batch(); /* sets batch_flist */
438 ret = local_child(argc, args, f_in, f_out, child_main);
440 - ret = piped_child(args,f_in,f_out);
441 + ret = piped_child(args, f_in, f_out, blocking_io, 1);
445 --- options.c 27 May 2004 21:51:53 -0000 1.153
446 +++ options.c 29 May 2004 21:31:48 -0000
447 @@ -125,6 +125,7 @@ char *backup_dir = NULL;
448 char backup_dir_buf[MAXPATHLEN];
449 int rsync_port = RSYNC_PORT;
451 +char *fname_convert_cmd = NULL;
455 @@ -268,6 +269,7 @@ void usage(enum logcode F)
456 rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
457 rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n");
458 rprintf(F," -P equivalent to --partial --progress\n");
459 + rprintf(F," --fname-convert=CMD invoke CMD for filename conversion\n");
460 rprintf(F," -z, --compress compress file data\n");
461 rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n");
462 rprintf(F," --exclude=PATTERN exclude files matching PATTERN\n");
463 @@ -364,6 +366,7 @@ static struct poptOption long_options[]
464 {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 },
465 {"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 },
466 {"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 },
467 + {"fname-convert", 0, POPT_ARG_STRING, &fname_convert_cmd, 0, 0, 0 },
468 /* TODO: Should this take an optional int giving the compression level? */
469 {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 },
470 {"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 },
471 --- pipe.c 15 May 2004 19:31:10 -0000 1.7
472 +++ pipe.c 29 May 2004 21:31:48 -0000
475 extern int am_sender;
476 extern int am_server;
477 -extern int blocking_io;
478 extern int orig_umask;
479 extern int read_batch;
480 extern int filesfrom_fd;
481 @@ -40,8 +39,10 @@ extern int filesfrom_fd;
482 * If blocking_io is set then use blocking io on both fds. That can be
483 * used to cope with badly broken rsh implementations like the one on
486 + * If register_child is nonzero then the child is registered for autocleanup.
488 -pid_t piped_child(char **command, int *f_in, int *f_out)
489 +pid_t piped_child(char **command, int *f_in, int *f_out, int blocking_io, int register_child)
492 int to_child_pipe[2];
493 @@ -57,7 +58,7 @@ pid_t piped_child(char **command, int *f
498 + pid = register_child ? do_fork() : fork();
500 rsyserr(FERROR, errno, "fork");
501 exit_cleanup(RERR_IPC);
502 --- syscall.c 18 Feb 2004 22:33:21 -0000 1.30
503 +++ syscall.c 29 May 2004 21:31:48 -0000
504 @@ -231,3 +231,34 @@ char *d_name(struct dirent *di)
510 + * A wrapper around select(2) that guarantees Linux-like updating of
511 + * the timeout argument to contain the time left, so we can simply
512 + * re-invoke in case of EINTR or EAGAIN. On BSD, select(2) doesn't
513 + * change the timeout argument by itself.
515 +int do_select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout)
517 + struct timeval intended, before, after;
520 + if (timeout == NULL)
521 + return select(n, readfds, writefds, exceptfds, timeout);
523 + intended = *timeout;
524 + gettimeofday(&before, NULL);
525 + result = select(n, readfds, writefds, exceptfds, timeout);
526 + gettimeofday(&after, NULL);
527 + timeout->tv_sec = intended.tv_sec - (after.tv_sec - before.tv_sec);
528 + timeout->tv_usec = intended.tv_usec - (after.tv_usec - before.tv_usec);
529 + if (timeout->tv_usec >= 1000000) {
531 + timeout->tv_usec -= 1000000;
532 + } else if (timeout->tv_usec < 0) {
533 + --(timeout)->tv_sec;
534 + timeout->tv_usec += 1000000;
539 --- util.c 24 May 2004 22:59:16 -0000 1.147
540 +++ util.c 29 May 2004 21:31:48 -0000
541 @@ -1135,3 +1135,52 @@ void *_realloc_array(void *ptr, unsigned
542 return malloc(size * num);
543 return realloc(ptr, size * num);
547 + * Blocks until one of the following happens:
548 + * - read_fd is nonnegative and has data to read
549 + * - write_fd is nonnegative and can be written to
550 + * - something terrible happened to either
551 + * - the timeout (in milliseconds) has elapsed
552 + * Return value is zero iff the timeout occurred.
554 +char await_fds(int read_fd, int write_fd, int timeout_ms)
556 + fd_set read_fds, write_fds, except_fds;
560 + tv.tv_sec = timeout_ms/1000;
561 + tv.tv_usec = (timeout_ms%1000)*1000;
564 + FD_ZERO(&read_fds);
565 + FD_ZERO(&write_fds);
566 + FD_ZERO(&except_fds);
567 + if (write_fd >= 0) {
568 + FD_SET(write_fd, &write_fds);
569 + FD_SET(write_fd, &except_fds);
571 + if (read_fd >= 0) {
572 + FD_SET(read_fd, &read_fds);
573 + FD_SET(read_fd, &except_fds);
576 + res = do_select(MAX(0,MAX(read_fd, write_fd)+1), &read_fds, &write_fds, &except_fds, &tv);
579 + if (read_fd >= 0 && (FD_ISSET(read_fd, &read_fds) || FD_ISSET(read_fd, &except_fds)))
581 + if (write_fd >= 0 && (FD_ISSET(write_fd, &write_fds) || FD_ISSET(write_fd, &except_fds)))
583 + if (res == EINTR || res == EAGAIN) {
584 + continue; /* Retry */
587 + rprintf(FERROR, "Error awaiting fname converter: %s\n", strerror(errno));
588 + exit_cleanup(RERR_FNAMECONV);
590 + return 0; /* res == 0 and no FDs set, hence a timeout. */