From 4a751edb65f6011cb12187e32b07a81e03315b77 Mon Sep 17 00:00:00 2001 From: Wayne Davison Date: Sat, 29 May 2004 21:34:26 +0000 Subject: [PATCH] New patch from Eran Tromer (tweaked for CVS). --- fname-convert.diff | 594 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 594 insertions(+) create mode 100644 fname-convert.diff diff --git a/fname-convert.diff b/fname-convert.diff new file mode 100644 index 0000000..0bfe920 --- /dev/null +++ b/fname-convert.diff @@ -0,0 +1,594 @@ +Eran Tromer writes: + +One feature missing from rsync, and requested on this list before, is +on-the-fly conversion of filename character encoding. For example, I +often need to sync files having Hebrew filenames from a UTF-8 system +(Linux) to an ISO8859-8 system (Cygwin on Windows 2000 using the +non-Unicode Win32 interface). Other circumstances surely abound. + +Attached is a patch against rsync 2.6.2 that adds an "--fname-convert" +option. When the argument "--fname-convert CONV" is given, rsync pipes +every filename through the program CONV, and filename presented to the +server will be CONV's output instead of the raw filename. + +Artificial example: +$ touch /tmp/xyz +$ rsync -fname-convert 'tr y Y' /tmp/xyz /tmp/ +$ ls /tmp/x?z +/tmp/xyz /tmp/xYz + +Perhaps the most useful case is using iconv: +$ rsync --fname-convert 'iconv -f utf8 -t iso8859-8' ... + +I chose to allow invocation of arbitrary programs instead of using +libiconv (or equivalent) in order to avoid external dependencies, and to +offer more flexibility. The price is that some heuristics were needed to +avoid the deadlock problems that tend to occur when filtering data +through a program that uses buffered I/O -- see the comments at the top +of the new file fnameconv.c. The delay you may have noticed in the above +artificial example using "tr" is due to these heuristics; it occurs just +once per rsync invocation, not for every file. + +I believe there are no server-side security implications, since all +conversion is done at the client and the server is oblivious to it. On +the client, conversion is done before sanitize_path() and besides, +providing a sane converter program is the client's responsibility anyway. + +In verbose mode the updating of non-regular files is reported via +rprintf() by the server, so the client will see the converted filename +instead the raw filename -- see my comment in recv_generator(). Fixing +this requires some delicate changes so I left it as is, but it seems +like a minor concern. + +Most of the new code is in the new file fnameconv.c. The patch lightly +touches some other files, mostly flist.c and the addition/extension of +some utility functions. I took the opportunity to fix an argument +parsing buffer overflow bug in main.c. Note that you'll need to run +autoconf and 'make proto'. + + +--- Makefile.in 15 May 2004 00:48:11 -0000 1.101 ++++ Makefile.in 29 May 2004 21:31:45 -0000 +@@ -35,7 +35,7 @@ OBJS1=rsync.o generator.o receiver.o cle + main.o checksum.o match.o syscall.o log.o backup.o + OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \ + fileio.o batch.o clientname.o +-OBJS3=progress.o pipe.o ++OBJS3=progress.o pipe.o fnameconv.o + DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o + popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \ + popt/popthelp.o popt/poptparse.o +--- cleanup.c 13 May 2004 07:08:18 -0000 1.22 ++++ cleanup.c 29 May 2004 21:31:45 -0000 +@@ -24,6 +24,7 @@ + extern int io_error; + extern int keep_partial; + extern int log_got_error; ++extern char *fname_convert_cmd; + + /** + * Close all open sockets and files, allowing a (somewhat) graceful +@@ -121,6 +122,8 @@ void _exit_cleanup(int code, const char + finish_transfer(cleanup_new_fname, fname, cleanup_file, 0); + } + io_flush(FULL_FLUSH); ++ if (fname_convert_cmd) ++ cleanup_fname_convert(); + if (cleanup_fname) + do_unlink(cleanup_fname); + if (code) { +--- errcode.h 15 Dec 2003 08:04:14 -0000 1.8 ++++ errcode.h 29 May 2004 21:31:45 -0000 +@@ -34,6 +34,7 @@ + #define RERR_STREAMIO 12 /* error in rsync protocol data stream */ + #define RERR_MESSAGEIO 13 /* errors with program diagnostics */ + #define RERR_IPC 14 /* error in IPC code */ ++#define RERR_FNAMECONV 15 /* error in filename conversion */ + + #define RERR_SIGNAL 20 /* status returned when sent SIGUSR1, SIGINT */ + #define RERR_WAITCHILD 21 /* some error returned by waitpid() */ +--- flist.c 29 May 2004 21:21:17 -0000 1.226 ++++ flist.c 29 May 2004 21:31:46 -0000 +@@ -43,6 +43,7 @@ extern int cvs_exclude; + + extern int recurse; + extern char curr_dir[MAXPATHLEN]; ++extern char *fname_convert_cmd; + extern char *files_from; + extern int filesfrom_fd; + +@@ -346,7 +347,11 @@ void send_file_entry(struct file_struct + + io_write_phase = "send_file_entry"; + +- f_name_to(file, fname); ++ if (fname_convert_cmd && !am_server) /* fname conversion always done on client */ ++ convert_fname(fname, f_name(file), MAXPATHLEN); ++ else ++ f_name_to(file, fname); ++ + + flags = base_flags; + +@@ -559,6 +564,9 @@ void receive_file_entry(struct file_stru + + strlcpy(lastname, thisname, MAXPATHLEN); + ++ if (fname_convert_cmd && !am_server) /* fname conversion always done on client */ ++ convert_fname(thisname, lastname, MAXPATHLEN); ++ + clean_fname(thisname); + + if (sanitize_paths) +@@ -1041,6 +1049,9 @@ struct file_list *send_file_list(int f, + + start_write = stats.total_written; + ++ if (!am_server) ++ init_fname_convert(); ++ + flist = flist_new(f == -1 ? WITHOUT_HLINK : WITH_HLINK, + "send_file_list"); + +@@ -1215,6 +1226,9 @@ struct file_list *send_file_list(int f, + write_batch_flist_info(flist->count, flist->files); + } + ++ if (fname_convert_cmd && !am_server) ++ cleanup_fname_convert(); ++ + if (verbose > 3) + output_flist(flist); + +@@ -1237,6 +1251,9 @@ struct file_list *recv_file_list(int f) + + start_read = stats.total_read; + ++ if (fname_convert_cmd && !am_server) ++ init_fname_convert(); ++ + flist = flist_new(WITH_HLINK, "recv_file_list"); + + flist->count = 0; +@@ -1291,6 +1308,9 @@ struct file_list *recv_file_list(int f) + } + } + ++ if (fname_convert_cmd && !am_server) ++ cleanup_fname_convert(); ++ + if (verbose > 3) + output_flist(flist); + +--- /dev/null 1 Jan 1970 00:00:00 -0000 ++++ fnameconv.c 29 May 2004 21:31:46 -0000 +@@ -0,0 +1,220 @@ ++/* -*- c-file-style: "linux" -*- ++ * ++ * Copyright (C) 2004 by Eran Tromer ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++/* Handles filename conversion through an external process. Implements ++ * two modes of operation: ++ * In persistent mode, a single filename converter is kept running; ++ * for each query we feed it a single line and read back a single ++ * line. This will fail for programs that used buffered I/O, and will ++ * get into a deadlock. ++ * In non-persistent mode, a converter is invoked and killed for each ++ * query. This has a very high overhead, but will work for any ++ * program. ++ * We start in persistence mode, and if we suspect a deadlock (i.e., ++ * nothing happens for FNAME_CONV_PERSISTENCE_TIMEOUT milliseconds) ++ * then we smoothly fall back to non-persistent mode. ++ * ++ * Filename conversion errors are always considered fatal, since an ++ * incorrectly named file could cause unpredictable damage. ++ */ ++ ++#include ++ ++#define FNAME_CONV_PERSISTENCE_TIMEOUT 3000 /* milliseconds */ ++ ++static int conv_persistent = 1; ++static pid_t conv_pid = -1; ++static int conv_write_fd = -1, conv_read_fd; ++extern char *fname_convert_cmd; ++extern int blocking_io; ++ ++/** ++ * Splits cmd on spaces. ++ */ ++static void split_on_spaces(char *cmd, char **parts) { ++ int nparts = 0; ++ char *tok; ++ char *cmd2 = strdup(cmd); ++ if (!cmd2) { ++ rprintf(FERROR, "Out of memory while parsing filename filter %s\n", cmd); ++ exit_cleanup(RERR_MALLOC); ++ } ++ ++ for (tok = strtok(cmd2, " "); tok; tok = strtok(NULL, " ")) { ++ if (nparts >= MAX_ARGS) { ++ rprintf(FERROR, "Filename conversion command is too long: %s\n", cmd); ++ exit_cleanup(RERR_SYNTAX); ++ } ++ parts[nparts++] = tok; ++ } ++ parts[nparts] = NULL; ++} ++ ++ ++/** ++ * Runs the filename converter process. Should be called before filename ++ * conversion begins (actually it's not necessarh, but it keeps the proress report ++ * nice and clean. ++ **/ ++void init_fname_convert() ++{ ++ if (fname_convert_cmd && conv_pid < 0) { ++ char *args[MAX_ARGS]; ++ ++ if (verbose > 2) ++ rprintf(FINFO, "Running filename converter: %s\n", fname_convert_cmd); ++ split_on_spaces(fname_convert_cmd, args); ++ /* Invoke child pipe with non-blocking IO and without registering it for ++ * autocleanup (the latter may blow up the all_pids table, and is not needed ++ * since we have our own cleanup handler. */ ++ conv_pid = piped_child(args, &conv_read_fd, &conv_write_fd, 0, 0); ++ set_nonblocking(conv_write_fd); ++ set_nonblocking(conv_read_fd); ++ } ++} ++ ++/** ++ * Kills the filename converter process. Should be called when the file ++ * list creation is done. We assume that the converter will terminate ++ * soon after its standard input is closed. ++ **/ ++void cleanup_fname_convert() ++{ ++ if (conv_pid >= 0) { ++ int status; ++ if (conv_write_fd >= 0) { ++ close(conv_write_fd); ++ conv_write_fd = -1; ++ } ++ close(conv_read_fd); ++ waitpid(conv_pid, &status, 0); ++ conv_pid = -1; ++ } ++} ++ ++/** ++ * Converts the filename from src into dest, using at most maxlen ++ * characters of dest. ++ **/ ++void convert_fname(char *dest, const char *src, unsigned int maxlen) ++{ ++ int res; ++ const char *srcp; ++ char *destp; ++ unsigned int srcrem, dstrem; ++ ++ init_fname_convert(); ++ ++ /* Send and receive strings simultaneously to avoid deadlock: */ ++ srcrem = strlen(src)+1; /* chars left to send (incl. terminating LF) */ ++ dstrem = maxlen-1; /* free chars left in dest */ ++ srcp = src; ++ destp = dest; ++ while(1) { ++ /* Write as much as possible: */ ++ if (srcrem > 1) { ++ res = write(conv_write_fd, srcp, srcrem-1); ++ if (res < 0 && errno != EAGAIN) { ++ rprintf(FERROR, "Error writing to fname converter (filename: %s): %s\n", strerror(errno), src); ++ exit_cleanup(RERR_FNAMECONV); ++ } ++ if (res > 0) { /* wrote something */ ++ srcp += res; ++ srcrem -= res; ++ } ++ } ++ if (srcrem == 1) { /* final LF */ ++ res = write(conv_write_fd, "\n", 1); ++ if (res < 0 && errno != EAGAIN) { ++ rprintf(FERROR, "Error writing to fname converter (filename: %s): %s\n", strerror(errno), src); ++ exit_cleanup(RERR_FNAMECONV); ++ } ++ if (res > 0) { /* wrote final LF */ ++ srcrem = 0; ++ if (!conv_persistent) { ++ close(conv_write_fd); ++ conv_write_fd = -1; ++ } ++ } ++ } ++ ++ /* Read as much as possible: */ ++ res = read(conv_read_fd, destp, dstrem); ++ if (res < 0 && errno != EAGAIN) { ++ rprintf(FERROR, "Error reading from filename converter (filename: %s):%s \n", strerror(errno), src); ++ exit_cleanup(RERR_FNAMECONV); ++ } ++ if (res == 0) { /* EOF */ ++ rprintf(FERROR, "EOF from filename converter (filename: %s)\n", src); ++ exit_cleanup(RERR_FNAMECONV); ++ } ++ if (res > 0) { ++ destp += res; ++ dstrem -= res; ++ if (destp[-1] == '\n' || destp[-1] == '\r') ++ break; /* End of line. Yippy! */ ++ if (dstrem == 0) { ++ rprintf(FINFO, "Name converter output too long (filename: %s)\n", src); ++ exit_cleanup(RERR_FNAMECONV); ++ } ++ } ++ ++ /* Await activity */ ++ if (!await_fds(conv_read_fd, !srcrem ? -1 : conv_write_fd, FNAME_CONV_PERSISTENCE_TIMEOUT)) { ++ if (srcrem == 0 && conv_persistent) { ++ /* We finished writing but nothing happens. It looks like the converter program ++ * is using buffered I/O and thus wait to read more input, but we can't give it ++ * the next filename yet. Fall back to non-persistent mode. */ ++ if (verbose > 0) ++ rprintf(FINFO, "Filename converter blocked, disabling persistence to recover.\n"); ++ ++ conv_persistent = 0; ++ close(conv_write_fd); ++ conv_write_fd = -1; ++ } ++ } ++ } ++ ++ /* Cleanup and sanity check */ ++ if (!conv_persistent) ++ cleanup_fname_convert(); ++ if (srcrem > 0) { ++ close(conv_write_fd); ++ rprintf(FERROR, "Name converter produced output before reading all its input for file: %s\n", src); ++ exit_cleanup(RERR_FNAMECONV); ++ } ++ ++ /* Chop newline chars */ ++ destp--; ++ if (destp > dest && *destp == '\n') ++ --destp; ++ if (destp > dest && *destp == '\r') ++ --destp; ++ if (++destp == dest) { ++ rprintf(FERROR, "Name converter output is empty (filename: %s)\n", src); ++ exit_cleanup(RERR_FNAMECONV); ++ } ++ *destp = 0; ++ /* Also, we may have a leading CR left over from a CRLF of the previous line */ ++ if (*dest == '\n') ++ memmove(dest, dest+1, destp-dest-1); ++ ++ if (verbose > 2) ++ rprintf(FINFO, "Converted filename: %s -> %s\n", src, dest); ++} +--- generator.c 18 May 2004 08:50:17 -0000 1.85 ++++ generator.c 29 May 2004 21:31:46 -0000 +@@ -267,6 +267,12 @@ static void generate_and_send_sums(struc + * + * @note This comment was added later by mbp who was trying to work it + * out. It might be wrong. ++ * ++ * TODO: The filename seen in recv_generator is after filename ++ * conversion. In verbose mode, directories, symlinks and device ++ * files are printf()ed here but regular files are rprintf()ed on the ++ * sender (unconverted). To solve the above, move all progress ++ * reporting to the sender. + **/ + void recv_generator(char *fname, struct file_struct *file, int i, int f_out) + { +--- log.c 15 May 2004 19:31:16 -0000 1.73 ++++ log.c 29 May 2004 21:31:47 -0000 +@@ -57,6 +57,7 @@ struct { + { RERR_STREAMIO , "error in rsync protocol data stream" }, + { RERR_MESSAGEIO , "errors with program diagnostics" }, + { RERR_IPC , "error in IPC code" }, ++ { RERR_FNAMECONV , "error in filename conversion" }, + { RERR_SIGNAL , "received SIGUSR1 or SIGINT" }, + { RERR_WAITCHILD , "some error returned by waitpid()" }, + { RERR_MALLOC , "error allocating core memory buffers" }, +--- main.c 19 May 2004 22:19:19 -0000 1.195 ++++ main.c 29 May 2004 21:31:47 -0000 +@@ -217,7 +217,7 @@ static pid_t do_cmd(char *cmd, char *mac + int *f_in, int *f_out) + { + int i, argc = 0; +- char *args[100]; ++ char *args[MAX_ARGS]; + pid_t ret; + char *tok, *dir = NULL; + int dash_l_set = 0; +@@ -232,8 +232,13 @@ static pid_t do_cmd(char *cmd, char *mac + if (!cmd) + goto oom; + +- for (tok = strtok(cmd, " "); tok; tok = strtok(NULL, " ")) ++ for (tok = strtok(cmd, " "); tok; tok = strtok(NULL, " ")) { ++ if (argc >= MAX_ARGS) { ++ rprintf(FERROR, "Command is too long\n"); ++ exit_cleanup(RERR_SYNTAX); ++ } + args[argc++] = tok; ++ } + + /* check to see if we've already been given '-l user' in + * the remote-shell command */ +@@ -296,7 +301,7 @@ static pid_t do_cmd(char *cmd, char *mac + create_flist_from_batch(); /* sets batch_flist */ + ret = local_child(argc, args, f_in, f_out, child_main); + } else { +- ret = piped_child(args,f_in,f_out); ++ ret = piped_child(args, f_in, f_out, blocking_io, 1); + } + + if (dir) free(dir); +--- options.c 27 May 2004 21:51:53 -0000 1.153 ++++ options.c 29 May 2004 21:31:48 -0000 +@@ -125,6 +125,7 @@ char *backup_dir = NULL; + char backup_dir_buf[MAXPATHLEN]; + int rsync_port = RSYNC_PORT; + int link_dest = 0; ++char *fname_convert_cmd = NULL; + + int verbose = 0; + int quiet = 0; +@@ -268,6 +269,7 @@ void usage(enum logcode F) + rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n"); + rprintf(F," --link-dest=DIR create hardlinks to DIR for unchanged files\n"); + rprintf(F," -P equivalent to --partial --progress\n"); ++ rprintf(F," --fname-convert=CMD invoke CMD for filename conversion\n"); + rprintf(F," -z, --compress compress file data\n"); + rprintf(F," -C, --cvs-exclude auto ignore files in the same way CVS does\n"); + rprintf(F," --exclude=PATTERN exclude files matching PATTERN\n"); +@@ -364,6 +366,7 @@ static struct poptOption long_options[] + {"temp-dir", 'T', POPT_ARG_STRING, &tmpdir, 0, 0, 0 }, + {"compare-dest", 0, POPT_ARG_STRING, &compare_dest, 0, 0, 0 }, + {"link-dest", 0, POPT_ARG_STRING, &compare_dest, OPT_LINK_DEST, 0, 0 }, ++ {"fname-convert", 0, POPT_ARG_STRING, &fname_convert_cmd, 0, 0, 0 }, + /* TODO: Should this take an optional int giving the compression level? */ + {"compress", 'z', POPT_ARG_NONE, &do_compression, 0, 0, 0 }, + {"daemon", 0, POPT_ARG_NONE, &daemon_opt, 0, 0, 0 }, +--- pipe.c 15 May 2004 19:31:10 -0000 1.7 ++++ pipe.c 29 May 2004 21:31:48 -0000 +@@ -23,7 +23,6 @@ + + extern int am_sender; + extern int am_server; +-extern int blocking_io; + extern int orig_umask; + extern int read_batch; + extern int filesfrom_fd; +@@ -40,8 +39,10 @@ extern int filesfrom_fd; + * If blocking_io is set then use blocking io on both fds. That can be + * used to cope with badly broken rsh implementations like the one on + * Solaris. ++ * ++ * If register_child is nonzero then the child is registered for autocleanup. + **/ +-pid_t piped_child(char **command, int *f_in, int *f_out) ++pid_t piped_child(char **command, int *f_in, int *f_out, int blocking_io, int register_child) + { + pid_t pid; + int to_child_pipe[2]; +@@ -57,7 +58,7 @@ pid_t piped_child(char **command, int *f + } + + +- pid = do_fork(); ++ pid = register_child ? do_fork() : fork(); + if (pid == -1) { + rsyserr(FERROR, errno, "fork"); + exit_cleanup(RERR_IPC); +--- syscall.c 18 Feb 2004 22:33:21 -0000 1.30 ++++ syscall.c 29 May 2004 21:31:48 -0000 +@@ -231,3 +231,34 @@ char *d_name(struct dirent *di) + return di->d_name; + #endif + } ++ ++/** ++ * A wrapper around select(2) that guarantees Linux-like updating of ++ * the timeout argument to contain the time left, so we can simply ++ * re-invoke in case of EINTR or EAGAIN. On BSD, select(2) doesn't ++ * change the timeout argument by itself. ++ **/ ++int do_select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout) ++{ ++ struct timeval intended, before, after; ++ int result; ++ ++ if (timeout == NULL) { ++ return select(n, readfds, writefds, exceptfds, timeout); ++ ++ intended = *timeout; ++ gettimeofday(&before, NULL); ++ result = select(n, readfds, writefds, exceptfds, timeout); ++ gettimeofday(&after, NULL); ++ timeout->tv_sec = intended.tv_sec - (after.tv_sec - before.tv_sec); ++ timeout->tv_usec = intended.tv_usec - (after.tv_usec - before.tv_usec); ++ if (timeout->tv_usec >= 1000000) { ++ ++timeout->tv_sec; ++ timeout->tv_usec -= 1000000; ++ } else if (timeout->tv_usec < 0) { ++ --(timeout)->tv_sec; ++ timeout->tv_usec += 1000000; ++ } ++ ++ return result; ++} +--- util.c 24 May 2004 22:59:16 -0000 1.147 ++++ util.c 29 May 2004 21:31:48 -0000 +@@ -1135,3 +1135,52 @@ void *_realloc_array(void *ptr, unsigned + return malloc(size * num); + return realloc(ptr, size * num); + } ++ ++/** ++ * Blocks until one of the following happens: ++ * - read_fd is nonnegative and has data to read ++ * - write_fd is nonnegative and can be written to ++ * - something terrible happened to either ++ * - the timeout (in milliseconds) has elapsed ++ * Return value is zero iff the timeout occured. ++ */ ++char await_fds(int read_fd, int write_fd, int timeout_ms) ++{ ++ fd_set read_fds, write_fds, except_fds; ++ struct timeval tv; ++ int res; ++ ++ tv.tv_sec = timeout_ms/1000; ++ tv.tv_usec = (timeout_ms%1000)*1000; ++ ++ while (1) { ++ FD_ZERO(&read_fds); ++ FD_ZERO(&write_fds); ++ FD_ZERO(&except_fds); ++ if (write_fd >= 0) { ++ FD_SET(write_fd, &write_fds); ++ FD_SET(write_fd, &except_fds); ++ } ++ if (read_fd >= 0) { ++ FD_SET(read_fd, &read_fds); ++ FD_SET(read_fd, &except_fds); ++ } ++ ++ res = do_select(MAX(0,MAX(read_fd, write_fd)+1), &read_fds, &write_fds, &except_fds, &tv); ++ if (res > 0) ++ return 1; ++ if (read_fd >= 0 && (FD_ISSET(read_fd, &read_fds) || FD_ISSET(read_fd, &except_fds))) ++ return 1; ++ if (write_fd >= 0 && (FD_ISSET(write_fd, &write_fds) || FD_ISSET(write_fd, &except_fds))) ++ return 1; ++ if (res == EINTR || res == EAGAIN) { ++ continue; /* Retry */ ++ } ++ if (res < 0) { ++ rprintf(FERROR, "Error awaiting fname converter: %s\n", strerror(errno)); ++ exit_cleanup(RERR_FNAMECONV); ++ } ++ return 0; /* res == 0 and no FDs set, hence a timeout. */ ++ } ++} ++ -- 2.34.1