+++ /dev/null
-Eran Tromer writes:
-
-One feature missing from rsync, and requested on this list before, is
-on-the-fly conversion of filename character encoding. For example, I
-often need to sync files having Hebrew filenames from a UTF-8 system
-(Linux) to an ISO8859-8 system (Cygwin on Windows 2000 using the
-non-Unicode Win32 interface). Other circumstances surely abound.
-
-Attached is a patch against rsync 2.6.2 that adds an "--fname-convert"
-option. When the argument "--fname-convert CONV" is given, rsync pipes
-every filename through the program CONV, and the filename presented to
-the server will be CONV's output instead of the raw filename.
-
-Artificial example:
-$ touch /tmp/xyz
-$ rsync --fname-convert 'tr y Y' /tmp/xyz /tmp/
-$ ls /tmp/x?z
-/tmp/xyz /tmp/xYz
-
-Perhaps the most useful case is using iconv:
-$ rsync --fname-convert 'iconv -f utf8 -t iso8859-8' ...
-
-I chose to allow invocation of arbitrary programs instead of using
-libiconv (or equivalent) in order to avoid external dependencies, and to
-offer more flexibility. The price is that some heuristics were needed to
-avoid the deadlock problems that tend to occur when filtering data
-through a program that uses buffered I/O -- see the comments at the top
-of the new file fnameconv.c. The delay you may have noticed in the above
-artificial example using "tr" is due to these heuristics; it occurs just
-once per rsync invocation, not for every file.
-
-I believe there are no server-side security implications, since all
-conversion is done at the client and the server is oblivious to it. On
-the client, conversion is done before sanitize_path() and besides,
-providing a sane converter program is the client's responsibility anyway.
-
-In verbose mode the updating of non-regular files is reported via
-rprintf() by the server, so the client will see the converted filename
-instead of the raw filename -- see my comment in recv_generator(). Fixing
-this requires some delicate changes so I left it as is, but it seems
-like a minor concern.
-
-Most of the new code is in the new file fnameconv.c. The patch lightly
-touches some other files, mostly flist.c and the addition/extension of
-some utility functions.
-
-Note that you'll need to run 'make proto' after applying this patch.
-
-
---- orig/Makefile.in 2006-01-14 08:14:29
-+++ Makefile.in 2005-11-07 04:36:50
-@@ -34,7 +34,7 @@ OBJS1=rsync.o generator.o receiver.o cle
- main.o checksum.o match.o syscall.o log.o backup.o
- OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \
- fileio.o batch.o clientname.o chmod.o
--OBJS3=progress.o pipe.o
-+OBJS3=progress.o pipe.o fnameconv.o
- DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o
- popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \
- popt/popthelp.o popt/poptparse.o
---- orig/cleanup.c 2006-01-14 08:14:29
-+++ cleanup.c 2005-01-10 10:40:51
-@@ -25,6 +25,7 @@ extern int io_error;
- extern int keep_partial;
- extern int log_got_error;
- extern char *partial_dir;
-+extern char *fname_convert_cmd;
-
- /**
- * Close all open sockets and files, allowing a (somewhat) graceful
-@@ -125,6 +126,8 @@ void _exit_cleanup(int code, const char
- !partial_dir);
- }
- io_flush(FULL_FLUSH);
-+ if (fname_convert_cmd)
-+ cleanup_fname_convert();
- if (cleanup_fname)
- do_unlink(cleanup_fname);
- if (code)
---- orig/errcode.h 2005-12-16 23:48:43
-+++ errcode.h 2005-09-29 17:23:35
-@@ -36,6 +36,7 @@
- #define RERR_IPC 14 /* error in IPC code */
- #define RERR_CRASHED 15 /* sibling crashed */
- #define RERR_TERMINATED 16 /* sibling terminated abnormally */
-+#define RERR_FNAMECONV 17 /* error in filename conversion */
-
- #define RERR_SIGNAL1 19 /* status returned when sent SIGUSR1 */
- #define RERR_SIGNAL 20 /* status returned when sent SIGINT, SIGTERM, SIGHUP */
---- orig/flist.c 2006-01-17 02:15:59
-+++ flist.c 2005-03-05 00:29:08
-@@ -57,6 +57,7 @@ extern int copy_unsafe_links;
- extern int protocol_version;
- extern int sanitize_paths;
- extern int orig_umask;
-+extern char *fname_convert_cmd;
- extern struct stats stats;
- extern struct file_list *the_file_list;
-
-@@ -335,7 +336,10 @@ void send_file_entry(struct file_struct
-
- io_write_phase = "send_file_entry";
-
-- f_name(file, fname);
-+ if (fname_convert_cmd && !am_server) /* fname conversion always done on client */
-+ convert_fname(fname, f_name(file, NULL), MAXPATHLEN);
-+ else
-+ f_name(file, fname);
-
- flags = base_flags;
-
-@@ -543,6 +547,9 @@ static struct file_struct *receive_file_
-
- strlcpy(lastname, thisname, MAXPATHLEN);
-
-+ if (fname_convert_cmd && !am_server) /* fname conversion always done on client */
-+ convert_fname(thisname, lastname, MAXPATHLEN);
-+
- clean_fname(thisname, 0);
-
- if (sanitize_paths)
-@@ -1072,6 +1079,9 @@ struct file_list *send_file_list(int f,
- start_write = stats.total_written;
- gettimeofday(&start_tv, NULL);
-
-+ if (!am_server)
-+ init_fname_convert();
-+
- flist = flist_new(WITH_HLINK, "send_file_list");
-
- io_start_buffering_out();
-@@ -1283,6 +1293,9 @@ struct file_list *send_file_list(int f,
- stats.flist_size = stats.total_written - start_write;
- stats.num_files = flist->count;
-
-+ if (fname_convert_cmd && !am_server)
-+ cleanup_fname_convert();
-+
- if (verbose > 3)
- output_flist(flist);
-
-@@ -1303,6 +1316,9 @@ struct file_list *recv_file_list(int f)
-
- start_read = stats.total_read;
-
-+ if (fname_convert_cmd && !am_server)
-+ init_fname_convert();
-+
- flist = flist_new(WITH_HLINK, "recv_file_list");
-
- flist->count = 0;
-@@ -1354,6 +1370,9 @@ struct file_list *recv_file_list(int f)
- io_error |= read_int(f);
- }
-
-+ if (fname_convert_cmd && !am_server)
-+ cleanup_fname_convert();
-+
- if (verbose > 3)
- output_flist(flist);
-
---- orig/fnameconv.c 2004-07-02 21:38:59
-+++ fnameconv.c 2004-07-02 21:38:59
-@@ -0,0 +1,220 @@
-+/* -*- c-file-style: "linux" -*-
-+ *
-+ * Copyright (C) 2004 by Eran Tromer
-+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
-+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
-+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software
-+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-+ */
-+
-+/* Handles filename conversion through an external process. Implements
-+ * two modes of operation:
-+ * In persistent mode, a single filename converter is kept running;
-+ * for each query we feed it a single line and read back a single
-+ * line. This will fail for programs that use buffered I/O, and will
-+ * get into a deadlock.
-+ * In non-persistent mode, a converter is invoked and killed for each
-+ * query. This has a very high overhead, but will work for any
-+ * program.
-+ * We start in persistent mode, and if we suspect a deadlock (i.e.,
-+ * nothing happens for FNAME_CONV_PERSISTENCE_TIMEOUT milliseconds)
-+ * then we smoothly fall back to non-persistent mode.
-+ *
-+ * Filename conversion errors are always considered fatal, since an
-+ * incorrectly named file could cause unpredictable damage.
-+ */
-+
-+#include <rsync.h>
-+
-+#define FNAME_CONV_PERSISTENCE_TIMEOUT 3000 /* milliseconds of silence before a deadlock is suspected */
-+
-+static int conv_persistent = 1;	/* nonzero while a single converter serves every query */
-+static pid_t conv_pid = -1;	/* converter child, or -1 when none is running */
-+static int conv_write_fd = -1, conv_read_fd = -1; /* pipe ends to/from the converter */
-+extern char *fname_convert_cmd;
-+extern int blocking_io;
-+
-+/**
-+ * Splits a never-freed copy of cmd on spaces into parts (MAX_ARGS slots), NULL-terminated.
-+ */
-+static void split_on_spaces(char *cmd, char **parts) {
-+	int nparts = 0;
-+	char *tok;
-+	char *cmd2 = strdup(cmd); /* strtok() writes into its input, and parts[] keeps pointing into it */
-+	if (!cmd2) {
-+		rprintf(FERROR, "Out of memory while parsing filename filter %s\n", cmd);
-+		exit_cleanup(RERR_MALLOC);
-+	}
-+
-+	for (tok = strtok(cmd2, " "); tok; tok = strtok(NULL, " ")) {
-+		if (nparts >= MAX_ARGS - 1) { /* keep one slot free for the NULL terminator */
-+			rprintf(FERROR, "Filename conversion command is too long: %s\n", cmd);
-+			exit_cleanup(RERR_SYNTAX);
-+		}
-+		parts[nparts++] = tok;
-+	}
-+	parts[nparts] = NULL;
-+}
-+
-+
-+/**
-+ * Starts the filename converter process; does nothing when no converter
-+ * command was given or one is already running. Should be called before filename
-+ * conversion begins (not strictly necessary, but it keeps the progress report clean).
-+ **/
-+void init_fname_convert(void)
-+{
-+ if (fname_convert_cmd && conv_pid < 0) {
-+ char *args[MAX_ARGS];
-+
-+ if (verbose > 2)
-+ rprintf(FINFO, "Running filename converter: %s\n", fname_convert_cmd);
-+ split_on_spaces(fname_convert_cmd, args);
-+ /* Invoke child pipe with non-blocking IO and without registering it for
-+ * autocleanup (the latter may blow up the all_pids table, and is not needed
-+ * since we have our own cleanup handler). */
-+ conv_pid = piped_child(args, &conv_read_fd, &conv_write_fd, 0, 0);
-+ set_nonblocking(conv_write_fd);
-+ set_nonblocking(conv_read_fd);
-+ }
-+}
-+
-+/**
-+ * Shuts down the filename converter process, if one is running, by closing
-+ * both pipe ends and waiting for it. Should be called when the file list
-+ * creation is done. We assume that the converter will terminate soon after its standard input is closed.
-+ **/
-+void cleanup_fname_convert(void)
-+{
-+	if (conv_pid >= 0) {
-+		if (conv_write_fd >= 0) {
-+			close(conv_write_fd);
-+			conv_write_fd = -1;
-+		}
-+		close(conv_read_fd);
-+		conv_read_fd = -1; /* don't leave a stale descriptor number behind */
-+		waitpid(conv_pid, NULL, 0); /* reap the child; its exit status is not used */
-+		conv_pid = -1;
-+	}
-+}
-+
-+/**
-+ * Converts the filename src into dest (NUL-terminated, line terminator removed), using at most
-+ * maxlen characters of dest. Failures detected here are fatal: exit_cleanup(RERR_FNAMECONV).
-+ **/
-+void convert_fname(char *dest, const char *src, unsigned int maxlen)
-+{
-+	int res;
-+	const char *srcp;
-+	char *destp;
-+	unsigned int srcrem, dstrem;
-+
-+	init_fname_convert();
-+
-+	/* Send and receive strings simultaneously to avoid deadlock: */
-+	srcrem = strlen(src)+1; /* chars left to send (incl. terminating LF) */
-+	dstrem = maxlen-1; /* free chars left in dest */
-+	srcp = src;
-+	destp = dest;
-+	while(1) {
-+		/* Write as much as possible: */
-+		if (srcrem > 1) {
-+			res = write(conv_write_fd, srcp, srcrem-1);
-+			if (res < 0 && errno != EAGAIN) {
-+				rprintf(FERROR, "Error writing to fname converter (filename: %s): %s\n", src, strerror(errno));
-+				exit_cleanup(RERR_FNAMECONV);
-+			}
-+			if (res > 0) { /* wrote something */
-+				srcp += res;
-+				srcrem -= res;
-+			}
-+		}
-+		if (srcrem == 1) { /* final LF */
-+			res = write(conv_write_fd, "\n", 1);
-+			if (res < 0 && errno != EAGAIN) {
-+				rprintf(FERROR, "Error writing to fname converter (filename: %s): %s\n", src, strerror(errno));
-+				exit_cleanup(RERR_FNAMECONV);
-+			}
-+			if (res > 0) { /* wrote final LF */
-+				srcrem = 0;
-+				if (!conv_persistent) {
-+					close(conv_write_fd);
-+					conv_write_fd = -1;
-+				}
-+			}
-+		}
-+
-+		/* Read as much as possible: */
-+		res = read(conv_read_fd, destp, dstrem);
-+		if (res < 0 && errno != EAGAIN) {
-+			rprintf(FERROR, "Error reading from filename converter (filename: %s): %s\n", src, strerror(errno));
-+			exit_cleanup(RERR_FNAMECONV);
-+		}
-+		if (res == 0) { /* EOF */
-+			rprintf(FERROR, "EOF from filename converter (filename: %s)\n", src);
-+			exit_cleanup(RERR_FNAMECONV);
-+		}
-+		/* Drop a leading LF left over from a CRLF that ended the previous line */
-+		if (res > 0 && destp == dest && *dest == '\n')
-+			memmove(dest, dest+1, --res);
-+		if (res > 0) {
-+			destp += res;
-+			dstrem -= res;
-+			if (destp[-1] == '\n' || destp[-1] == '\r')
-+				break; /* End of line. Yippy! */
-+			if (dstrem == 0) {
-+				rprintf(FERROR, "Name converter output too long (filename: %s)\n", src);
-+				exit_cleanup(RERR_FNAMECONV);
-+			}
-+		}
-+
-+		/* Await activity */
-+		if (!await_fds(conv_read_fd, !srcrem ? -1 : conv_write_fd, FNAME_CONV_PERSISTENCE_TIMEOUT)) {
-+			if (srcrem == 0 && conv_persistent) {
-+				/* We finished writing but nothing happens. It looks like the converter program
-+				 * is using buffered I/O and thus waits to read more input, but we can't give it
-+				 * the next filename yet. Fall back to non-persistent mode. */
-+				if (verbose > 0)
-+					rprintf(FINFO, "Filename converter blocked, disabling persistence to recover.\n");
-+
-+				conv_persistent = 0;
-+				close(conv_write_fd);
-+				conv_write_fd = -1;
-+			}
-+		}
-+	}
-+
-+	/* Cleanup and sanity check */
-+	if (!conv_persistent)
-+		cleanup_fname_convert();
-+	if (srcrem > 0) {
-+		close(conv_write_fd);
-+		rprintf(FERROR, "Name converter produced output before reading all its input for file: %s\n", src);
-+		exit_cleanup(RERR_FNAMECONV);
-+	}
-+
-+	/* Chop the trailing line terminator (LF, CR or CRLF); testing destp[-1]
-+	 * lets a terminator-only output reach the emptiness check below. */
-+	if (destp > dest && destp[-1] == '\n')
-+		--destp;
-+	if (destp > dest && destp[-1] == '\r')
-+		--destp;
-+	if (destp == dest) {
-+		rprintf(FERROR, "Name converter output is empty (filename: %s)\n", src);
-+		exit_cleanup(RERR_FNAMECONV);
-+	}
-+	*destp = 0;
-+
-+	if (verbose > 2)
-+		rprintf(FINFO, "Converted filename: %s -> %s\n", src, dest);
-+}
---- orig/generator.c 2006-01-14 20:27:09
-+++ generator.c 2005-03-05 00:29:37
-@@ -761,7 +761,13 @@ static int phase = 0;
- * When fname is non-null, it must point to a MAXPATHLEN buffer!
- *
- * Note that f_out is set to -1 when doing final directory-permission and
-- * modification-time repair. */
-+ * modification-time repair.
-+ *
-+ * TODO: The filename seen in recv_generator is after filename
-+ * conversion. In verbose mode, directories, symlinks and device
-+ * files are printf()ed here but regular files are rprintf()ed on the
-+ * sender (unconverted). To solve the above, move all progress
-+ * reporting to the sender. */
- static void recv_generator(char *fname, struct file_struct *file, int ndx,
- int itemizing, int maybe_PERMS_REPORT,
- enum logcode code, int f_out)
---- orig/log.c 2006-01-17 02:16:40
-+++ log.c 2005-12-16 23:49:07
-@@ -68,6 +68,7 @@ struct {
- { RERR_IPC , "error in IPC code" },
- { RERR_CRASHED , "sibling process crashed" },
- { RERR_TERMINATED , "sibling process terminated abnormally" },
-+ { RERR_FNAMECONV , "error in filename conversion" },
- { RERR_SIGNAL1 , "received SIGUSR1" },
- { RERR_SIGNAL , "received SIGINT, SIGTERM, or SIGHUP" },
- { RERR_WAITCHILD , "waitpid() failed" },
---- orig/main.c 2006-01-15 14:46:15
-+++ main.c 2004-07-22 00:31:47
-@@ -414,7 +414,7 @@ static pid_t do_cmd(char *cmd, char *mac
- whole_file = 1;
- ret = local_child(argc, args, f_in, f_out, child_main);
- } else
-- ret = piped_child(args,f_in,f_out);
-+ ret = piped_child(args, f_in, f_out, blocking_io, 1);
-
- if (dir)
- free(dir);
---- orig/options.c 2006-01-14 08:14:30
-+++ options.c 2005-10-26 16:49:33
-@@ -142,6 +142,7 @@ char *basis_dir[MAX_BASIS_DIRS+1];
- char *config_file = NULL;
- char *shell_cmd = NULL;
- char *log_format = NULL;
-+char *fname_convert_cmd = NULL;
- char *password_file = NULL;
- char *rsync_path = RSYNC_PATH;
- char *backup_dir = NULL;
-@@ -330,6 +331,7 @@ void usage(enum logcode F)
- rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n");
- rprintf(F," --copy-dest=DIR ... and include copies of unchanged files\n");
- rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n");
-+ rprintf(F," --fname-convert=CMD invoke CMD for filename conversion\n");
- rprintf(F," -z, --compress compress file data during the transfer\n");
- rprintf(F," --compress-level=NUM explicitly set compression level\n");
- rprintf(F," -C, --cvs-exclude auto-ignore files the same way CVS does\n");
-@@ -464,6 +466,7 @@ static struct poptOption long_options[]
- {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 },
- {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 },
- {"fuzzy", 'y', POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 },
-+ {"fname-convert", 0, POPT_ARG_STRING, &fname_convert_cmd, 0, 0, 0 },
- {"compress", 'z', POPT_ARG_NONE, 0, 'z', 0, 0 },
- {"compress-level", 0, POPT_ARG_INT, &def_compress_level, 'z', 0, 0 },
- {0, 'P', POPT_ARG_NONE, 0, 'P', 0, 0 },
---- orig/pipe.c 2006-01-14 08:14:31
-+++ pipe.c 2004-07-03 20:18:02
-@@ -23,7 +23,6 @@
-
- extern int am_sender;
- extern int am_server;
--extern int blocking_io;
- extern int orig_umask;
- extern int filesfrom_fd;
-
-@@ -39,8 +38,10 @@ extern int filesfrom_fd;
- * If blocking_io is set then use blocking io on both fds. That can be
- * used to cope with badly broken rsh implementations like the one on
- * Solaris.
-+ *
-+ * If register_child is nonzero then the child is registered for autocleanup.
- **/
--pid_t piped_child(char **command, int *f_in, int *f_out)
-+pid_t piped_child(char **command, int *f_in, int *f_out, int blocking_io, int register_child)
- {
- pid_t pid;
- int to_child_pipe[2];
-@@ -55,7 +56,7 @@ pid_t piped_child(char **command, int *f
- exit_cleanup(RERR_IPC);
- }
-
-- pid = do_fork();
-+ pid = register_child ? do_fork() : fork();
- if (pid == -1) {
- rsyserr(FERROR, errno, "fork");
- exit_cleanup(RERR_IPC);
---- orig/syscall.c 2005-09-15 18:09:15
-+++ syscall.c 2004-07-02 21:39:00
-@@ -274,3 +274,34 @@ char *d_name(struct dirent *di)
- return di->d_name;
- #endif
- }
-+
-+/**
-+ * A wrapper around select(2) that guarantees Linux-like updating of
-+ * the timeout argument to contain the time left (never negative), so
-+ * we can simply re-invoke in case of EINTR or EAGAIN. On BSD,
-+ * select(2) doesn't change the timeout argument by itself.
-+ **/
-+int do_select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout)
-+{
-+	struct timeval intended, before, after;
-+	int result;
-+
-+	if (timeout == NULL) /* no timeout means there is nothing to keep track of */
-+		return select(n, readfds, writefds, exceptfds, timeout);
-+	intended = *timeout;
-+	gettimeofday(&before, NULL);
-+	result = select(n, readfds, writefds, exceptfds, timeout);
-+	gettimeofday(&after, NULL);
-+	timeout->tv_sec = intended.tv_sec - (after.tv_sec - before.tv_sec);
-+	timeout->tv_usec = intended.tv_usec - (after.tv_usec - before.tv_usec);
-+	if (timeout->tv_usec >= 1000000) {
-+		++timeout->tv_sec;
-+		timeout->tv_usec -= 1000000;
-+	} else if (timeout->tv_usec < 0) {
-+		--timeout->tv_sec;
-+		timeout->tv_usec += 1000000;
-+	}
-+	if (timeout->tv_sec < 0) /* overshot: a negative timeout would make a retry fail with EINVAL */
-+		timeout->tv_sec = timeout->tv_usec = 0;
-+	return result;
-+}
---- orig/util.c 2006-01-14 08:14:31
-+++ util.c 2004-07-03 20:18:02
-@@ -1373,3 +1373,55 @@ uint32 fuzzy_distance(const char *s1, in
-
- return a[len2-1];
- }
-+
-+/**
-+ * Blocks until one of the following happens:
-+ * - read_fd is nonnegative and has data to read
-+ * - write_fd is nonnegative and can be written to
-+ * - something terrible happened to either
-+ * - the timeout (in milliseconds) has elapsed
-+ * Return value is zero iff the timeout occurred.
-+ */
-+char await_fds(int read_fd, int write_fd, int timeout_ms)
-+{
-+ fd_set read_fds, write_fds, except_fds;
-+ struct timeval tv;
-+ int res;
-+
-+ tv.tv_sec = timeout_ms / 1000; /* split milliseconds into whole seconds ... */
-+ tv.tv_usec = (timeout_ms % 1000) * 1000; /* ... plus the remainder in microseconds */
-+
-+ while (1) {
-+ int maxfd = 0; /* highest descriptor placed in any set */
-+ FD_ZERO(&read_fds); /* the sets are rebuilt on every pass of the loop */
-+ FD_ZERO(&write_fds);
-+ FD_ZERO(&except_fds);
-+ if (write_fd >= 0) { /* a negative fd means "don't watch this side" */
-+ FD_SET(write_fd, &write_fds);
-+ FD_SET(write_fd, &except_fds);
-+ if (write_fd > maxfd)
-+ maxfd = write_fd;
-+ }
-+ if (read_fd >= 0) {
-+ FD_SET(read_fd, &read_fds);
-+ FD_SET(read_fd, &except_fds);
-+ if (read_fd > maxfd)
-+ maxfd = read_fd;
-+ }
-+
-+ res = do_select(maxfd+1, &read_fds, &write_fds, &except_fds, &tv); /* leaves the time left in tv */
-+ if (res > 0)
-+ return 1;
-+ if (res < 0) {
-+ if (errno == EINTR || errno == EAGAIN)
-+ continue; /* Retry with whatever time is left in tv */
-+ rprintf(FERROR, "Error awaiting fname converter: %s\n", strerror(errno));
-+ exit_cleanup(RERR_FNAMECONV);
-+ }
-+ if (read_fd >= 0 && (FD_ISSET(read_fd, &read_fds) || FD_ISSET(read_fd, &except_fds)))
-+ return 1;
-+ if (write_fd >= 0 && (FD_ISSET(write_fd, &write_fds) || FD_ISSET(write_fd, &except_fds)))
-+ return 1;
-+ return 0; /* res == 0 and no FDs set, hence a timeout. */
-+ }
-+}