From b8fdd4ad3ed81891535ec211ee2505513e9e39da Mon Sep 17 00:00:00 2001 From: Wayne Davison Date: Fri, 27 Jan 2006 00:50:01 +0000 Subject: [PATCH] Getting rid of this patch since the patch that uses iconv is a better design than this. --- fname-convert.diff | 574 --------------------------------------------- 1 file changed, 574 deletions(-) delete mode 100644 fname-convert.diff diff --git a/fname-convert.diff b/fname-convert.diff deleted file mode 100644 index 116c807..0000000 --- a/fname-convert.diff +++ /dev/null @@ -1,574 +0,0 @@ -Eran Tromer writes: - -One feature missing from rsync, and requested on this list before, is -on-the-fly conversion of filename character encoding. For example, I -often need to sync files having Hebrew filenames from a UTF-8 system -(Linux) to an ISO8859-8 system (Cygwin on Windows 2000 using the -non-Unicode Win32 interface). Other circumstances surely abound. - -Attached is a patch against rsync 2.6.2 that adds an "--fname-convert" -option. When the argument "--fname-convert CONV" is given, rsync pipes -every filename through the program CONV, and the filename presented to the -server will be CONV's output instead of the raw filename. - -Artificial example: -$ touch /tmp/xyz -$ rsync --fname-convert 'tr y Y' /tmp/xyz /tmp/ -$ ls /tmp/x?z -/tmp/xyz /tmp/xYz - -Perhaps the most useful case is using iconv: -$ rsync --fname-convert 'iconv -f utf8 -t iso8859-8' ... - -I chose to allow invocation of arbitrary programs instead of using -libiconv (or equivalent) in order to avoid external dependencies, and to -offer more flexibility. The price is that some heuristics were needed to -avoid the deadlock problems that tend to occur when filtering data -through a program that uses buffered I/O -- see the comments at the top -of the new file fnameconv.c. The delay you may have noticed in the above -artificial example using "tr" is due to these heuristics; it occurs just -once per rsync invocation, not for every file. 
- -I believe there are no server-side security implications, since all -conversion is done at the client and the server is oblivious to it. On -the client, conversion is done before sanitize_path() and besides, -providing a sane converter program is the client's responsibility anyway. - -In verbose mode the updating of non-regular files is reported via -rprintf() by the server, so the client will see the converted filename -instead of the raw filename -- see my comment in recv_generator(). Fixing -this requires some delicate changes so I left it as is, but it seems -like a minor concern. - -Most of the new code is in the new file fnameconv.c. The patch lightly -touches some other files, mostly flist.c and the addition/extension of -some utility functions. - -Note that you'll need to run 'make proto' after applying this patch. - - ---- orig/Makefile.in 2006-01-14 08:14:29 -+++ Makefile.in 2005-11-07 04:36:50 -@@ -34,7 +34,7 @@ OBJS1=rsync.o generator.o receiver.o cle - main.o checksum.o match.o syscall.o log.o backup.o - OBJS2=options.o flist.o io.o compat.o hlink.o token.o uidlist.o socket.o \ - fileio.o batch.o clientname.o chmod.o --OBJS3=progress.o pipe.o -+OBJS3=progress.o pipe.o fnameconv.o - DAEMON_OBJ = params.o loadparm.o clientserver.o access.o connection.o authenticate.o - popt_OBJS=popt/findme.o popt/popt.o popt/poptconfig.o \ - popt/popthelp.o popt/poptparse.o ---- orig/cleanup.c 2006-01-14 08:14:29 -+++ cleanup.c 2005-01-10 10:40:51 -@@ -25,6 +25,7 @@ extern int io_error; - extern int keep_partial; - extern int log_got_error; - extern char *partial_dir; -+extern char *fname_convert_cmd; - - /** - * Close all open sockets and files, allowing a (somewhat) graceful -@@ -125,6 +126,8 @@ void _exit_cleanup(int code, const char - !partial_dir); - } - io_flush(FULL_FLUSH); -+ if (fname_convert_cmd) -+ cleanup_fname_convert(); - if (cleanup_fname) - do_unlink(cleanup_fname); - if (code) ---- orig/errcode.h 2005-12-16 23:48:43 -+++ errcode.h 2005-09-29 17:23:35 
-@@ -36,6 +36,7 @@ - #define RERR_IPC 14 /* error in IPC code */ - #define RERR_CRASHED 15 /* sibling crashed */ - #define RERR_TERMINATED 16 /* sibling terminated abnormally */ -+#define RERR_FNAMECONV 17 /* error in filename conversion */ - - #define RERR_SIGNAL1 19 /* status returned when sent SIGUSR1 */ - #define RERR_SIGNAL 20 /* status returned when sent SIGINT, SIGTERM, SIGHUP */ ---- orig/flist.c 2006-01-17 02:15:59 -+++ flist.c 2005-03-05 00:29:08 -@@ -57,6 +57,7 @@ extern int copy_unsafe_links; - extern int protocol_version; - extern int sanitize_paths; - extern int orig_umask; -+extern char *fname_convert_cmd; - extern struct stats stats; - extern struct file_list *the_file_list; - -@@ -335,7 +336,10 @@ void send_file_entry(struct file_struct - - io_write_phase = "send_file_entry"; - -- f_name(file, fname); -+ if (fname_convert_cmd && !am_server) /* fname conversion always done on client */ -+ convert_fname(fname, f_name(file, NULL), MAXPATHLEN); -+ else -+ f_name(file, fname); - - flags = base_flags; - -@@ -543,6 +547,9 @@ static struct file_struct *receive_file_ - - strlcpy(lastname, thisname, MAXPATHLEN); - -+ if (fname_convert_cmd && !am_server) /* fname conversion always done on client */ -+ convert_fname(thisname, lastname, MAXPATHLEN); -+ - clean_fname(thisname, 0); - - if (sanitize_paths) -@@ -1072,6 +1079,9 @@ struct file_list *send_file_list(int f, - start_write = stats.total_written; - gettimeofday(&start_tv, NULL); - -+ if (!am_server) -+ init_fname_convert(); -+ - flist = flist_new(WITH_HLINK, "send_file_list"); - - io_start_buffering_out(); -@@ -1283,6 +1293,9 @@ struct file_list *send_file_list(int f, - stats.flist_size = stats.total_written - start_write; - stats.num_files = flist->count; - -+ if (fname_convert_cmd && !am_server) -+ cleanup_fname_convert(); -+ - if (verbose > 3) - output_flist(flist); - -@@ -1303,6 +1316,9 @@ struct file_list *recv_file_list(int f) - - start_read = stats.total_read; - -+ if (fname_convert_cmd && 
!am_server) -+ init_fname_convert(); -+ - flist = flist_new(WITH_HLINK, "recv_file_list"); - - flist->count = 0; -@@ -1354,6 +1370,9 @@ struct file_list *recv_file_list(int f) - io_error |= read_int(f); - } - -+ if (fname_convert_cmd && !am_server) -+ cleanup_fname_convert(); -+ - if (verbose > 3) - output_flist(flist); - ---- orig/fnameconv.c 2004-07-02 21:38:59 -+++ fnameconv.c 2004-07-02 21:38:59 -@@ -0,0 +1,220 @@ -+/* -*- c-file-style: "linux" -*- -+ * -+ * Copyright (C) 2004 by Eran Tromer -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License as published by -+ * the Free Software Foundation; either version 2 of the License, or -+ * (at your option) any later version. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. -+ * -+ * You should have received a copy of the GNU General Public License -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -+ */ -+ -+/* Handles filename conversion through an external process. Implements -+ * two modes of operation: -+ * In persistent mode, a single filename converter is kept running; -+ * for each query we feed it a single line and read back a single -+ * line. This will fail for programs that used buffered I/O, and will -+ * get into a deadlock. -+ * In non-persistent mode, a converter is invoked and killed for each -+ * query. This has a very high overhead, but will work for any -+ * program. -+ * We start in persistence mode, and if we suspect a deadlock (i.e., -+ * nothing happens for FNAME_CONV_PERSISTENCE_TIMEOUT milliseconds) -+ * then we smoothly fall back to non-persistent mode. 
-+ * -+ * Filename conversion errors are always considered fatal, since an -+ * incorrectly named file could cause unpredictable damage. -+ */ -+ -+#include -+ -+#define FNAME_CONV_PERSISTENCE_TIMEOUT 3000 /* milliseconds */ -+ -+static int conv_persistent = 1; -+static pid_t conv_pid = -1; -+static int conv_write_fd = -1, conv_read_fd; -+extern char *fname_convert_cmd; -+extern int blocking_io; -+ -+/** -+ * Splits cmd on spaces. -+ */ -+static void split_on_spaces(char *cmd, char **parts) { -+ int nparts = 0; -+ char *tok; -+ char *cmd2 = strdup(cmd); -+ if (!cmd2) { -+ rprintf(FERROR, "Out of memory while parsing filename filter %s\n", cmd); -+ exit_cleanup(RERR_MALLOC); -+ } -+ -+ for (tok = strtok(cmd2, " "); tok; tok = strtok(NULL, " ")) { -+ if (nparts >= MAX_ARGS) { -+ rprintf(FERROR, "Filename conversion command is too long: %s\n", cmd); -+ exit_cleanup(RERR_SYNTAX); -+ } -+ parts[nparts++] = tok; -+ } -+ parts[nparts] = NULL; -+} -+ -+ -+/** -+ * Runs the filename converter process. Should be called before filename -+ * conversion begins (actually it's not necessary, but it keeps the progress report -+ * nice and clean). -+ **/ -+void init_fname_convert(void) -+{ -+ if (fname_convert_cmd && conv_pid < 0) { -+ char *args[MAX_ARGS]; -+ -+ if (verbose > 2) -+ rprintf(FINFO, "Running filename converter: %s\n", fname_convert_cmd); -+ split_on_spaces(fname_convert_cmd, args); -+ /* Invoke child pipe with non-blocking IO and without registering it for -+ * autocleanup (the latter may blow up the all_pids table, and is not needed -+ * since we have our own cleanup handler. */ -+ conv_pid = piped_child(args, &conv_read_fd, &conv_write_fd, 0, 0); -+ set_nonblocking(conv_write_fd); -+ set_nonblocking(conv_read_fd); -+ } -+} -+ -+/** -+ * Kills the filename converter process. Should be called when the file -+ * list creation is done. We assume that the converter will terminate -+ * soon after its standard input is closed. 
-+ **/ -+void cleanup_fname_convert() -+{ -+ if (conv_pid >= 0) { -+ int status; -+ if (conv_write_fd >= 0) { -+ close(conv_write_fd); -+ conv_write_fd = -1; -+ } -+ close(conv_read_fd); -+ waitpid(conv_pid, &status, 0); -+ conv_pid = -1; -+ } -+} -+ -+/** -+ * Converts the filename from src into dest, using at most maxlen -+ * characters of dest. -+ **/ -+void convert_fname(char *dest, const char *src, unsigned int maxlen) -+{ -+ int res; -+ const char *srcp; -+ char *destp; -+ unsigned int srcrem, dstrem; -+ -+ init_fname_convert(); -+ -+ /* Send and receive strings simultaneously to avoid deadlock: */ -+ srcrem = strlen(src)+1; /* chars left to send (incl. terminating LF) */ -+ dstrem = maxlen-1; /* free chars left in dest */ -+ srcp = src; -+ destp = dest; -+ while(1) { -+ /* Write as much as possible: */ -+ if (srcrem > 1) { -+ res = write(conv_write_fd, srcp, srcrem-1); -+ if (res < 0 && errno != EAGAIN) { -+ rprintf(FERROR, "Error writing to fname converter (filename: %s): %s\n", strerror(errno), src); -+ exit_cleanup(RERR_FNAMECONV); -+ } -+ if (res > 0) { /* wrote something */ -+ srcp += res; -+ srcrem -= res; -+ } -+ } -+ if (srcrem == 1) { /* final LF */ -+ res = write(conv_write_fd, "\n", 1); -+ if (res < 0 && errno != EAGAIN) { -+ rprintf(FERROR, "Error writing to fname converter (filename: %s): %s\n", strerror(errno), src); -+ exit_cleanup(RERR_FNAMECONV); -+ } -+ if (res > 0) { /* wrote final LF */ -+ srcrem = 0; -+ if (!conv_persistent) { -+ close(conv_write_fd); -+ conv_write_fd = -1; -+ } -+ } -+ } -+ -+ /* Read as much as possible: */ -+ res = read(conv_read_fd, destp, dstrem); -+ if (res < 0 && errno != EAGAIN) { -+ rprintf(FERROR, "Error reading from filename converter (filename: %s):%s \n", strerror(errno), src); -+ exit_cleanup(RERR_FNAMECONV); -+ } -+ if (res == 0) { /* EOF */ -+ rprintf(FERROR, "EOF from filename converter (filename: %s)\n", src); -+ exit_cleanup(RERR_FNAMECONV); -+ } -+ if (res > 0) { -+ destp += res; -+ dstrem -= res; -+ 
if (destp[-1] == '\n' || destp[-1] == '\r') -+ break; /* End of line. Yippy! */ -+ if (dstrem == 0) { -+ rprintf(FINFO, "Name converter output too long (filename: %s)\n", src); -+ exit_cleanup(RERR_FNAMECONV); -+ } -+ } -+ -+ /* Await activity */ -+ if (!await_fds(conv_read_fd, !srcrem ? -1 : conv_write_fd, FNAME_CONV_PERSISTENCE_TIMEOUT)) { -+ if (srcrem == 0 && conv_persistent) { -+ /* We finished writing but nothing happens. It looks like the converter program -+ * is using buffered I/O and thus wait to read more input, but we can't give it -+ * the next filename yet. Fall back to non-persistent mode. */ -+ if (verbose > 0) -+ rprintf(FINFO, "Filename converter blocked, disabling persistence to recover.\n"); -+ -+ conv_persistent = 0; -+ close(conv_write_fd); -+ conv_write_fd = -1; -+ } -+ } -+ } -+ -+ /* Cleanup and sanity check */ -+ if (!conv_persistent) -+ cleanup_fname_convert(); -+ if (srcrem > 0) { -+ close(conv_write_fd); -+ rprintf(FERROR, "Name converter produced output before reading all its input for file: %s\n", src); -+ exit_cleanup(RERR_FNAMECONV); -+ } -+ -+ /* Chop newline chars */ -+ destp--; -+ if (destp > dest && *destp == '\n') -+ --destp; -+ if (destp > dest && *destp == '\r') -+ --destp; -+ if (++destp == dest) { -+ rprintf(FERROR, "Name converter output is empty (filename: %s)\n", src); -+ exit_cleanup(RERR_FNAMECONV); -+ } -+ *destp = 0; -+ /* Also, we may have a leading CR left over from a CRLF of the previous line */ -+ if (*dest == '\n') -+ memmove(dest, dest+1, destp-dest-1); -+ -+ if (verbose > 2) -+ rprintf(FINFO, "Converted filename: %s -> %s\n", src, dest); -+} ---- orig/generator.c 2006-01-14 20:27:09 -+++ generator.c 2005-03-05 00:29:37 -@@ -761,7 +761,13 @@ static int phase = 0; - * When fname is non-null, it must point to a MAXPATHLEN buffer! - * - * Note that f_out is set to -1 when doing final directory-permission and -- * modification-time repair. */ -+ * modification-time repair. 
-+ * -+ * TODO: The filename seen in recv_generator is after filename -+ * conversion. In verbose mode, directories, symlinks and device -+ * files are printf()ed here but regular files are rprintf()ed on the -+ * sender (unconverted). To solve the above, move all progress -+ * reporting to the sender. */ - static void recv_generator(char *fname, struct file_struct *file, int ndx, - int itemizing, int maybe_PERMS_REPORT, - enum logcode code, int f_out) ---- orig/log.c 2006-01-17 02:16:40 -+++ log.c 2005-12-16 23:49:07 -@@ -68,6 +68,7 @@ struct { - { RERR_IPC , "error in IPC code" }, - { RERR_CRASHED , "sibling process crashed" }, - { RERR_TERMINATED , "sibling process terminated abnormally" }, -+ { RERR_FNAMECONV , "error in filename conversion" }, - { RERR_SIGNAL1 , "received SIGUSR1" }, - { RERR_SIGNAL , "received SIGINT, SIGTERM, or SIGHUP" }, - { RERR_WAITCHILD , "waitpid() failed" }, ---- orig/main.c 2006-01-15 14:46:15 -+++ main.c 2004-07-22 00:31:47 -@@ -414,7 +414,7 @@ static pid_t do_cmd(char *cmd, char *mac - whole_file = 1; - ret = local_child(argc, args, f_in, f_out, child_main); - } else -- ret = piped_child(args,f_in,f_out); -+ ret = piped_child(args, f_in, f_out, blocking_io, 1); - - if (dir) - free(dir); ---- orig/options.c 2006-01-14 08:14:30 -+++ options.c 2005-10-26 16:49:33 -@@ -142,6 +142,7 @@ char *basis_dir[MAX_BASIS_DIRS+1]; - char *config_file = NULL; - char *shell_cmd = NULL; - char *log_format = NULL; -+char *fname_convert_cmd = NULL; - char *password_file = NULL; - char *rsync_path = RSYNC_PATH; - char *backup_dir = NULL; -@@ -330,6 +331,7 @@ void usage(enum logcode F) - rprintf(F," --compare-dest=DIR also compare destination files relative to DIR\n"); - rprintf(F," --copy-dest=DIR ... 
and include copies of unchanged files\n"); - rprintf(F," --link-dest=DIR hardlink to files in DIR when unchanged\n"); -+ rprintf(F," --fname-convert=CMD invoke CMD for filename conversion\n"); - rprintf(F," -z, --compress compress file data during the transfer\n"); - rprintf(F," --compress-level=NUM explicitly set compression level\n"); - rprintf(F," -C, --cvs-exclude auto-ignore files the same way CVS does\n"); -@@ -464,6 +466,7 @@ static struct poptOption long_options[] - {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 }, - {"link-dest", 0, POPT_ARG_STRING, 0, OPT_LINK_DEST, 0, 0 }, - {"fuzzy", 'y', POPT_ARG_NONE, &fuzzy_basis, 0, 0, 0 }, -+ {"fname-convert", 0, POPT_ARG_STRING, &fname_convert_cmd, 0, 0, 0 }, - {"compress", 'z', POPT_ARG_NONE, 0, 'z', 0, 0 }, - {"compress-level", 0, POPT_ARG_INT, &def_compress_level, 'z', 0, 0 }, - {0, 'P', POPT_ARG_NONE, 0, 'P', 0, 0 }, ---- orig/pipe.c 2006-01-14 08:14:31 -+++ pipe.c 2004-07-03 20:18:02 -@@ -23,7 +23,6 @@ - - extern int am_sender; - extern int am_server; --extern int blocking_io; - extern int orig_umask; - extern int filesfrom_fd; - -@@ -39,8 +38,10 @@ extern int filesfrom_fd; - * If blocking_io is set then use blocking io on both fds. That can be - * used to cope with badly broken rsh implementations like the one on - * Solaris. -+ * -+ * If register_child is nonzero then the child is registered for autocleanup. - **/ --pid_t piped_child(char **command, int *f_in, int *f_out) -+pid_t piped_child(char **command, int *f_in, int *f_out, int blocking_io, int register_child) - { - pid_t pid; - int to_child_pipe[2]; -@@ -55,7 +56,7 @@ pid_t piped_child(char **command, int *f - exit_cleanup(RERR_IPC); - } - -- pid = do_fork(); -+ pid = register_child ? 
do_fork() : fork(); - if (pid == -1) { - rsyserr(FERROR, errno, "fork"); - exit_cleanup(RERR_IPC); ---- orig/syscall.c 2005-09-15 18:09:15 -+++ syscall.c 2004-07-02 21:39:00 -@@ -274,3 +274,34 @@ char *d_name(struct dirent *di) - return di->d_name; - #endif - } -+ -+/** -+ * A wrapper around select(2) that guarantees Linux-like updating of -+ * the timeout argument to contain the time left, so we can simply -+ * re-invoke in case of EINTR or EAGAIN. On BSD, select(2) doesn't -+ * change the timeout argument by itself. -+ **/ -+int do_select(int n, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout) -+{ -+ struct timeval intended, before, after; -+ int result; -+ -+ if (timeout == NULL) -+ return select(n, readfds, writefds, exceptfds, timeout); -+ -+ intended = *timeout; -+ gettimeofday(&before, NULL); -+ result = select(n, readfds, writefds, exceptfds, timeout); -+ gettimeofday(&after, NULL); -+ timeout->tv_sec = intended.tv_sec - (after.tv_sec - before.tv_sec); -+ timeout->tv_usec = intended.tv_usec - (after.tv_usec - before.tv_usec); -+ if (timeout->tv_usec >= 1000000) { -+ ++timeout->tv_sec; -+ timeout->tv_usec -= 1000000; -+ } else if (timeout->tv_usec < 0) { -+ --(timeout)->tv_sec; -+ timeout->tv_usec += 1000000; -+ } -+ -+ return result; -+} ---- orig/util.c 2006-01-14 08:14:31 -+++ util.c 2004-07-03 20:18:02 -@@ -1373,3 +1373,55 @@ uint32 fuzzy_distance(const char *s1, in - - return a[len2-1]; - } -+ -+/** -+ * Blocks until one of the following happens: -+ * - read_fd is nonnegative and has data to read -+ * - write_fd is nonnegative and can be written to -+ * - something terrible happened to either -+ * - the timeout (in milliseconds) has elapsed -+ * Return value is zero iff the timeout occurred. 
-+ */ -+char await_fds(int read_fd, int write_fd, int timeout_ms) -+{ -+ fd_set read_fds, write_fds, except_fds; -+ struct timeval tv; -+ int res; -+ -+ tv.tv_sec = timeout_ms / 1000; -+ tv.tv_usec = (timeout_ms % 1000) * 1000; -+ -+ while (1) { -+ int maxfd = 0; -+ FD_ZERO(&read_fds); -+ FD_ZERO(&write_fds); -+ FD_ZERO(&except_fds); -+ if (write_fd >= 0) { -+ FD_SET(write_fd, &write_fds); -+ FD_SET(write_fd, &except_fds); -+ if (write_fd > maxfd) -+ maxfd = write_fd; -+ } -+ if (read_fd >= 0) { -+ FD_SET(read_fd, &read_fds); -+ FD_SET(read_fd, &except_fds); -+ if (read_fd > maxfd) -+ maxfd = read_fd; -+ } -+ -+ res = do_select(maxfd+1, &read_fds, &write_fds, &except_fds, &tv); -+ if (res > 0) -+ return 1; -+ if (res < 0) { -+ if (errno == EINTR || errno == EAGAIN) -+ continue; /* Retry */ -+ rprintf(FERROR, "Error awaiting fname converter: %s\n", strerror(errno)); -+ exit_cleanup(RERR_FNAMECONV); -+ } -+ if (read_fd >= 0 && (FD_ISSET(read_fd, &read_fds) || FD_ISSET(read_fd, &except_fds))) -+ return 1; -+ if (write_fd >= 0 && (FD_ISSET(write_fd, &write_fds) || FD_ISSET(write_fd, &except_fds))) -+ return 1; -+ return 0; /* res == 0 and no FDs set, hence a timeout. */ -+ } -+} -- 2.34.1