| 1 | This patch adds an option --tr=BAD/GOOD to transliterate filenames. It |
| 2 | can be used to remove characters illegal on the destination filesystem. |
| 3 | Jeff Weber expressed interest in this: |
| 4 | |
| 5 | http://lists.samba.org/archive/rsync/2007-October/018996.html |
| 6 | |
| 7 | This patch is a COMPLETE HACK that covers the most common cases. Others |
| 8 | are welcome to improve it. |
| 9 | |
| 10 | To use this patch, run these commands for a successful build: |
| 11 | |
| 12 | patch -p1 <patches/transliterate.diff |
| 13 | ./configure (optional if already run) |
| 14 | make |
| 15 | |
| 16 | diff --git a/flist.c b/flist.c |
| 17 | --- a/flist.c |
| 18 | +++ b/flist.c |
| 19 | @@ -83,6 +83,9 @@ extern int filesfrom_convert; |
| 20 | extern iconv_t ic_send, ic_recv; |
| 21 | #endif |
| 22 | |
| 23 | +extern char *tr_opt, *tr_left, *tr_right; |
| 24 | +extern int tr_right_len; |
| 25 | + |
| 26 | #define PTR_SIZE (sizeof (struct file_struct *)) |
| 27 | |
| 28 | int io_error; |
| 29 | @@ -616,6 +619,24 @@ static void send_file_entry(int f, const char *fname, struct file_struct *file, |
| 30 | stats.total_size += F_LENGTH(file); |
| 31 | } |
| 32 | |
| 33 | +/* Transliterate thisname in place per tr_left/tr_right; the result is never longer. */ |
| 34 | +static void transliterate(char *thisname) |
| 35 | +{ |
| 36 | +	char *src = thisname, *dst = thisname, *hit; |
| 37 | + |
| 38 | +	for (; *src != '\0'; src++) { |
| 39 | +		hit = strchr(tr_left, *src); |
| 40 | +		if (hit == NULL) { |
| 41 | +			/* Not listed in the left string: copy through unchanged. */ |
| 42 | +			*dst++ = *src; |
| 43 | +		} else if (hit - tr_left < tr_right_len) { |
| 44 | +			/* Listed with a counterpart: emit the right string's char at that index. */ |
| 45 | +			*dst++ = tr_right[hit - tr_left]; |
| 46 | +		} /* Listed beyond the end of the right string: drop the character. */ |
| 47 | +	} |
| 48 | +	*dst = '\0'; |
| 49 | +} |
| 50 | + |
| 51 | static struct file_struct *recv_file_entry(struct file_list *flist, |
| 52 | int xflags, int f) |
| 53 | { |
| 54 | @@ -684,6 +705,9 @@ static struct file_struct *recv_file_entry(struct file_list *flist, |
| 55 | } |
| 56 | #endif |
| 57 | |
| 58 | + if (tr_opt) |
| 59 | + transliterate(thisname); |
| 60 | + |
| 61 | if (*thisname) |
| 62 | clean_fname(thisname, 0); |
| 63 | |
| 64 | diff --git a/options.c b/options.c |
| 65 | --- a/options.c |
| 66 | +++ b/options.c |
| 67 | @@ -184,6 +184,8 @@ int logfile_format_has_i = 0; |
| 68 | int logfile_format_has_o_or_i = 0; |
| 69 | int always_checksum = 0; |
| 70 | int list_only = 0; |
| 71 | +char *tr_opt = NULL, *tr_left = NULL, *tr_right = NULL; |
| 72 | +int tr_right_len = 0; |
| 73 | |
| 74 | #define MAX_BATCH_NAME_LEN 256 /* Must be less than MAXPATHLEN-13 */ |
| 75 | char *batch_name = NULL; |
| 76 | @@ -431,6 +433,7 @@ void usage(enum logcode F) |
| 77 | #ifdef ICONV_OPTION |
| 78 | rprintf(F," --iconv=CONVERT_SPEC request charset conversion of filenames\n"); |
| 79 | #endif |
| 80 | + rprintf(F," --tr=BAD/GOOD transliterate filenames\n"); |
| 81 | rprintf(F," -4, --ipv4 prefer IPv4\n"); |
| 82 | rprintf(F," -6, --ipv6 prefer IPv6\n"); |
| 83 | rprintf(F," --version print version number\n"); |
| 84 | @@ -633,6 +636,7 @@ static struct poptOption long_options[] = { |
| 85 | {"iconv", 0, POPT_ARG_STRING, &iconv_opt, 0, 0, 0 }, |
| 86 | {"no-iconv", 0, POPT_ARG_NONE, 0, OPT_NO_ICONV, 0, 0 }, |
| 87 | #endif |
| 88 | + {"tr", 0, POPT_ARG_STRING, &tr_opt, 0, 0, 0 }, |
| 89 | {"ipv4", '4', POPT_ARG_VAL, &default_af_hint, AF_INET, 0, 0 }, |
| 90 | {"ipv6", '6', POPT_ARG_VAL, &default_af_hint, AF_INET6, 0, 0 }, |
| 91 | {"8-bit-output", '8', POPT_ARG_VAL, &allow_8bit_chars, 1, 0, 0 }, |
| 92 | @@ -1680,6 +1684,31 @@ int parse_arguments(int *argc_p, const char ***argv_p) |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | + /* The easiest way to get a local server right is to parse --tr on both sides. */ |
| 97 | + if (tr_opt) { |
| 98 | + if (*tr_opt) { |
| 99 | + char *p; |
| 100 | + |
| 101 | + need_unsorted_flist = 1; |
| 102 | + /* Split a private copy at the slash so that tr_opt itself is still |
| 103 | + * intact when the original option is transmitted to the server. */ |
| 104 | + tr_left = strdup(tr_opt); |
| 105 | + p = strchr(tr_left, '/'); |
| 106 | + if (p != NULL) { |
| 107 | + *p = '\0'; |
| 108 | + p++; |
| 109 | + tr_right = p; |
| 110 | + tr_right_len = strlen(tr_right); |
| 111 | + if (strchr(tr_right, '/') != NULL) { |
| 112 | + snprintf(err_buf, sizeof err_buf, |
| 113 | + "--tr cannot transliterate slashes\n"); |
| 114 | + return 0; |
| 115 | + } |
| 116 | + } |
| 117 | + } else |
| 118 | + tr_opt = NULL; |
| 119 | + } |
| 120 | + |
| 121 | am_starting_up = 0; |
| 122 | |
| 123 | return 1; |
| 124 | @@ -2048,6 +2077,12 @@ void server_options(char **args, int *argc_p) |
| 125 | else if (remove_source_files) |
| 126 | args[ac++] = "--remove-sent-files"; |
| 127 | |
| 128 | + if (tr_opt) { |
| 129 | + if (asprintf(&arg, "--tr=%s", tr_opt) < 0) |
| 130 | + goto oom; |
| 131 | + args[ac++] = arg; |
| 132 | + } |
| 133 | + |
| 134 | *argc_p = ac; |
| 135 | return; |
| 136 | |
| 137 | diff --git a/rsync.yo b/rsync.yo |
| 138 | --- a/rsync.yo |
| 139 | +++ b/rsync.yo |
| 140 | @@ -423,6 +423,7 @@ to the detailed description below for a complete description. verb( |
| 141 | --read-batch=FILE read a batched update from FILE |
| 142 | --protocol=NUM force an older protocol version to be used |
| 143 | --iconv=CONVERT_SPEC request charset conversion of filenames |
| 144 | + --tr=BAD/GOOD transliterate filenames |
| 145 | --checksum-seed=NUM set block/file checksum seed (advanced) |
| 146 | -4, --ipv4 prefer IPv4 |
| 147 | -6, --ipv6 prefer IPv6 |
| 148 | @@ -2077,6 +2078,22 @@ daemon uses the charset specified in its "charset" configuration parameter |
| 149 | regardless of the remote charset you actually pass. Thus, you may feel free to |
| 150 | specify just the local charset for a daemon transfer (e.g. bf(--iconv=utf8)). |
| 151 | |
| 152 | +dit(bf(--tr=BAD/GOOD)) Transliterates filenames on the receiver, after the |
| 153 | +iconv conversion (if any). This can be used to remove characters illegal |
| 154 | +on the destination filesystem. If you use this option, consider saving a |
| 155 | +"find . -ls" listing of the source in the destination to help you determine |
| 156 | +the original filenames in case of need. |
| 157 | + |
| 158 | +The argument consists of a string of characters to remove, optionally |
| 159 | +followed by a slash and a string of corresponding characters with which to |
| 160 | +replace them. The second string may be shorter, in which case any leftover |
| 161 | +characters in the first string are simply deleted. For example, |
| 162 | +bf(--tr=':\/!') replaces colons with exclamation marks and deletes backslashes. |
| 163 | +Slashes cannot be transliterated because doing so would cause havoc. |
| 164 | + |
| 165 | +If the receiver is invoked over a remote shell, use bf(--protect-args) to |
| 166 | +stop the shell from interpreting any nasty characters in the argument. |
| 167 | + |
| 168 | dit(bf(-4, --ipv4) or bf(-6, --ipv6)) Tells rsync to prefer IPv4/IPv6 |
| 169 | when creating sockets. This only affects sockets that rsync has direct |
| 170 | control over, such as the outgoing socket when directly contacting an |