| 1 | This patch adds the --sparse-block option. Andrea Righi writes: |
| 2 | |
| 3 | In some filesystems, typically optimized for large I/O throughputs (like |
| 4 | IBM GPFS, IBM SAN FS, or distributed filesystems in general) a lot of |
| 5 | lseek() operations can strongly impact performance. In these cases it |
| 6 | can be helpful to enlarge the block size used to handle sparse files |
| 7 | directly from a command line parameter. |
| 8 | |
| 9 | For example, using a sparse write size of 32KB, I've been able to |
| 10 | increase the transfer rate by an order of magnitude when copying the output |
| 11 | files of scientific applications from GPFS to GPFS or GPFS to SAN FS. |
| 12 | |
| 13 | -Andrea |
| 14 | |
| 15 | To use this patch, run these commands for a successful build: |
| 16 | |
| 17 | patch -p1 <patches/sparse-block.diff |
| 18 | ./configure (optional if already run) |
| 19 | make |
| 20 | |
| 21 | based-on: a01e3b490eb36ccf9e704840e1b6683dab867550 |
| 22 | diff --git a/fileio.c b/fileio.c |
| 23 | --- a/fileio.c |
| 24 | +++ b/fileio.c |
| 25 | @@ -27,6 +27,7 @@ |
| 26 | #endif |
| 27 | |
| 28 | extern int sparse_files; |
| 29 | +extern long sparse_files_block_size; /* type must match the definition in options.c */ |
| 30 | |
| 31 | static char last_byte; |
| 32 | static OFF_T sparse_seek = 0; |
| 33 | @@ -116,7 +117,7 @@ int write_file(int f, char *buf, int len) |
| 34 | while (len > 0) { |
| 35 | int r1; |
| 36 | if (sparse_files > 0) { |
| 37 | - int len1 = MIN(len, SPARSE_WRITE_SIZE); |
| 38 | + int len1 = MIN(len, sparse_files_block_size); |
| 39 | r1 = write_sparse(f, buf, len1); |
| 40 | } else { |
| 41 | if (!wf_writeBuf) { |
| 42 | diff --git a/options.c b/options.c |
| 43 | --- a/options.c |
| 44 | +++ b/options.c |
| 45 | @@ -73,6 +73,7 @@ int remove_source_files = 0; |
| 46 | int one_file_system = 0; |
| 47 | int protocol_version = PROTOCOL_VERSION; |
| 48 | int sparse_files = 0; |
| 49 | +long sparse_files_block_size = SPARSE_WRITE_SIZE; |
| 50 | int do_compression = 0; |
| 51 | int def_compress_level = Z_DEFAULT_COMPRESSION; |
| 52 | int am_root = 0; /* 0 = normal, 1 = root, 2 = --super, -1 = --fake-super */ |
| 53 | @@ -704,6 +705,7 @@ void usage(enum logcode F) |
| 54 | rprintf(F," --fake-super store/recover privileged attrs using xattrs\n"); |
| 55 | #endif |
| 56 | rprintf(F," -S, --sparse handle sparse files efficiently\n"); |
| 57 | + rprintf(F," --sparse-block=SIZE set the block size used to handle sparse files\n"); |
| 58 | rprintf(F," -n, --dry-run perform a trial run with no changes made\n"); |
| 59 | rprintf(F," -W, --whole-file copy files whole (without delta-xfer algorithm)\n"); |
| 60 | rprintf(F," -x, --one-file-system don't cross filesystem boundaries\n"); |
| 61 | @@ -900,6 +902,7 @@ static struct poptOption long_options[] = { |
| 62 | {"sparse", 'S', POPT_ARG_VAL, &sparse_files, 1, 0, 0 }, |
| 63 | {"no-sparse", 0, POPT_ARG_VAL, &sparse_files, 0, 0, 0 }, |
| 64 | {"no-S", 0, POPT_ARG_VAL, &sparse_files, 0, 0, 0 }, |
| 65 | + {"sparse-block", 0, POPT_ARG_LONG, &sparse_files_block_size, 0, 0, 0 }, |
| 66 | {"inplace", 0, POPT_ARG_VAL, &inplace, 1, 0, 0 }, |
| 67 | {"no-inplace", 0, POPT_ARG_VAL, &inplace, 0, 0, 0 }, |
| 68 | {"append", 0, POPT_ARG_NONE, 0, OPT_APPEND, 0, 0 }, |
| 69 | @@ -2468,6 +2471,12 @@ void server_options(char **args, int *argc_p) |
| 70 | args[ac++] = arg; |
| 71 | } |
| 72 | |
| 73 | + if (sparse_files_block_size) { |
| 74 | + if (asprintf(&arg, "--sparse-block=%ld", sparse_files_block_size) < 0) /* signed long => %ld */ |
| 75 | + goto oom; |
| 76 | + args[ac++] = arg; |
| 77 | + } |
| 78 | + |
| 79 | if (io_timeout) { |
| 80 | if (asprintf(&arg, "--timeout=%d", io_timeout) < 0) |
| 81 | goto oom; |
| 82 | diff --git a/rsync.yo b/rsync.yo |
| 83 | --- a/rsync.yo |
| 84 | +++ b/rsync.yo |
| 85 | @@ -359,6 +359,7 @@ to the detailed description below for a complete description. verb( |
| 86 | --super receiver attempts super-user activities |
| 87 | --fake-super store/recover privileged attrs using xattrs |
| 88 | -S, --sparse handle sparse files efficiently |
| 89 | + --sparse-block=SIZE set block size used to handle sparse files |
| 90 | -n, --dry-run perform a trial run with no changes made |
| 91 | -W, --whole-file copy files whole (w/o delta-xfer algorithm) |
| 92 | -x, --one-file-system don't cross filesystem boundaries |
| 93 | @@ -1127,6 +1128,15 @@ NOTE: Don't use this option when the destination is a Solaris "tmpfs" |
| 94 | filesystem. It seems to have problems seeking over null regions, |
| 95 | and ends up corrupting the files. |
| 96 | |
| 97 | +dit(bf(--sparse-block=SIZE)) Change the block size used to handle sparse files |
| 98 | +to SIZE bytes. This option only has an effect if the bf(--sparse) (bf(-S)) |
| 99 | +option was also specified. The default block size used by rsync to detect a |
| 100 | +file hole is 1024 bytes; when the receiver writes data to the destination file |
| 101 | +and option bf(--sparse) is used, rsync checks each 1024-byte chunk to detect |
| 102 | +whether it is actually filled with data or not. With certain filesystems, |
| 103 | +optimized to receive data streams for example, enlarging this block size can |
| 104 | +strongly increase performance. The option can be used to tune this block size. |
| 105 | + |
| 106 | dit(bf(-n, --dry-run)) This makes rsync perform a trial run that doesn't |
| 107 | make any changes (and produces mostly the same output as a real run). It |
| 108 | is most commonly used in combination with the bf(-v, --verbose) and/or |