| 1 | This patch adds the --sparse-block option. Andrea Righi writes: |
| 2 | |
| 3 | In some filesystems, typically those optimized for high I/O throughput (like |
| 4 | IBM GPFS, IBM SAN FS, or distributed filesystems in general), a lot of |
| 5 | lseek() operations can strongly impact performance. In these cases it |
| 6 | can be helpful to enlarge the block size used to handle sparse files |
| 7 | directly from a command-line parameter. |
| 8 | |
| 9 | For example, using a sparse write size of 32KB, I've been able to |
| 10 | increase the transfer rate by an order of magnitude when copying the output |
| 11 | files of scientific applications from GPFS to GPFS or GPFS to SAN FS. |
| 12 | |
| 13 | -Andrea |
| 14 | |
| 15 | To use this patch, run these commands for a successful build: |
| 16 | |
| 17 | patch -p1 <patches/sparse-block.diff |
| 18 | ./configure (optional if already run) |
| 19 | make |
| 20 | |
| 21 | diff --git a/fileio.c b/fileio.c |
| 22 | --- a/fileio.c |
| 23 | +++ b/fileio.c |
| 24 | @@ -26,6 +26,7 @@ |
| 25 | #endif |
| 26 | |
| 27 | extern int sparse_files; |
| 28 | +extern long sparse_files_block_size; |
| 29 | |
| 30 | static char last_byte; |
| 31 | static size_t sparse_seek = 0; |
| 32 | @@ -115,7 +116,7 @@ int write_file(int f,char *buf,size_t len) |
| 33 | while (len > 0) { |
| 34 | int r1; |
| 35 | if (sparse_files > 0) { |
| 36 | - int len1 = MIN(len, SPARSE_WRITE_SIZE); |
| 37 | + int len1 = MIN(len, (size_t)sparse_files_block_size); |
| 38 | r1 = write_sparse(f, buf, len1); |
| 39 | } else { |
| 40 | if (!wf_writeBuf) { |
| 41 | diff --git a/options.c b/options.c |
| 42 | --- a/options.c |
| 43 | +++ b/options.c |
| 44 | @@ -73,6 +73,7 @@ int remove_source_files = 0; |
| 45 | int one_file_system = 0; |
| 46 | int protocol_version = PROTOCOL_VERSION; |
| 47 | int sparse_files = 0; |
| 48 | +long sparse_files_block_size = SPARSE_WRITE_SIZE; |
| 49 | int do_compression = 0; |
| 50 | int def_compress_level = Z_DEFAULT_COMPRESSION; |
| 51 | int am_root = 0; /* 0 = normal, 1 = root, 2 = --super, -1 = --fake-super */ |
| 52 | @@ -358,6 +359,7 @@ void usage(enum logcode F) |
| 53 | rprintf(F," --fake-super store/recover privileged attrs using xattrs\n"); |
| 54 | #endif |
| 55 | rprintf(F," -S, --sparse handle sparse files efficiently\n"); |
| 56 | + rprintf(F," --sparse-block=SIZE set the block size used to handle sparse files\n"); |
| 57 | rprintf(F," -n, --dry-run perform a trial run with no changes made\n"); |
| 58 | rprintf(F," -W, --whole-file copy files whole (without delta-xfer algorithm)\n"); |
| 59 | rprintf(F," -x, --one-file-system don't cross filesystem boundaries\n"); |
| 60 | @@ -542,6 +544,7 @@ static struct poptOption long_options[] = { |
| 61 | {"sparse", 'S', POPT_ARG_VAL, &sparse_files, 1, 0, 0 }, |
| 62 | {"no-sparse", 0, POPT_ARG_VAL, &sparse_files, 0, 0, 0 }, |
| 63 | {"no-S", 0, POPT_ARG_VAL, &sparse_files, 0, 0, 0 }, |
| 64 | + {"sparse-block", 0, POPT_ARG_LONG, &sparse_files_block_size, 0, 0, 0 }, |
| 65 | {"inplace", 0, POPT_ARG_VAL, &inplace, 1, 0, 0 }, |
| 66 | {"no-inplace", 0, POPT_ARG_VAL, &inplace, 0, 0, 0 }, |
| 67 | {"append", 0, POPT_ARG_NONE, 0, OPT_APPEND, 0, 0 }, |
| 68 | @@ -1899,6 +1902,12 @@ void server_options(char **args, int *argc_p) |
| 69 | args[ac++] = arg; |
| 70 | } |
| 71 | |
| 72 | + if (sparse_files_block_size) { |
| 73 | + if (asprintf(&arg, "--sparse-block=%lu", sparse_files_block_size) < 0) |
| 74 | + goto oom; |
| 75 | + args[ac++] = arg; |
| 76 | + } |
| 77 | + |
| 78 | if (io_timeout) { |
| 79 | if (asprintf(&arg, "--timeout=%d", io_timeout) < 0) |
| 80 | goto oom; |
| 81 | diff --git a/rsync.yo b/rsync.yo |
| 82 | --- a/rsync.yo |
| 83 | +++ b/rsync.yo |
| 84 | @@ -352,6 +352,7 @@ to the detailed description below for a complete description. verb( |
| 85 | --super receiver attempts super-user activities |
| 86 | --fake-super store/recover privileged attrs using xattrs |
| 87 | -S, --sparse handle sparse files efficiently |
| 88 | + --sparse-block=SIZE set block size used to handle sparse files |
| 89 | -n, --dry-run perform a trial run with no changes made |
| 90 | -W, --whole-file copy files whole (w/o delta-xfer algorithm) |
| 91 | -x, --one-file-system don't cross filesystem boundaries |
| 92 | @@ -1049,6 +1050,15 @@ NOTE: Don't use this option when the destination is a Solaris "tmpfs" |
| 93 | filesystem. It doesn't seem to handle seeks over null regions |
| 94 | correctly and ends up corrupting the files. |
| 95 | |
| 96 | +dit(bf(--sparse-block=SIZE)) Change the block size used to handle sparse files |
| 97 | +to SIZE bytes. This option only has an effect if the bf(--sparse) (bf(-S)) |
| 98 | +option was also specified. The default block size used by rsync to detect a |
| 99 | +file hole is 1024 bytes; when the receiver writes data to the destination file |
| 100 | +and the bf(--sparse) option is used, rsync checks each 1024-byte chunk to see |
| 101 | +whether it is actually filled with data or not. With certain filesystems (for |
| 102 | +example, those optimized to receive data streams), enlarging this block size |
| 103 | +can strongly increase performance. This option can be used to tune the block size. |
| 104 | + |
| 105 | dit(bf(-n, --dry-run)) This makes rsync perform a trial run that doesn't |
| 106 | make any changes (and produces mostly the same output as a real run). It |
| 107 | is most commonly used in combination with the bf(-v, --verbose) and/or |