This patch adds the --preallocate option that asks rsync to preallocate the copied files. This slows down the copy, but should reduce fragmentation on systems that need that. To use this patch, run these commands for a successful build: patch -p1 <patches/preallocate.diff [NOTE: everything from the patch file name above through the opening of the SYS_fallocate configure test was lost when angle-bracketed text was stripped from this copy. That covers the remaining build commands (upstream rsync patches normally continue with ./prepare-source, ./configure and make), the configure diff header, and the start of its hunk; recover them from the original preallocate.diff.] +#include <sys/types.h>], +[syscall(SYS_fallocate, 0, 0, (loff_t) 0, (loff_t) 0);], +rsync_cv_have_sys_fallocate=yes,rsync_cv_have_sys_fallocate=no)]) +if test x"$rsync_cv_have_sys_fallocate" = x"yes"; then + AC_DEFINE(HAVE_SYS_FALLOCATE, 1, [Define to 1 if you have the SYS_fallocate syscall number]) +fi + +if test x"$ac_cv_func_posix_fallocate" = x"yes"; then + AC_MSG_CHECKING([whether posix_fallocate is efficient]) + case $host_os in + *cygwin*) + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_EFFICIENT_POSIX_FALLOCATE, 1, + [Define if posix_fallocate is efficient (Cygwin)]) + ;; + *) + AC_MSG_RESULT(no) + ;; + esac +fi + +dnl End of preallocation stuff + AC_CHECK_FUNCS(getpgrp tcgetpgrp) if test $ac_cv_func_getpgrp = yes; then AC_FUNC_GETPGRP diff --git a/options.c b/options.c --- a/options.c +++ b/options.c @@ -73,6 +73,7 @@ int remove_source_files = 0; int one_file_system = 0; int protocol_version = PROTOCOL_VERSION; int sparse_files = 0; +int preallocate_files = 0; int do_compression = 0; int def_compress_level = Z_DEFAULT_COMPRESSION; int am_root = 0; /* 0 = normal, 1 = root, 2 = --super, -1 = --fake-super */ @@ -566,6 +567,7 @@ static void print_rsync_version(enum logcode f) char const *links = "no "; char const *iconv = "no "; char const *ipv6 = "no "; + char const *preallocation = "no "; STRUCT_STAT *dumstat; #if SUBPROTOCOL_VERSION != 0 @@ -599,6 +601,9 @@ static void print_rsync_version(enum logcode f) #if defined HAVE_LUTIMES && defined HAVE_UTIMES symtimes = ""; #endif +#ifdef SUPPORT_PREALLOCATION + preallocation = ""; +#endif rprintf(f, "%s version %s protocol version %d%s\n", RSYNC_NAME, RSYNC_VERSION, PROTOCOL_VERSION, subprotocol); @@ -612,8 +617,8 @@ static void print_rsync_version(enum logcode f) (int)(sizeof (int64) * 8)); 
rprintf(f, " %ssocketpairs, %shardlinks, %ssymlinks, %sIPv6, batchfiles, %sinplace,\n", got_socketpair, hardlinks, links, ipv6, have_inplace); - rprintf(f, " %sappend, %sACLs, %sxattrs, %siconv, %ssymtimes\n", - have_inplace, acls, xattrs, iconv, symtimes); + rprintf(f, " %sappend, %sACLs, %sxattrs, %siconv, %ssymtimes, %spreallocation\n", + have_inplace, acls, xattrs, iconv, symtimes, preallocation); #ifdef MAINTAINER_MODE rprintf(f, "Panic Action: \"%s\"\n", get_panic_action()); @@ -703,6 +708,9 @@ void usage(enum logcode F) rprintf(F," --fake-super store/recover privileged attrs using xattrs\n"); #endif rprintf(F," -S, --sparse handle sparse files efficiently\n"); +#ifdef SUPPORT_PREALLOCATION + rprintf(F," --preallocate allocate dest files before writing them\n"); +#endif rprintf(F," -n, --dry-run perform a trial run with no changes made\n"); rprintf(F," -W, --whole-file copy files whole (without delta-xfer algorithm)\n"); rprintf(F," -x, --one-file-system don't cross filesystem boundaries\n"); @@ -899,6 +907,7 @@ static struct poptOption long_options[] = { {"sparse", 'S', POPT_ARG_VAL, &sparse_files, 1, 0, 0 }, {"no-sparse", 0, POPT_ARG_VAL, &sparse_files, 0, 0, 0 }, {"no-S", 0, POPT_ARG_VAL, &sparse_files, 0, 0, 0 }, + {"preallocate", 0, POPT_ARG_NONE, &preallocate_files, 0, 0, 0}, {"inplace", 0, POPT_ARG_VAL, &inplace, 1, 0, 0 }, {"no-inplace", 0, POPT_ARG_VAL, &inplace, 0, 0, 0 }, {"append", 0, POPT_ARG_NONE, 0, OPT_APPEND, 0, 0 }, @@ -2626,6 +2635,9 @@ void server_options(char **args, int *argc_p) else if (remove_source_files) args[ac++] = "--remove-sent-files"; + if (preallocate_files && am_sender) + args[ac++] = "--preallocate"; + if (ac > MAX_SERVER_ARGS) { /* Not possible... 
*/ rprintf(FERROR, "argc overflow in server_options().\n"); exit_cleanup(RERR_MALLOC); diff --git a/receiver.c b/receiver.c --- a/receiver.c +++ b/receiver.c @@ -44,6 +44,7 @@ extern int cleanup_got_literal; extern int remove_source_files; extern int append_mode; extern int sparse_files; +extern int preallocate_files; extern int keep_partial; extern int checksum_len; extern int checksum_seed; @@ -175,6 +176,18 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, char *data; int32 i; char *map = NULL; +#ifdef SUPPORT_PREALLOCATION + OFF_T preallocated_len = 0; + + if (preallocate_files && fd != -1 && total_size > 0) { + /* Preallocate enough space for file's eventual length if + * possible; seems to reduce fragmentation on Windows. */ + if (do_fallocate(fd, 0, total_size) == 0) + preallocated_len = total_size; + else + rsyserr(FWARNING, errno, "do_fallocate %s", full_fname(fname)); + } +#endif read_sum_head(f_in, &sum); @@ -285,8 +298,14 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, goto report_write_error; #ifdef HAVE_FTRUNCATE - if (inplace && fd != -1 - && ftruncate(fd, offset) < 0) { + /* inplace: New data could be shorter than old data. + * preallocate_files: total_size could have been an overestimate. + * Cut off any extra preallocated zeros from dest file. 
*/ + if ((inplace +#ifdef SUPPORT_PREALLOCATION + || preallocated_len > offset +#endif + ) && fd != -1 && ftruncate(fd, offset) < 0) { rsyserr(FERROR_XFER, errno, "ftruncate failed on %s", full_fname(fname)); } diff --git a/rsync.h b/rsync.h --- a/rsync.h +++ b/rsync.h @@ -638,6 +638,13 @@ struct ht_int64_node { #define ACLS_NEED_MASK 1 #endif +#if defined HAVE_FTRUNCATE \ + && (defined HAVE_FALLOCATE \ + || defined HAVE_SYS_FALLOCATE \ + || defined HAVE_EFFICIENT_POSIX_FALLOCATE) +#define SUPPORT_PREALLOCATION 1 +#endif + union file_extras { int32 num; uint32 unum; diff --git a/rsync.yo b/rsync.yo --- a/rsync.yo +++ b/rsync.yo @@ -359,6 +359,7 @@ to the detailed description below for a complete description. verb( --super receiver attempts super-user activities --fake-super store/recover privileged attrs using xattrs -S, --sparse handle sparse files efficiently + --preallocate allocate dest files before writing -n, --dry-run perform a trial run with no changes made -W, --whole-file copy files whole (w/o delta-xfer algorithm) -x, --one-file-system don't cross filesystem boundaries @@ -1127,6 +1128,18 @@ NOTE: Don't use this option when the destination is a Solaris "tmpfs" filesystem. It doesn't seem to handle seeks over null regions correctly and ends up corrupting the files. +dit(bf(--preallocate)) This tells the receiver to allocate each destination +file to its eventual size before writing data to the file. Rsync will only use +the real filesystem-level preallocation support provided by bf(fallocate)(2) or +Cygwin's bf(posix_fallocate)(3), not the slow glibc implementation that writes +a zero byte into each block. If the receiver is remote, this nonstandard +option only works if the receiver also has the preallocation patch. + +Without this option on MS Windows, very large destination files tend to be +broken into thousands of fragments; advising Windows ahead of time of the +eventual file size using this option usually reduces the number of +fragments to one. 
The usefulness of this option on Linux is yet to be tested. + dit(bf(-n, --dry-run)) This makes rsync perform a trial run that doesn't make any changes (and produces mostly the same output as a real run). It is most commonly used in combination with the bf(-v, --verbose) and/or diff --git a/syscall.c b/syscall.c --- a/syscall.c +++ b/syscall.c @@ -29,6 +29,10 @@ #include <sys/attr.h> #endif +#if defined HAVE_SYS_FALLOCATE && !defined HAVE_FALLOCATE +#include <sys/syscall.h> +#endif + extern int dry_run; extern int am_root; extern int read_only; @@ -282,3 +286,34 @@ OFF_T do_lseek(int fd, OFF_T offset, int whence) return lseek(fd, offset, whence); #endif } + +#ifdef SUPPORT_PREALLOCATION +/* Reserve `length' bytes of storage for `fd' starting at `offset', via + * fallocate(), the raw SYS_fallocate syscall, or posix_fallocate(), + * whichever the build selected. Returns 0 on success; the three allocation + * branches report failure as -1 with errno set. */ +int do_fallocate(int fd, OFF_T offset, OFF_T length) +{ + RETURN_ERROR_IF(dry_run, 0); + RETURN_ERROR_IF_RO_OR_LO; + /* TODO: Use FALLOC_FL_KEEP_SIZE to avoid the need to truncate. */ +#if defined HAVE_FALLOCATE + return fallocate(fd, 0, offset, length); +#elif defined HAVE_SYS_FALLOCATE + return syscall(SYS_fallocate, fd, 0, (loff_t) offset, (loff_t) length); +#elif defined HAVE_EFFICIENT_POSIX_FALLOCATE + { + /* posix_fallocate() returns the error number itself and does not set + * errno, but our callers log errno on failure, so translate it. */ + int err = posix_fallocate(fd, offset, length); + if (err != 0) { + errno = err; + return -1; + } + return 0; + } +#else +#error coding error in SUPPORT_PREALLOCATION +#endif +} +#endif diff --git a/t_stub.c b/t_stub.c --- a/t_stub.c +++ b/t_stub.c @@ -22,6 +22,7 @@ #include "rsync.h" int modify_window = 0; +int preallocate_files = 0; int module_id = -1; int relative_paths = 0; int module_dirlen = 0; diff --git a/util.c b/util.c --- a/util.c +++ b/util.c @@ -26,6 +26,7 @@ #include "inums.h" extern int dry_run; +extern int preallocate_files; extern int module_id; extern int modify_window; extern int relative_paths; @@ -324,6 +325,10 @@ int copy_file(const char *source, const char *dest, int ofd, mode_t mode) int ifd; char buf[1024 * 8]; int len; /* Number of bytes read into `buf'. 
*/ +#ifdef SUPPORT_PREALLOCATION + OFF_T preallocated_len = 0; + OFF_T offset = 0; +#endif if ((ifd = do_open(source, O_RDONLY, 0)) < 0) { int save_errno = errno; @@ -349,7 +354,27 @@ int copy_file(const char *source, const char *dest, int ofd, mode_t mode) } } +#ifdef SUPPORT_PREALLOCATION + if (preallocate_files) { + /* Preallocate enough space for file's eventual length if + * possible; seems to reduce fragmentation on Windows. */ + STRUCT_STAT srcst; + if (do_fstat(ifd, &srcst) == 0) { + if (srcst.st_size > 0) { + if (do_fallocate(ofd, 0, srcst.st_size) == 0) + preallocated_len = srcst.st_size; + else + rsyserr(FWARNING, errno, "do_fallocate %s", full_fname(dest)); + } + } else + rsyserr(FWARNING, errno, "fstat %s", full_fname(source)); + } +#endif + while ((len = safe_read(ifd, buf, sizeof buf)) > 0) { +#ifdef SUPPORT_PREALLOCATION + offset += len; +#endif if (full_write(ofd, buf, len) < 0) { int save_errno = errno; rsyserr(FERROR_XFER, errno, "write %s", full_fname(dest)); @@ -374,6 +399,16 @@ int copy_file(const char *source, const char *dest, int ofd, mode_t mode) full_fname(source)); } +#ifdef SUPPORT_PREALLOCATION + /* Source file might have shrunk since we fstatted it. + * Cut off any extra preallocated zeros from dest file. */ + if (preallocated_len > offset) + if (ftruncate(ofd, offset) < 0) + /* If we fail to truncate, the dest file may be wrong, so we + * must trigger the "partial transfer" error. */ + rsyserr(FERROR_XFER, errno, "ftruncate %s", full_fname(dest)); +#endif + if (close(ofd) < 0) { int save_errno = errno; rsyserr(FERROR_XFER, errno, "close failed on %s",