This patch adds the --preallocate option that asks rsync to preallocate the copied files. This slows down the copy, but should reduce fragmentation on systems that need that. To use this patch, run these commands for a successful build: patch -p1 +#include ], +[fallocate(0, 0, 0, 0);], +rsync_cv_have_fallocate=yes,rsync_cv_have_fallocate=no)]) +if test x"$rsync_cv_have_fallocate" = x"yes"; then + AC_DEFINE(HAVE_FALLOCATE, 1, [Define to 1 if you have the fallocate function and it compiles and links without error]) +fi + +AC_CACHE_CHECK([for SYS_fallocate],rsync_cv_have_sys_fallocate,[ +AC_TRY_COMPILE([#include +#include ], +[syscall(SYS_fallocate, 0, 0, (loff_t)0, (loff_t)0);], +rsync_cv_have_sys_fallocate=yes,rsync_cv_have_sys_fallocate=no)]) +if test x"$rsync_cv_have_sys_fallocate" = x"yes"; then + AC_DEFINE(HAVE_SYS_FALLOCATE, 1, [Define to 1 if you have the SYS_fallocate syscall number]) +fi + +if test x"$ac_cv_func_posix_fallocate" = x"yes"; then + AC_MSG_CHECKING([whether posix_fallocate is efficient]) + case $host_os in + *cygwin*) + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_EFFICIENT_POSIX_FALLOCATE, 1, + [Define if posix_fallocate is efficient (Cygwin)]) + ;; + *) + AC_MSG_RESULT(no) + ;; + esac +fi + +dnl End of preallocation stuff + AC_CHECK_FUNCS(getpgrp tcgetpgrp) if test $ac_cv_func_getpgrp = yes; then AC_FUNC_GETPGRP diff --git a/options.c b/options.c --- a/options.c +++ b/options.c @@ -73,6 +73,7 @@ int remove_source_files = 0; int one_file_system = 0; int protocol_version = PROTOCOL_VERSION; int sparse_files = 0; +int preallocate_files = 0; int do_compression = 0; int def_compress_level = Z_DEFAULT_COMPRESSION; int am_root = 0; /* 0 = normal, 1 = root, 2 = --super, -1 = --fake-super */ @@ -567,6 +568,7 @@ static void print_rsync_version(enum logcode f) char const *links = "no "; char const *iconv = "no "; char const *ipv6 = "no "; + char const *preallocation = "no "; STRUCT_STAT *dumstat; #if SUBPROTOCOL_VERSION != 0 @@ -600,6 +602,9 @@ static void print_rsync_version(enum logcode f) #ifdef CAN_SET_SYMLINK_TIMES symtimes = ""; #endif +#ifdef SUPPORT_PREALLOCATION + preallocation = ""; +#endif rprintf(f, "%s version %s protocol version %d%s\n", RSYNC_NAME, RSYNC_VERSION, PROTOCOL_VERSION, subprotocol); @@ -613,8 +618,8 @@ static void print_rsync_version(enum logcode f) (int)(sizeof (int64) * 8)); rprintf(f, " %ssocketpairs, %shardlinks, %ssymlinks, %sIPv6, batchfiles, %sinplace,\n", got_socketpair, hardlinks, links, ipv6, have_inplace); - rprintf(f, " %sappend, %sACLs, %sxattrs, %siconv, %ssymtimes\n", - have_inplace, acls, xattrs, iconv, symtimes); + rprintf(f, " %sappend, %sACLs, %sxattrs, %siconv, %ssymtimes, %spreallocation\n", + have_inplace, acls, xattrs, iconv, symtimes, preallocation); #ifdef MAINTAINER_MODE rprintf(f, "Panic Action: \"%s\"\n", get_panic_action()); @@ -704,6 +709,11 @@ void usage(enum logcode F) rprintf(F," --fake-super store/recover privileged attrs using xattrs\n"); #endif rprintf(F," -S, --sparse handle sparse files efficiently\n"); +#ifdef SUPPORT_PREALLOCATION + rprintf(F," --preallocate allocate dest files before writing them\n"); +#else + rprintf(F," --preallocate pre-allocate dest files on remote receiver\n"); +#endif rprintf(F," -n, --dry-run perform a trial run with no changes made\n"); rprintf(F," -W, --whole-file copy files whole (without delta-xfer algorithm)\n"); rprintf(F," -x, --one-file-system don't cross filesystem boundaries\n"); @@ -900,6 +910,7 @@ static struct poptOption long_options[] = { {"sparse", 'S', POPT_ARG_VAL, &sparse_files, 1, 0, 0 }, {"no-sparse", 0, POPT_ARG_VAL, &sparse_files, 0, 0, 0 }, {"no-S", 0, POPT_ARG_VAL, &sparse_files, 0, 0, 0 }, + {"preallocate", 0, POPT_ARG_NONE, &preallocate_files, 0, 0, 0}, {"inplace", 0, POPT_ARG_VAL, &inplace, 1, 0, 0 }, {"no-inplace", 0, POPT_ARG_VAL, &inplace, 0, 0, 0 }, {"append", 0, POPT_ARG_NONE, 0, OPT_APPEND, 0, 0 }, @@ -2661,6 +2672,9 @@ void server_options(char **args, int *argc_p) else if (remove_source_files) args[ac++] = "--remove-sent-files"; + if (preallocate_files && am_sender) + args[ac++] = "--preallocate"; + if (ac > MAX_SERVER_ARGS) { /* Not possible... */ rprintf(FERROR, "argc overflow in server_options().\n"); exit_cleanup(RERR_MALLOC); diff --git a/receiver.c b/receiver.c --- a/receiver.c +++ b/receiver.c @@ -44,6 +44,7 @@ extern int cleanup_got_literal; extern int remove_source_files; extern int append_mode; extern int sparse_files; +extern int preallocate_files; extern int keep_partial; extern int checksum_len; extern int checksum_seed; @@ -207,6 +208,22 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, char *data; int32 i; char *map = NULL; +#ifdef SUPPORT_PREALLOCATION +#ifdef PREALLOCATE_NEEDS_TRUNCATE + OFF_T preallocated_len = 0; +#endif + + if (preallocate_files && fd != -1 && total_size > 0) { + /* Try to preallocate enough space for file's eventual length. Can + * reduce fragmentation on filesystems like ext4, xfs, and NTFS. */ + if (do_fallocate(fd, 0, total_size) == 0) { +#ifdef PREALLOCATE_NEEDS_TRUNCATE + preallocated_len = total_size; +#endif + } else + rsyserr(FWARNING, errno, "do_fallocate %s", full_fname(fname)); + } +#endif read_sum_head(f_in, &sum); @@ -317,8 +334,14 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, goto report_write_error; #ifdef HAVE_FTRUNCATE - if (inplace && fd != -1 - && ftruncate(fd, offset) < 0) { + /* inplace: New data could be shorter than old data. + * preallocate_files: total_size could have been an overestimate. + * Cut off any extra preallocated zeros from dest file. */ + if ((inplace +#ifdef PREALLOCATE_NEEDS_TRUNCATE + || preallocated_len > offset +#endif + ) && fd != -1 && ftruncate(fd, offset) < 0) { rsyserr(FERROR_XFER, errno, "ftruncate failed on %s", full_fname(fname)); } diff --git a/rsync.h b/rsync.h --- a/rsync.h +++ b/rsync.h @@ -646,6 +646,21 @@ struct ht_int64_node { #define ACLS_NEED_MASK 1 #endif +#if defined HAVE_FALLOCATE || HAVE_SYS_FALLOCATE +#include +#ifdef FALLOC_FL_KEEP_SIZE +#define SUPPORT_PREALLOCATION 1 +#elif defined HAVE_FTRUNCATE +#define SUPPORT_PREALLOCATION 1 +#define PREALLOCATE_NEEDS_TRUNCATE 1 +#endif +#else /* !fallocate */ +#if defined HAVE_EFFICIENT_POSIX_FALLOCATE && defined HAVE_FTRUNCATE +#define SUPPORT_PREALLOCATION 1 +#define PREALLOCATE_NEEDS_TRUNCATE 1 +#endif +#endif + union file_extras { int32 num; uint32 unum; diff --git a/rsync.yo b/rsync.yo --- a/rsync.yo +++ b/rsync.yo @@ -359,6 +359,7 @@ to the detailed description below for a complete description. verb( --super receiver attempts super-user activities --fake-super store/recover privileged attrs using xattrs -S, --sparse handle sparse files efficiently + --preallocate allocate dest files before writing -n, --dry-run perform a trial run with no changes made -W, --whole-file copy files whole (w/o delta-xfer algorithm) -x, --one-file-system don't cross filesystem boundaries @@ -1127,6 +1128,17 @@ NOTE: Don't use this option when the destination is a Solaris "tmpfs" filesystem. It seems to have problems seeking over null regions, and ends up corrupting the files. +dit(bf(--preallocate)) This tells the receiver to allocate each destination +file to its eventual size before writing data to the file. Rsync will only use +the real filesystem-level preallocation support provided by Linux's +bf(fallocate)(2) system call or Cygwin's bf(posix_fallocate)(3), not the slow +glibc implementation that writes a zero byte into each block. + +Without this option, larger files may not be entirely contiguous on the +filesystem, but with this option rsync will probably copy more slowly. If the +destination is not an extent-supporting filesystem (such as ext4, xfs, NTFS, +etc.), this option may have no positive effect at all. + dit(bf(-n, --dry-run)) This makes rsync perform a trial run that doesn't make any changes (and produces mostly the same output as a real run). It is most commonly used in combination with the bf(-v, --verbose) and/or diff --git a/syscall.c b/syscall.c --- a/syscall.c +++ b/syscall.c @@ -29,6 +29,10 @@ #include #endif +#if defined HAVE_SYS_FALLOCATE && !defined HAVE_FALLOCATE +#include +#endif + extern int dry_run; extern int am_root; extern int am_sender; @@ -325,3 +329,25 @@ OFF_T do_lseek(int fd, OFF_T offset, int whence) return lseek(fd, offset, whence); #endif } + +#ifdef SUPPORT_PREALLOCATION +int do_fallocate(int fd, OFF_T offset, OFF_T length) +{ +#ifdef FALLOC_FL_KEEP_SIZE +#define DO_FALLOC_OPTIONS FALLOC_FL_KEEP_SIZE +#else +#define DO_FALLOC_OPTIONS 0 +#endif + RETURN_ERROR_IF(dry_run, 0); + RETURN_ERROR_IF_RO_OR_LO; +#if defined HAVE_FALLOCATE + return fallocate(fd, DO_FALLOC_OPTIONS, offset, length); +#elif defined HAVE_SYS_FALLOCATE + return syscall(SYS_fallocate, fd, DO_FALLOC_OPTIONS, (loff_t)offset, (loff_t)length); +#elif defined HAVE_EFFICIENT_POSIX_FALLOCATE + return posix_fallocate(fd, offset, length); +#else +#error Coding error in SUPPORT_PREALLOCATION logic. +#endif +} +#endif diff --git a/t_stub.c b/t_stub.c --- a/t_stub.c +++ b/t_stub.c @@ -22,6 +22,7 @@ #include "rsync.h" int modify_window = 0; +int preallocate_files = 0; int module_id = -1; int relative_paths = 0; int module_dirlen = 0; diff --git a/util.c b/util.c --- a/util.c +++ b/util.c @@ -26,6 +26,7 @@ #include "inums.h" extern int dry_run; +extern int preallocate_files; extern int module_id; extern int modify_window; extern int relative_paths; @@ -332,6 +333,9 @@ int copy_file(const char *source, const char *dest, int ofd, mode_t mode) int ifd; char buf[1024 * 8]; int len; /* Number of bytes read into `buf'. */ +#ifdef PREALLOCATE_NEEDS_TRUNCATE + OFF_T preallocated_len = 0, offset = 0; +#endif if ((ifd = do_open(source, O_RDONLY, 0)) < 0) { int save_errno = errno; @@ -357,6 +361,25 @@ int copy_file(const char *source, const char *dest, int ofd, mode_t mode) } } +#ifdef SUPPORT_PREALLOCATION + if (preallocate_files) { + STRUCT_STAT srcst; + + /* Try to preallocate enough space for file's eventual length. Can + * reduce fragmentation on filesystems like ext4, xfs, and NTFS. */ + if (do_fstat(ifd, &srcst) < 0) + rsyserr(FWARNING, errno, "fstat %s", full_fname(source)); + else if (srcst.st_size > 0) { + if (do_fallocate(ofd, 0, srcst.st_size) == 0) { +#ifdef PREALLOCATE_NEEDS_TRUNCATE + preallocated_len = srcst.st_size; +#endif + } else + rsyserr(FWARNING, errno, "do_fallocate %s", full_fname(dest)); + } + } +#endif + while ((len = safe_read(ifd, buf, sizeof buf)) > 0) { if (full_write(ofd, buf, len) < 0) { int save_errno = errno; @@ -366,6 +389,9 @@ int copy_file(const char *source, const char *dest, int ofd, mode_t mode) errno = save_errno; return -1; } +#ifdef PREALLOCATE_NEEDS_TRUNCATE + offset += len; +#endif } if (len < 0) { @@ -382,6 +408,16 @@ int copy_file(const char *source, const char *dest, int ofd, mode_t mode) full_fname(source)); } +#ifdef PREALLOCATE_NEEDS_TRUNCATE + /* Source file might have shrunk since we fstatted it. + * Cut off any extra preallocated zeros from dest file. */ + if (offset < preallocated_len && ftruncate(ofd, offset) < 0) { + /* If we fail to truncate, the dest file may be wrong, so we + * must trigger the "partial transfer" error. */ + rsyserr(FERROR_XFER, errno, "ftruncate %s", full_fname(dest)); + } +#endif + if (close(ofd) < 0) { int save_errno = errno; rsyserr(FERROR_XFER, errno, "close failed on %s",