| 1 | This patch adds the --preallocate option that asks rsync to preallocate the |
| 2 | copied files. This slows down the copy, but should reduce fragmentation on |
| 3 | systems that need that. |
| 4 | |
| 5 | To use this patch, run these commands for a successful build: |
| 6 | |
| 7 | patch -p1 <patches/preallocate.diff |
| 8 | ./prepare-source |
| 9 | ./configure |
| 10 | make |
| 11 | |
| 12 | based-on: 24079e988fc31af4eba56cd2701fdc5a4154980d |
| 13 | diff --git a/compat.c b/compat.c |
| 14 | --- a/compat.c |
| 15 | +++ b/compat.c |
| 16 | @@ -33,6 +33,7 @@ extern int inplace; |
| 17 | extern int recurse; |
| 18 | extern int use_qsort; |
| 19 | extern int allow_inc_recurse; |
| 20 | +extern int preallocate_files; |
| 21 | extern int append_mode; |
| 22 | extern int fuzzy_basis; |
| 23 | extern int read_batch; |
| 24 | @@ -188,6 +189,14 @@ void setup_protocol(int f_out,int f_in) |
| 25 | if (read_batch) |
| 26 | check_batch_flags(); |
| 27 | |
| 28 | +#ifndef SUPPORT_PREALLOCATION |
| 29 | + if (preallocate_files && !am_sender) { |
| 30 | + rprintf(FERROR, "preallocation is not supported on this %s\n", |
| 31 | + am_server ? "Server" : "Client"); |
| 32 | + exit_cleanup(RERR_SYNTAX); |
| 33 | + } |
| 34 | +#endif |
| 35 | + |
| 36 | if (protocol_version < 30) { |
| 37 | if (append_mode == 1) |
| 38 | append_mode = 2; |
| 39 | diff --git a/configure.in b/configure.in |
| 40 | --- a/configure.in |
| 41 | +++ b/configure.in |
| 42 | @@ -583,13 +583,49 @@ AC_CHECK_FUNCS(waitpid wait4 getcwd strdup chown chmod lchmod mknod mkfifo \ |
| 43 | setlocale setmode open64 lseek64 mkstemp64 mtrace va_copy __va_copy \ |
| 44 | seteuid strerror putenv iconv_open locale_charset nl_langinfo getxattr \ |
| 45 | extattr_get_link sigaction sigprocmask setattrlist getgrouplist \ |
| 46 | - initgroups utimensat) |
| 47 | + initgroups utimensat posix_fallocate) |
| 48 | |
| 49 | dnl cygwin iconv.h defines iconv_open as libiconv_open |
| 50 | if test x"$ac_cv_func_iconv_open" != x"yes"; then |
| 51 | AC_CHECK_FUNC(libiconv_open, [ac_cv_func_iconv_open=yes; AC_DEFINE(HAVE_ICONV_OPEN, 1)]) |
| 52 | fi |
| 53 | |
| 54 | +dnl Preallocation stuff (the posix_fallocate function test is in AC_CHECK_FUNCS above): |
| 55 | +dnl Link-test a direct fallocate() call, which is what do_fallocate() makes when HAVE_FALLOCATE is defined. |
| 56 | +AC_CACHE_CHECK([for useable fallocate],rsync_cv_have_fallocate,[ |
| 57 | +AC_TRY_LINK([#include <fcntl.h> |
| 58 | +#include <sys/types.h>], |
| 59 | +[fallocate(0, 0, 0, 0);], |
| 60 | +rsync_cv_have_fallocate=yes,rsync_cv_have_fallocate=no)]) |
| 61 | +if test x"$rsync_cv_have_fallocate" = x"yes"; then |
| 62 | + AC_DEFINE(HAVE_FALLOCATE, 1, [Define to 1 if you have the fallocate function and it compiles and links without error]) |
| 63 | +fi |
| 64 | +dnl Also check for the SYS_fallocate syscall number, which syscall.c uses when HAVE_FALLOCATE is not defined. |
| 65 | +AC_CACHE_CHECK([for SYS_fallocate],rsync_cv_have_sys_fallocate,[ |
| 66 | +AC_TRY_COMPILE([#include <sys/syscall.h> |
| 67 | +#include <sys/types.h>], |
| 68 | +[syscall(SYS_fallocate, 0, 0, 0, 0);], |
| 69 | +rsync_cv_have_sys_fallocate=yes,rsync_cv_have_sys_fallocate=no)]) |
| 70 | +if test x"$rsync_cv_have_sys_fallocate" = x"yes"; then |
| 71 | + AC_DEFINE(HAVE_SYS_FALLOCATE, 1, [Define to 1 if you have the SYS_fallocate syscall number]) |
| 72 | +fi |
| 73 | +dnl posix_fallocate is only trusted on Cygwin; on other hosts HAVE_EFFICIENT_POSIX_FALLOCATE stays undefined. |
| 74 | +if test x"$ac_cv_func_posix_fallocate" = x"yes"; then |
| 75 | + AC_MSG_CHECKING([whether posix_fallocate is efficient]) |
| 76 | + case $host_os in |
| 77 | + *cygwin*) |
| 78 | + AC_MSG_RESULT(yes) |
| 79 | + AC_DEFINE(HAVE_EFFICIENT_POSIX_FALLOCATE, 1, |
| 80 | + [Define if posix_fallocate is efficient (Cygwin)]) |
| 81 | + ;; |
| 82 | + *) |
| 83 | + AC_MSG_RESULT(no) |
| 84 | + ;; |
| 85 | + esac |
| 86 | +fi |
| 87 | + |
| 88 | +dnl End of preallocation stuff |
| 89 | + |
| 90 | AC_CHECK_FUNCS(getpgrp tcgetpgrp) |
| 91 | if test $ac_cv_func_getpgrp = yes; then |
| 92 | AC_FUNC_GETPGRP |
| 93 | diff --git a/options.c b/options.c |
| 94 | --- a/options.c |
| 95 | +++ b/options.c |
| 96 | @@ -73,6 +73,7 @@ int remove_source_files = 0; |
| 97 | int one_file_system = 0; |
| 98 | int protocol_version = PROTOCOL_VERSION; |
| 99 | int sparse_files = 0; |
| 100 | +int preallocate_files = 0; |
| 101 | int do_compression = 0; |
| 102 | int def_compress_level = Z_DEFAULT_COMPRESSION; |
| 103 | int am_root = 0; /* 0 = normal, 1 = root, 2 = --super, -1 = --fake-super */ |
| 104 | @@ -567,6 +568,7 @@ static void print_rsync_version(enum logcode f) |
| 105 | char const *links = "no "; |
| 106 | char const *iconv = "no "; |
| 107 | char const *ipv6 = "no "; |
| 108 | + char const *preallocation = "no "; |
| 109 | STRUCT_STAT *dumstat; |
| 110 | |
| 111 | #if SUBPROTOCOL_VERSION != 0 |
| 112 | @@ -600,6 +602,9 @@ static void print_rsync_version(enum logcode f) |
| 113 | #ifdef CAN_SET_SYMLINK_TIMES |
| 114 | symtimes = ""; |
| 115 | #endif |
| 116 | +#ifdef SUPPORT_PREALLOCATION |
| 117 | + preallocation = ""; |
| 118 | +#endif |
| 119 | |
| 120 | rprintf(f, "%s version %s protocol version %d%s\n", |
| 121 | RSYNC_NAME, RSYNC_VERSION, PROTOCOL_VERSION, subprotocol); |
| 122 | @@ -613,8 +618,8 @@ static void print_rsync_version(enum logcode f) |
| 123 | (int)(sizeof (int64) * 8)); |
| 124 | rprintf(f, " %ssocketpairs, %shardlinks, %ssymlinks, %sIPv6, batchfiles, %sinplace,\n", |
| 125 | got_socketpair, hardlinks, links, ipv6, have_inplace); |
| 126 | - rprintf(f, " %sappend, %sACLs, %sxattrs, %siconv, %ssymtimes\n", |
| 127 | - have_inplace, acls, xattrs, iconv, symtimes); |
| 128 | + rprintf(f, " %sappend, %sACLs, %sxattrs, %siconv, %ssymtimes, %spreallocation\n", |
| 129 | + have_inplace, acls, xattrs, iconv, symtimes, preallocation); |
| 130 | |
| 131 | #ifdef MAINTAINER_MODE |
| 132 | rprintf(f, "Panic Action: \"%s\"\n", get_panic_action()); |
| 133 | @@ -704,6 +709,11 @@ void usage(enum logcode F) |
| 134 | rprintf(F," --fake-super store/recover privileged attrs using xattrs\n"); |
| 135 | #endif |
| 136 | rprintf(F," -S, --sparse handle sparse files efficiently\n"); |
| 137 | +#ifdef SUPPORT_PREALLOCATION |
| 138 | + rprintf(F," --preallocate allocate dest files before writing them\n"); |
| 139 | +#else |
| 140 | + rprintf(F," --preallocate pre-allocate dest files on remote receiver\n"); |
| 141 | +#endif |
| 142 | rprintf(F," -n, --dry-run perform a trial run with no changes made\n"); |
| 143 | rprintf(F," -W, --whole-file copy files whole (without delta-xfer algorithm)\n"); |
| 144 | rprintf(F," -x, --one-file-system don't cross filesystem boundaries\n"); |
| 145 | @@ -900,6 +910,7 @@ static struct poptOption long_options[] = { |
| 146 | {"sparse", 'S', POPT_ARG_VAL, &sparse_files, 1, 0, 0 }, |
| 147 | {"no-sparse", 0, POPT_ARG_VAL, &sparse_files, 0, 0, 0 }, |
| 148 | {"no-S", 0, POPT_ARG_VAL, &sparse_files, 0, 0, 0 }, |
| 149 | + {"preallocate", 0, POPT_ARG_NONE, &preallocate_files, 0, 0, 0}, |
| 150 | {"inplace", 0, POPT_ARG_VAL, &inplace, 1, 0, 0 }, |
| 151 | {"no-inplace", 0, POPT_ARG_VAL, &inplace, 0, 0, 0 }, |
| 152 | {"append", 0, POPT_ARG_NONE, 0, OPT_APPEND, 0, 0 }, |
| 153 | @@ -2646,6 +2657,9 @@ void server_options(char **args, int *argc_p) |
| 154 | else if (remove_source_files) |
| 155 | args[ac++] = "--remove-sent-files"; |
| 156 | |
| 157 | + if (preallocate_files && am_sender) |
| 158 | + args[ac++] = "--preallocate"; |
| 159 | + |
| 160 | if (ac > MAX_SERVER_ARGS) { /* Not possible... */ |
| 161 | rprintf(FERROR, "argc overflow in server_options().\n"); |
| 162 | exit_cleanup(RERR_MALLOC); |
| 163 | diff --git a/receiver.c b/receiver.c |
| 164 | --- a/receiver.c |
| 165 | +++ b/receiver.c |
| 166 | @@ -44,6 +44,7 @@ extern int cleanup_got_literal; |
| 167 | extern int remove_source_files; |
| 168 | extern int append_mode; |
| 169 | extern int sparse_files; |
| 170 | +extern int preallocate_files; |
| 171 | extern int keep_partial; |
| 172 | extern int checksum_len; |
| 173 | extern int checksum_seed; |
| 174 | @@ -207,6 +208,22 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, |
| 175 | char *data; |
| 176 | int32 i; |
| 177 | char *map = NULL; |
| 178 | +#ifdef SUPPORT_PREALLOCATION |
| 179 | +#ifdef PREALLOCATE_NEEDS_TRUNCATE |
| 180 | + OFF_T preallocated_len = 0; |
| 181 | +#endif |
| 182 | + |
| 183 | + if (preallocate_files && fd != -1 && total_size > 0) { |
| 184 | + /* Try to preallocate enough space for file's eventual length. Can |
| 185 | + * reduce fragmentation on filesystems like ext4, xfs, and NTFS. */ |
| 186 | + if (do_fallocate(fd, 0, total_size) == 0) { |
| 187 | +#ifdef PREALLOCATE_NEEDS_TRUNCATE |
| 188 | + preallocated_len = total_size; |
| 189 | +#endif |
| 190 | + } else |
| 191 | + rsyserr(FWARNING, errno, "do_fallocate %s", full_fname(fname)); |
| 192 | + } |
| 193 | +#endif |
| 194 | |
| 195 | read_sum_head(f_in, &sum); |
| 196 | |
| 197 | @@ -317,8 +334,14 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, |
| 198 | goto report_write_error; |
| 199 | |
| 200 | #ifdef HAVE_FTRUNCATE |
| 201 | - if (inplace && fd != -1 |
| 202 | - && ftruncate(fd, offset) < 0) { |
| 203 | + /* inplace: New data could be shorter than old data. |
| 204 | + * preallocate_files: total_size could have been an overestimate. |
| 205 | + * Cut off any extra preallocated zeros from dest file. */ |
| 206 | + if ((inplace |
| 207 | +#ifdef PREALLOCATE_NEEDS_TRUNCATE |
| 208 | + || preallocated_len > offset |
| 209 | +#endif |
| 210 | + ) && fd != -1 && ftruncate(fd, offset) < 0) { |
| 211 | rsyserr(FERROR_XFER, errno, "ftruncate failed on %s", |
| 212 | full_fname(fname)); |
| 213 | } |
| 214 | diff --git a/rsync.h b/rsync.h |
| 215 | --- a/rsync.h |
| 216 | +++ b/rsync.h |
| 217 | @@ -646,6 +646,21 @@ struct ht_int64_node { |
| 218 | #define ACLS_NEED_MASK 1 |
| 219 | #endif |
| 220 | |
| 221 | +#if defined HAVE_FALLOCATE || defined HAVE_SYS_FALLOCATE |
| 222 | +#include <linux/falloc.h> |
| 223 | +#ifdef FALLOC_FL_KEEP_SIZE /* do_fallocate() passes this flag when it is available */ |
| 224 | +#define SUPPORT_PREALLOCATION 1 |
| 225 | +#elif defined HAVE_FTRUNCATE |
| 226 | +#define SUPPORT_PREALLOCATION 1 |
| 227 | +#define PREALLOCATE_NEEDS_TRUNCATE 1 /* callers ftruncate() away any excess preallocation */ |
| 228 | +#endif |
| 229 | +#else /* !fallocate */ |
| 230 | +#if defined HAVE_EFFICIENT_POSIX_FALLOCATE && defined HAVE_FTRUNCATE |
| 231 | +#define SUPPORT_PREALLOCATION 1 |
| 232 | +#define PREALLOCATE_NEEDS_TRUNCATE 1 /* callers ftruncate() away any excess preallocation */ |
| 233 | +#endif |
| 234 | +#endif |
| 235 | + |
| 236 | union file_extras { |
| 237 | int32 num; |
| 238 | uint32 unum; |
| 239 | diff --git a/rsync.yo b/rsync.yo |
| 240 | --- a/rsync.yo |
| 241 | +++ b/rsync.yo |
| 242 | @@ -359,6 +359,7 @@ to the detailed description below for a complete description. verb( |
| 243 | --super receiver attempts super-user activities |
| 244 | --fake-super store/recover privileged attrs using xattrs |
| 245 | -S, --sparse handle sparse files efficiently |
| 246 | + --preallocate allocate dest files before writing |
| 247 | -n, --dry-run perform a trial run with no changes made |
| 248 | -W, --whole-file copy files whole (w/o delta-xfer algorithm) |
| 249 | -x, --one-file-system don't cross filesystem boundaries |
| 250 | @@ -1127,6 +1128,17 @@ NOTE: Don't use this option when the destination is a Solaris "tmpfs" |
| 251 | filesystem. It seems to have problems seeking over null regions, |
| 252 | and ends up corrupting the files. |
| 253 | |
| 254 | +dit(bf(--preallocate)) This tells the receiver to allocate each destination |
| 255 | +file to its eventual size before writing data to the file. Rsync will only use |
| 256 | +the real filesystem-level preallocation support provided by Linux's |
| 257 | +bf(fallocate)(2) system call or Cygwin's bf(posix_fallocate)(3), not the slow |
| 258 | +glibc implementation that writes a zero byte into each block. |
| 259 | + |
| 260 | +Without this option, larger files may not be entirely contiguous on the |
| 261 | +filesystem, but with this option rsync will probably copy more slowly. If the |
| 262 | +destination is not an extent-supporting filesystem (such as ext4, xfs, or NTFS), |
| 263 | +this option may have no positive effect at all. |
| 264 | + |
| 265 | dit(bf(-n, --dry-run)) This makes rsync perform a trial run that doesn't |
| 266 | make any changes (and produces mostly the same output as a real run). It |
| 267 | is most commonly used in combination with the bf(-v, --verbose) and/or |
| 268 | diff --git a/syscall.c b/syscall.c |
| 269 | --- a/syscall.c |
| 270 | +++ b/syscall.c |
| 271 | @@ -29,6 +29,10 @@ |
| 272 | #include <sys/attr.h> |
| 273 | #endif |
| 274 | |
| 275 | +#if defined HAVE_SYS_FALLOCATE && !defined HAVE_FALLOCATE |
| 276 | +#include <sys/syscall.h> |
| 277 | +#endif |
| 278 | + |
| 279 | extern int dry_run; |
| 280 | extern int am_root; |
| 281 | extern int am_sender; |
| 282 | @@ -325,3 +329,25 @@ OFF_T do_lseek(int fd, OFF_T offset, int whence) |
| 283 | return lseek(fd, offset, whence); |
| 284 | #endif |
| 285 | } |
| 286 | +/* Preallocate length bytes at offset in fd. Callers treat 0 as success and report errno otherwise. */ |
| 287 | +#ifdef SUPPORT_PREALLOCATION |
| 288 | +int do_fallocate(int fd, OFF_T offset, OFF_T length) |
| 289 | +{ |
| 290 | +#ifdef FALLOC_FL_KEEP_SIZE |
| 291 | +#define DO_FALLOC_OPTIONS FALLOC_FL_KEEP_SIZE |
| 292 | +#else |
| 293 | +#define DO_FALLOC_OPTIONS 0 |
| 294 | +#endif |
| 295 | + RETURN_ERROR_IF(dry_run, 0); |
| 296 | + RETURN_ERROR_IF_RO_OR_LO; |
| 297 | +#if defined HAVE_FALLOCATE |
| 298 | + return fallocate(fd, DO_FALLOC_OPTIONS, offset, length); |
| 299 | +#elif defined HAVE_SYS_FALLOCATE |
| 300 | + return syscall(SYS_fallocate, fd, DO_FALLOC_OPTIONS, (loff_t)offset, (loff_t)length); |
| 301 | +#elif defined HAVE_EFFICIENT_POSIX_FALLOCATE |
| 302 | + return (errno = posix_fallocate(fd, offset, length)) ? -1 : 0; /* it returns the error number rather than setting errno */ |
| 303 | +#else |
| 304 | +#error Coding error in SUPPORT_PREALLOCATION logic. |
| 305 | +#endif |
| 306 | +} |
| 307 | +#endif |
| 308 | diff --git a/t_stub.c b/t_stub.c |
| 309 | --- a/t_stub.c |
| 310 | +++ b/t_stub.c |
| 311 | @@ -22,6 +22,7 @@ |
| 312 | #include "rsync.h" |
| 313 | |
| 314 | int modify_window = 0; |
| 315 | +int preallocate_files = 0; |
| 316 | int module_id = -1; |
| 317 | int relative_paths = 0; |
| 318 | int module_dirlen = 0; |
| 319 | diff --git a/util.c b/util.c |
| 320 | --- a/util.c |
| 321 | +++ b/util.c |
| 322 | @@ -26,6 +26,7 @@ |
| 323 | #include "inums.h" |
| 324 | |
| 325 | extern int dry_run; |
| 326 | +extern int preallocate_files; |
| 327 | extern int module_id; |
| 328 | extern int modify_window; |
| 329 | extern int relative_paths; |
| 330 | @@ -332,6 +333,9 @@ int copy_file(const char *source, const char *dest, int ofd, mode_t mode) |
| 331 | int ifd; |
| 332 | char buf[1024 * 8]; |
| 333 | int len; /* Number of bytes read into `buf'. */ |
| 334 | +#ifdef PREALLOCATE_NEEDS_TRUNCATE |
| 335 | + OFF_T preallocated_len = 0, offset = 0; |
| 336 | +#endif |
| 337 | |
| 338 | if ((ifd = do_open(source, O_RDONLY, 0)) < 0) { |
| 339 | int save_errno = errno; |
| 340 | @@ -357,6 +361,25 @@ int copy_file(const char *source, const char *dest, int ofd, mode_t mode) |
| 341 | } |
| 342 | } |
| 343 | |
| 344 | +#ifdef SUPPORT_PREALLOCATION |
| 345 | + if (preallocate_files) { |
| 346 | + STRUCT_STAT srcst; |
| 347 | + |
| 348 | + /* Try to preallocate enough space for file's eventual length. Can |
| 349 | + * reduce fragmentation on filesystems like ext4, xfs, and NTFS. */ |
| 350 | + if (do_fstat(ifd, &srcst) < 0) |
| 351 | + rsyserr(FWARNING, errno, "fstat %s", full_fname(source)); |
| 352 | + else if (srcst.st_size > 0) { |
| 353 | + if (do_fallocate(ofd, 0, srcst.st_size) == 0) { |
| 354 | +#ifdef PREALLOCATE_NEEDS_TRUNCATE |
| 355 | + preallocated_len = srcst.st_size; |
| 356 | +#endif |
| 357 | + } else |
| 358 | + rsyserr(FWARNING, errno, "do_fallocate %s", full_fname(dest)); |
| 359 | + } |
| 360 | + } |
| 361 | +#endif |
| 362 | + |
| 363 | while ((len = safe_read(ifd, buf, sizeof buf)) > 0) { |
| 364 | if (full_write(ofd, buf, len) < 0) { |
| 365 | int save_errno = errno; |
| 366 | @@ -366,6 +389,9 @@ int copy_file(const char *source, const char *dest, int ofd, mode_t mode) |
| 367 | errno = save_errno; |
| 368 | return -1; |
| 369 | } |
| 370 | +#ifdef PREALLOCATE_NEEDS_TRUNCATE |
| 371 | + offset += len; |
| 372 | +#endif |
| 373 | } |
| 374 | |
| 375 | if (len < 0) { |
| 376 | @@ -382,6 +408,16 @@ int copy_file(const char *source, const char *dest, int ofd, mode_t mode) |
| 377 | full_fname(source)); |
| 378 | } |
| 379 | |
| 380 | +#ifdef PREALLOCATE_NEEDS_TRUNCATE |
| 381 | + /* Source file might have shrunk since we fstatted it. |
| 382 | + * Cut off any extra preallocated zeros from dest file. */ |
| 383 | + if (offset < preallocated_len && ftruncate(ofd, offset) < 0) { |
| 384 | + /* If we fail to truncate, the dest file may be wrong, so we |
| 385 | + * must trigger the "partial transfer" error. */ |
| 386 | + rsyserr(FERROR_XFER, errno, "ftruncate %s", full_fname(dest)); |
| 387 | + } |
| 388 | +#endif |
| 389 | + |
| 390 | if (close(ofd) < 0) { |
| 391 | int save_errno = errno; |
| 392 | rsyserr(FERROR_XFER, errno, "close failed on %s", |