From c966e8e87aa9b4edb5f6bb6cab0660de8354a637 Mon Sep 17 00:00:00 2001 From: Wayne Davison Date: Fri, 4 May 2007 18:21:00 +0000 Subject: [PATCH] Tobias Oetiker's patch for a --drop-cache option. --- drop-cache.diff | 487 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 487 insertions(+) create mode 100644 drop-cache.diff diff --git a/drop-cache.diff b/drop-cache.diff new file mode 100644 index 0000000..4cec878 --- /dev/null +++ b/drop-cache.diff @@ -0,0 +1,487 @@ +From: Tobi Oetiker tobi{at}oetiker.ch +Date: 2007-04-23 + +I am using rsync for hard-link backup. I found that there is a +major problem with frequent backups filling up the file system cache +with all the data from the files being backed up. The effect is +that all the other 'sensible' data in the cache gets thrown out in +the process. This is rather unfortunate as the performance of the +system becomes very bad after running rsync. + +Some research showed that + + posix_fadvise64(fd, 0, 0,POSIX_FADV_DONTNEED); + +would tell the OS that it should not keep the file in cache. I +have written a patch for rsync that adds the + + --drop-cache + +option which activates posix_fadvise64. + +There are some caveats though: + + * When calling posix_fadvise64 while writing a file, only the + part of the cache will be released which has already been + written to disk. This means we have to call fdatasync before + calling posix_fadvise64 and this will unfortunately slow down + operations considerably. On my test system I get 240 KByte/s. + + The patch has been optimized so that the impact on large files + will be considerably lowered by calling posix_fadvise64 only + after a few megabytes have been written. + + * When reading a file which has been cached *before* rsync read + it, the content of the file will be released from cache + nevertheless, which may not be intended. I have unfortunately not + found a method for determining if a file is in cache or not + (ideas?) 
+ + I found that running rsync of an lvm snapshot is a good way + around this problem, since the snapshot data is cached + separately from the original. It has the additional benefit of + making the backups more consistent. + + * I don't really know the rsync code, so it may be that the patch + is calling fadvise for files where this would not be necessary. + + * The patch is tested only on Linux 2.6.18 + +If you have any input on this, please let me know. + +You can get the latest edition of the patch from + + http://tobi.oetiker.ch/patches/ + +cheers +tobi + +Changes: + + 2007-04-23 + +* pass --drop-cache on to the remote server +* make test works now + +--- old/checksum.c ++++ new/checksum.c +@@ -148,7 +148,7 @@ void file_checksum(char *fname, char *su + mdfour_result(&m, (uchar *)sum); + } + +- close(fd); ++ fadv_close(fd); + unmap_file(buf); + } + +--- old/fileio.c ++++ new/fileio.c +@@ -26,15 +26,18 @@ + #endif + + extern int sparse_files; +- + static char last_byte; + static int last_sparse; + ++extern int drop_cache; ++ ++ ++ + int sparse_end(int f) + { + if (last_sparse) { + do_lseek(f,-1,SEEK_CUR); +- return (write(f,&last_byte,1) == 1 ? 0 : -1); ++ return (fadv_write(f,&last_byte,1) == 1 ? 
0 : -1); + } + last_sparse = 0; + return 0; +@@ -61,7 +64,7 @@ static int write_sparse(int f,char *buf, + if (l1 == len) + return len; + +- ret = write(f, buf + l1, len - (l1+l2)); ++ ret = fadv_write(f, buf + l1, len - (l1+l2)); + if (ret == -1 || ret == 0) + return ret; + else if (ret != (int) (len - (l1+l2))) +@@ -84,7 +87,7 @@ int flush_write_file(int f) + char *bp = wf_writeBuf; + + while (wf_writeBufCnt > 0) { +- if ((ret = write(f, bp, wf_writeBufCnt)) < 0) { ++ if ((ret = fadv_write(f, bp, wf_writeBufCnt)) < 0) { + if (errno == EINTR) + continue; + return ret; +@@ -235,7 +238,7 @@ char *map_ptr(struct map_struct *map, OF + map->p_len = window_size; + + while (read_size > 0) { +- nread = read(map->fd, map->p + read_offset, read_size); ++ nread = fadv_read(map->fd, map->p + read_offset, read_size); + if (nread <= 0) { + if (!map->status) + map->status = nread ? errno : ENODATA; +--- old/generator.c ++++ new/generator.c +@@ -1614,18 +1614,18 @@ static void recv_generator(char *fname, + + if (inplace && make_backups > 0 && fnamecmp_type == FNAMECMP_FNAME) { + if (!(backupptr = get_backup_name(fname))) { +- close(fd); ++ fadv_close(fd); + goto cleanup; + } + if (!(back_file = make_file(fname, NULL, NULL, 0, NO_FILTERS))) { +- close(fd); ++ fadv_close(fd); + goto pretend_missing; + } + if (robust_unlink(backupptr) && errno != ENOENT) { + rsyserr(FERROR, errno, "unlink %s", + full_fname(backupptr)); + unmake_file(back_file); +- close(fd); ++ fadv_close(fd); + goto cleanup; + } + if ((f_copy = do_open(backupptr, +@@ -1633,7 +1633,7 @@ static void recv_generator(char *fname, + rsyserr(FERROR, errno, "open %s", + full_fname(backupptr)); + unmake_file(back_file); +- close(fd); ++ fadv_close(fd); + goto cleanup; + } + fnamecmp_type = FNAMECMP_BACKUP; +@@ -1695,7 +1695,7 @@ static void recv_generator(char *fname, + generate_and_send_sums(fd, sx.st.st_size, f_out, f_copy); + + if (f_copy >= 0) { +- close(f_copy); ++ fadv_close(f_copy); + set_file_attrs(backupptr, 
back_file, NULL, NULL, 0); + if (verbose > 1) { + rprintf(FINFO, "backed up %s to %s\n", +@@ -1704,7 +1704,7 @@ static void recv_generator(char *fname, + unmake_file(back_file); + } + +- close(fd); ++ fadv_close(fd); + + cleanup: + #ifdef SUPPORT_ACLS +--- old/options.c ++++ new/options.c +@@ -57,6 +57,7 @@ int preserve_gid = 0; + int preserve_times = 0; + int omit_dir_times = 0; + int update_only = 0; ++int drop_cache = 0; + int cvs_exclude = 0; + int dry_run = 0; + int do_xfers = 1; +@@ -310,6 +311,7 @@ void usage(enum logcode F) + rprintf(F," --backup-dir=DIR make backups into hierarchy based in DIR\n"); + rprintf(F," --suffix=SUFFIX set backup suffix (default %s w/o --backup-dir)\n",BACKUP_SUFFIX); + rprintf(F," -u, --update skip files that are newer on the receiver\n"); ++ rprintf(F," --drop-cache tell OS to drop caching of file data\n"); + rprintf(F," --inplace update destination files in-place (SEE MAN PAGE)\n"); + rprintf(F," --append append data onto shorter files\n"); + rprintf(F," -d, --dirs transfer directories without recursing\n"); +@@ -506,6 +508,7 @@ static struct poptOption long_options[] + {"size-only", 0, POPT_ARG_NONE, &size_only, 0, 0, 0 }, + {"one-file-system", 'x', POPT_ARG_NONE, 0, 'x', 0, 0 }, + {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 }, ++ {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 }, + {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 }, + {"ignore-non-existing",0,POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 }, + {"ignore-existing", 0, POPT_ARG_NONE, &ignore_existing, 0, 0, 0 }, +@@ -1603,6 +1606,9 @@ void server_options(char **args,int *arg + if (!am_sender) + args[ac++] = "--sender"; + ++ if (drop_cache) ++ args[ac++] = "--drop-cache"; ++ + x = 1; + argstr[0] = '-'; + for (i = 0; i < verbose; i++) +--- old/receiver.c ++++ new/receiver.c +@@ -554,7 +554,7 @@ int recv_files(int f_in, char *local_nam + rsyserr(FERROR, errno, "fstat %s failed", + full_fname(fnamecmp)); + discard_receive_data(f_in, 
F_LENGTH(file)); +- close(fd1); ++ fadv_close(fd1); + if (inc_recurse) + send_msg_int(MSG_NO_SEND, ndx); + continue; +@@ -569,14 +569,14 @@ int recv_files(int f_in, char *local_nam + rprintf(FERROR,"recv_files: %s is a directory\n", + full_fname(fnamecmp)); + discard_receive_data(f_in, F_LENGTH(file)); +- close(fd1); ++ fadv_close(fd1); + if (inc_recurse) + send_msg_int(MSG_NO_SEND, ndx); + continue; + } + + if (fd1 != -1 && !S_ISREG(st.st_mode)) { +- close(fd1); ++ fadv_close(fd1); + fd1 = -1; + } + +@@ -604,7 +604,7 @@ int recv_files(int f_in, char *local_nam + full_fname(fname)); + discard_receive_data(f_in, F_LENGTH(file)); + if (fd1 != -1) +- close(fd1); ++ fadv_close(fd1); + if (inc_recurse) + send_msg_int(MSG_NO_SEND, ndx); + continue; +@@ -613,7 +613,7 @@ int recv_files(int f_in, char *local_nam + if (!get_tmpname(fnametmp,fname)) { + discard_receive_data(f_in, F_LENGTH(file)); + if (fd1 != -1) +- close(fd1); ++ fadv_close(fd1); + if (inc_recurse) + send_msg_int(MSG_NO_SEND, ndx); + continue; +@@ -641,7 +641,7 @@ int recv_files(int f_in, char *local_nam + full_fname(fnametmp)); + discard_receive_data(f_in, F_LENGTH(file)); + if (fd1 != -1) +- close(fd1); ++ fadv_close(fd1); + if (inc_recurse) + send_msg_int(MSG_NO_SEND, ndx); + continue; +@@ -663,8 +663,8 @@ int recv_files(int f_in, char *local_nam + log_item(log_code, file, &initial_stats, iflags, NULL); + + if (fd1 != -1) +- close(fd1); +- if (close(fd2) < 0) { ++ fadv_close(fd1); ++ if (fadv_close(fd2) < 0) { + rsyserr(FERROR, errno, "close failed on %s", + full_fname(fnametmp)); + exit_cleanup(RERR_FILEIO); +--- old/rsync.yo ++++ new/rsync.yo +@@ -335,6 +335,7 @@ to the detailed description below for a + --super receiver attempts super-user activities + --fake-super store/recover privileged attrs using xattrs + -S, --sparse handle sparse files efficiently ++ --drop-cache tell OS to drop caching of file data + -n, --dry-run show what would have been transferred + -W, --whole-file copy files whole 
(without rsync algorithm) + -x, --one-file-system don't cross filesystem boundaries +@@ -956,6 +957,10 @@ NOTE: Don't use this option when the des + filesystem. It doesn't seem to handle seeks over null regions + correctly and ends up corrupting the files. + ++dit(bf(--drop-cache)) Tell the OS to drop the caching of the file data. This ++prevents rsync from filling up the filesystem cache. This can sometimes help ++to make a system perform better by keeping non-rsync files in the disk cache. ++ + dit(bf(-n, --dry-run)) This tells rsync to not do any file transfers, + instead it will just report the actions it would have taken. + +--- old/sender.c ++++ new/sender.c +@@ -307,7 +307,7 @@ void send_files(int f_in, int f_out) + io_error |= IOERR_GENERAL; + rsyserr(FERROR, errno, "fstat failed"); + free_sums(s); +- close(fd); ++ fadv_close(fd); + exit_cleanup(RERR_PROTOCOL); + } + +@@ -351,7 +351,7 @@ void send_files(int f_in, int f_out) + full_fname(fname)); + } + } +- close(fd); ++ fadv_close(fd); + + free_sums(s); + +--- old/t_unsafe.c ++++ new/t_unsafe.c +@@ -28,6 +28,7 @@ int am_root = 0; + int read_only = 0; + int list_only = 0; + int verbose = 0; ++int drop_cache = 0; + int preserve_perms = 0; + + int +--- old/util.c ++++ new/util.c +@@ -24,6 +24,7 @@ + + extern int verbose; + extern int dry_run; ++extern int drop_cache; + extern int module_id; + extern int modify_window; + extern int relative_paths; +@@ -39,6 +40,88 @@ char curr_dir[MAXPATHLEN]; + unsigned int curr_dir_len; + int curr_dir_depth; /* This is only set for a sanitizing daemon. 
*/
+
++/* --drop-cache state: identity and last dropped offset per fd (0..254 only). */
++
++static struct stat fadv_fd_stat[255];
++static off_t fadv_fd_pos[255];
++static int fadv_fd_init = 0;
++
++static void fadv_fd_init_func(void){
++	if (fadv_fd_init ==0){
++		int i;
++		fadv_fd_init = 1;
++		for (i=0;i<255;i++){
++			fadv_fd_pos[i] = 0;
++			fadv_fd_stat[i].st_dev = 0;
++			fadv_fd_stat[i].st_ino = 0;
++		}
++	}
++}
++
++static void fadv_drop(int fd, int sync){
++	struct stat stat;
++	/* Trail 1 MB behind in dropping. We do this to make sure that
++	   the same block or stripe does not have to be written twice.
++	   pos is an off_t so offsets beyond 2 GB are not truncated. */
++	off_t pos;
++	if (fd < 0 || fd >= 255 || fstat(fd,&stat) != 0){
++		return;
++	}
++	fadv_fd_init_func();
++	pos = lseek(fd,0,SEEK_CUR) - 1024*1024;
++	if ( fadv_fd_stat[fd].st_dev == stat.st_dev
++	     && fadv_fd_stat[fd].st_ino == stat.st_ino ) {
++		if ( fadv_fd_pos[fd] < pos - 16*1024*1024 ) {
++			if (sync) {
++				/* if the file is not flushed to disk before calling fadvise,
++				   then the cache will not be freed and the advice gets ignored
++				   this does give a severe hit on performance. If only there
++				   was a way to mark cache so that it gets released once the data
++				   is written to disk. */
++				fdatasync(fd);
++			}
++			posix_fadvise64(fd, 0, pos, POSIX_FADV_DONTNEED);
++			fadv_fd_pos[fd] = pos;
++		}
++	} else {
++		fadv_fd_stat[fd].st_dev = stat.st_dev;
++		fadv_fd_stat[fd].st_ino = stat.st_ino;
++		fadv_fd_pos[fd] = 0;
++	}
++}
++
++ssize_t fadv_write(int fd, const void *buf, size_t count)
++{
++	ssize_t ret = write(fd, buf, count);
++	if (drop_cache && ret > 0) {
++		fadv_drop(fd,1);
++	}
++	return ret;
++}
++
++ssize_t fadv_read(int fd, void *buf, size_t count)
++{
++	ssize_t ret = read(fd, buf, count);
++	if (drop_cache && ret > 0) {
++		fadv_drop(fd,0);
++	}
++	return ret;
++}
++
++int fadv_close(int fd){
++	if (drop_cache) {
++		/* drop everything after we are done */
++		/* if the file is not flushed to disk before calling fadvise,
++		   then the cache will not be freed and the advice gets ignored
++		   this does give a severe hit on performance. If only there
++		   was a way to mark cache so that it gets released once the data
++		   is written to disk. */
++		fdatasync(fd);
++		posix_fadvise64(fd, 0, 0,POSIX_FADV_DONTNEED);
++	}
++	return close(fd);
++}
++
+ /* Set a fd into nonblocking mode. */
+ void set_nonblocking(int fd)
+ {
+@@ -221,7 +304,7 @@ int full_write(int desc, const char *ptr
+
+ 	total_written = 0;
+ 	while (len > 0) {
+-		int written = write(desc, ptr, len);
++		int written = fadv_write(desc, ptr, len);
+ 		if (written < 0) {
+ 			if (errno == EINTR)
+ 				continue;
+@@ -253,7 +336,7 @@ static int safe_read(int desc, char *ptr
+ 		return len;
+
+ 	do {
+-		n_chars = read(desc, ptr, len);
++		n_chars = fadv_read(desc, ptr, len);
+ 	} while (n_chars < 0 && errno == EINTR);
+
+ 	return n_chars;
+@@ -284,32 +367,32 @@ int copy_file(const char *source, const
+ 	ofd = do_open(dest, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, mode);
+ 	if (ofd == -1) {
+ 		rsyserr(FERROR, errno, "open %s", full_fname(dest));
+-		close(ifd);
++		fadv_close(ifd);
+ 		return -1;
+ 	}
+
+ 	while ((len = safe_read(ifd, buf, sizeof buf)) > 0) {
+ 		if (full_write(ofd, buf, len) < 0) {
+ 			rsyserr(FERROR, errno, "write %s", full_fname(dest));
+-			close(ifd);
+-			close(ofd);
++			fadv_close(ifd);
++			fadv_close(ofd);
+ 			return -1;
+ 		}
+ 	}
+
+ 	if (len < 0) {
+ 		rsyserr(FERROR, errno, "read %s", full_fname(source));
+-		close(ifd);
+-		close(ofd);
++		fadv_close(ifd);
++		fadv_close(ofd);
+ 		return -1;
+ 	}
+
+-	if (close(ifd) < 0) {
++	if (fadv_close(ifd) < 0) {
+ 		rsyserr(FINFO, errno, "close failed on %s",
+ 			full_fname(source));
+ 	}
+
+-	if (close(ofd) < 0) {
++	if (fadv_close(ofd) < 0) {
+ 		rsyserr(FERROR, errno, "close failed on %s",
+ 			full_fname(dest));
+ 		return -1;
-- 
2.34.1