--- /dev/null
+From: Tobi Oetiker tobi{at}oetiker.ch
+Date: 2007-04-23
+
+I am using rsync for hard-link backups. I found that there is a
+major problem with frequent backups filling up the file system cache
+with all the data from the files being backed up. The effect is
+that all the other 'sensible' data in the cache gets thrown out in
+the process. This is rather unfortunate as the performance of the
+system becomes very bad after running rsync.
+
+Some research showed that
+
+ posix_fadvise64(fd, 0, 0,POSIX_FADV_DONTNEED);
+
+would tell the OS that it should not keep the file in cache. I
+have written a patch for rsync that adds the
+
+ --drop-cache
+
+option which activates posix_fadvise64.
+
+There are some caveats though:
+
+ * When calling posix_fadvise64 while writing a file, only the
+ part of the cache that has already been written to disk will
+ be released. This means we have to call fdatasync before
+ calling posix_fadvise64 and this will unfortunately slow down
+ operations considerably. On my test system I get 240 KByte/s.
+
+ The patch has been optimized, so that the impact on large files
+ will be considerably lowered by calling posix_fadvise64 only
+ after a few megabytes have been written.
+
+ * When reading a file which was already cached *before* rsync read
+ it, the content of the file will be released from cache
+ nevertheless, which may not be intended. I have unfortunately not
+ found a method for determining if a file is in cache or not
+ (ideas?)
+
+ I found that running rsync on an LVM snapshot is a good way
+ around this problem, since the snapshot data is cached
+ separately from the original. It has the additional benefit of
+ making the backups more consistent.
+
+ * I don't really know the rsync code, so it may be that the patch
+ is calling fadvise for files where this would not be necessary.
+
+ * The patch is tested only on Linux 2.6.18
+
+If you have any input on this, please let me know.
+
+You can get the latest edition of the patch from
+
+ http://tobi.oetiker.ch/patches/
+
+cheers
+tobi
+
+Changes:
+
+ 2007-04-23
+
+* pass --drop-cache on to the remote server
+* make test works now
+
+--- old/checksum.c
++++ new/checksum.c
+@@ -148,7 +148,7 @@ void file_checksum(char *fname, char *su
+ mdfour_result(&m, (uchar *)sum);
+ }
+
+- close(fd);
++ fadv_close(fd);
+ unmap_file(buf);
+ }
+
+--- old/fileio.c
++++ new/fileio.c
+@@ -26,15 +26,18 @@
+ #endif
+
+ extern int sparse_files;
+-
+ static char last_byte;
+ static int last_sparse;
+
++extern int drop_cache;
++
++
++
+ int sparse_end(int f)
+ {
+ if (last_sparse) {
+ do_lseek(f,-1,SEEK_CUR);
+- return (write(f,&last_byte,1) == 1 ? 0 : -1);
++ return (fadv_write(f,&last_byte,1) == 1 ? 0 : -1);
+ }
+ last_sparse = 0;
+ return 0;
+@@ -61,7 +64,7 @@ static int write_sparse(int f,char *buf,
+ if (l1 == len)
+ return len;
+
+- ret = write(f, buf + l1, len - (l1+l2));
++ ret = fadv_write(f, buf + l1, len - (l1+l2));
+ if (ret == -1 || ret == 0)
+ return ret;
+ else if (ret != (int) (len - (l1+l2)))
+@@ -84,7 +87,7 @@ int flush_write_file(int f)
+ char *bp = wf_writeBuf;
+
+ while (wf_writeBufCnt > 0) {
+- if ((ret = write(f, bp, wf_writeBufCnt)) < 0) {
++ if ((ret = fadv_write(f, bp, wf_writeBufCnt)) < 0) {
+ if (errno == EINTR)
+ continue;
+ return ret;
+@@ -235,7 +238,7 @@ char *map_ptr(struct map_struct *map, OF
+ map->p_len = window_size;
+
+ while (read_size > 0) {
+- nread = read(map->fd, map->p + read_offset, read_size);
++ nread = fadv_read(map->fd, map->p + read_offset, read_size);
+ if (nread <= 0) {
+ if (!map->status)
+ map->status = nread ? errno : ENODATA;
+--- old/generator.c
++++ new/generator.c
+@@ -1614,18 +1614,18 @@ static void recv_generator(char *fname,
+
+ if (inplace && make_backups > 0 && fnamecmp_type == FNAMECMP_FNAME) {
+ if (!(backupptr = get_backup_name(fname))) {
+- close(fd);
++ fadv_close(fd);
+ goto cleanup;
+ }
+ if (!(back_file = make_file(fname, NULL, NULL, 0, NO_FILTERS))) {
+- close(fd);
++ fadv_close(fd);
+ goto pretend_missing;
+ }
+ if (robust_unlink(backupptr) && errno != ENOENT) {
+ rsyserr(FERROR, errno, "unlink %s",
+ full_fname(backupptr));
+ unmake_file(back_file);
+- close(fd);
++ fadv_close(fd);
+ goto cleanup;
+ }
+ if ((f_copy = do_open(backupptr,
+@@ -1633,7 +1633,7 @@ static void recv_generator(char *fname,
+ rsyserr(FERROR, errno, "open %s",
+ full_fname(backupptr));
+ unmake_file(back_file);
+- close(fd);
++ fadv_close(fd);
+ goto cleanup;
+ }
+ fnamecmp_type = FNAMECMP_BACKUP;
+@@ -1695,7 +1695,7 @@ static void recv_generator(char *fname,
+ generate_and_send_sums(fd, sx.st.st_size, f_out, f_copy);
+
+ if (f_copy >= 0) {
+- close(f_copy);
++ fadv_close(f_copy);
+ set_file_attrs(backupptr, back_file, NULL, NULL, 0);
+ if (verbose > 1) {
+ rprintf(FINFO, "backed up %s to %s\n",
+@@ -1704,7 +1704,7 @@ static void recv_generator(char *fname,
+ unmake_file(back_file);
+ }
+
+- close(fd);
++ fadv_close(fd);
+
+ cleanup:
+ #ifdef SUPPORT_ACLS
+--- old/options.c
++++ new/options.c
+@@ -57,6 +57,7 @@ int preserve_gid = 0;
+ int preserve_times = 0;
+ int omit_dir_times = 0;
+ int update_only = 0;
++int drop_cache = 0;
+ int cvs_exclude = 0;
+ int dry_run = 0;
+ int do_xfers = 1;
+@@ -310,6 +311,7 @@ void usage(enum logcode F)
+ rprintf(F," --backup-dir=DIR make backups into hierarchy based in DIR\n");
+ rprintf(F," --suffix=SUFFIX set backup suffix (default %s w/o --backup-dir)\n",BACKUP_SUFFIX);
+ rprintf(F," -u, --update skip files that are newer on the receiver\n");
++ rprintf(F," --drop-cache tell OS to drop caching of file data\n");
+ rprintf(F," --inplace update destination files in-place (SEE MAN PAGE)\n");
+ rprintf(F," --append append data onto shorter files\n");
+ rprintf(F," -d, --dirs transfer directories without recursing\n");
+@@ -506,6 +508,7 @@ static struct poptOption long_options[]
+ {"size-only", 0, POPT_ARG_NONE, &size_only, 0, 0, 0 },
+ {"one-file-system", 'x', POPT_ARG_NONE, 0, 'x', 0, 0 },
+ {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 },
++ {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 },
+ {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 },
+ {"ignore-non-existing",0,POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 },
+ {"ignore-existing", 0, POPT_ARG_NONE, &ignore_existing, 0, 0, 0 },
+@@ -1603,6 +1606,9 @@ void server_options(char **args,int *arg
+ if (!am_sender)
+ args[ac++] = "--sender";
+
++ if (drop_cache)
++ args[ac++] = "--drop-cache";
++
+ x = 1;
+ argstr[0] = '-';
+ for (i = 0; i < verbose; i++)
+--- old/receiver.c
++++ new/receiver.c
+@@ -554,7 +554,7 @@ int recv_files(int f_in, char *local_nam
+ rsyserr(FERROR, errno, "fstat %s failed",
+ full_fname(fnamecmp));
+ discard_receive_data(f_in, F_LENGTH(file));
+- close(fd1);
++ fadv_close(fd1);
+ if (inc_recurse)
+ send_msg_int(MSG_NO_SEND, ndx);
+ continue;
+@@ -569,14 +569,14 @@ int recv_files(int f_in, char *local_nam
+ rprintf(FERROR,"recv_files: %s is a directory\n",
+ full_fname(fnamecmp));
+ discard_receive_data(f_in, F_LENGTH(file));
+- close(fd1);
++ fadv_close(fd1);
+ if (inc_recurse)
+ send_msg_int(MSG_NO_SEND, ndx);
+ continue;
+ }
+
+ if (fd1 != -1 && !S_ISREG(st.st_mode)) {
+- close(fd1);
++ fadv_close(fd1);
+ fd1 = -1;
+ }
+
+@@ -604,7 +604,7 @@ int recv_files(int f_in, char *local_nam
+ full_fname(fname));
+ discard_receive_data(f_in, F_LENGTH(file));
+ if (fd1 != -1)
+- close(fd1);
++ fadv_close(fd1);
+ if (inc_recurse)
+ send_msg_int(MSG_NO_SEND, ndx);
+ continue;
+@@ -613,7 +613,7 @@ int recv_files(int f_in, char *local_nam
+ if (!get_tmpname(fnametmp,fname)) {
+ discard_receive_data(f_in, F_LENGTH(file));
+ if (fd1 != -1)
+- close(fd1);
++ fadv_close(fd1);
+ if (inc_recurse)
+ send_msg_int(MSG_NO_SEND, ndx);
+ continue;
+@@ -641,7 +641,7 @@ int recv_files(int f_in, char *local_nam
+ full_fname(fnametmp));
+ discard_receive_data(f_in, F_LENGTH(file));
+ if (fd1 != -1)
+- close(fd1);
++ fadv_close(fd1);
+ if (inc_recurse)
+ send_msg_int(MSG_NO_SEND, ndx);
+ continue;
+@@ -663,8 +663,8 @@ int recv_files(int f_in, char *local_nam
+ log_item(log_code, file, &initial_stats, iflags, NULL);
+
+ if (fd1 != -1)
+- close(fd1);
+- if (close(fd2) < 0) {
++ fadv_close(fd1);
++ if (fadv_close(fd2) < 0) {
+ rsyserr(FERROR, errno, "close failed on %s",
+ full_fname(fnametmp));
+ exit_cleanup(RERR_FILEIO);
+--- old/rsync.yo
++++ new/rsync.yo
+@@ -335,6 +335,7 @@ to the detailed description below for a
+ --super receiver attempts super-user activities
+ --fake-super store/recover privileged attrs using xattrs
+ -S, --sparse handle sparse files efficiently
++ --drop-cache tell OS to drop caching of file data
+ -n, --dry-run show what would have been transferred
+ -W, --whole-file copy files whole (without rsync algorithm)
+ -x, --one-file-system don't cross filesystem boundaries
+@@ -956,6 +957,10 @@ NOTE: Don't use this option when the des
+ filesystem. It doesn't seem to handle seeks over null regions
+ correctly and ends up corrupting the files.
+
++dit(bf(--drop-cache)) Tell the OS to drop the caching of the file data. This
++prevents rsync from filling up the filesystem cache. This can sometimes help
++to make a system perform better by keeping non-rsync files in the disk cache.
++
+ dit(bf(-n, --dry-run)) This tells rsync to not do any file transfers,
+ instead it will just report the actions it would have taken.
+
+--- old/sender.c
++++ new/sender.c
+@@ -307,7 +307,7 @@ void send_files(int f_in, int f_out)
+ io_error |= IOERR_GENERAL;
+ rsyserr(FERROR, errno, "fstat failed");
+ free_sums(s);
+- close(fd);
++ fadv_close(fd);
+ exit_cleanup(RERR_PROTOCOL);
+ }
+
+@@ -351,7 +351,7 @@ void send_files(int f_in, int f_out)
+ full_fname(fname));
+ }
+ }
+- close(fd);
++ fadv_close(fd);
+
+ free_sums(s);
+
+--- old/t_unsafe.c
++++ new/t_unsafe.c
+@@ -28,6 +28,7 @@ int am_root = 0;
+ int read_only = 0;
+ int list_only = 0;
+ int verbose = 0;
++int drop_cache = 0;
+ int preserve_perms = 0;
+
+ int
+--- old/util.c
++++ new/util.c
+@@ -24,6 +24,7 @@
+
+ extern int verbose;
+ extern int dry_run;
++extern int drop_cache;
+ extern int module_id;
+ extern int modify_window;
+ extern int relative_paths;
+@@ -39,6 +40,88 @@ char curr_dir[MAXPATHLEN];
+ unsigned int curr_dir_len;
+ int curr_dir_depth; /* This is only set for a sanitizing daemon. */
+
++/* Per-descriptor state for --drop-cache, indexed by fd number (slots 0..254). */
++
++static struct stat fadv_fd_stat[255];	/* st_dev/st_ino last seen on each fd */
++static off_t fadv_fd_pos[255];	/* offset up to which cache was last dropped */
++static int fadv_fd_init = 0;	/* set to 1 once the tables have been cleared */
++
++/* Clear the per-descriptor tracking tables on the first call; later calls do nothing. */
++static void fadv_fd_init_func(void){
++	int slot;
++	if (fadv_fd_init != 0)
++		return;
++	for (fadv_fd_init = 1, slot = 0; slot < 255; slot++) {
++		fadv_fd_pos[slot] = 0;
++		fadv_fd_stat[slot].st_dev = 0;
++		fadv_fd_stat[slot].st_ino = 0;
++	}
++}
++
++/* Advise the kernel that cached pages of fd lying more than 1 MB behind the
++ * current offset are no longer needed, at most once per 16 MB of progress.
++ * With sync set the data is flushed first, because pages not yet written to
++ * disk are not released.  The caller's errno is preserved. */
++static void fadv_drop(int fd, int sync){
++	struct stat st;
++	off_t pos;	/* off_t, not int: offsets beyond 2 GB must not be truncated */
++	int saved_errno = errno;
++	if (fd < 0 || fd >= 255)	/* the tracking tables only have slots 0..254 */
++		return;
++	fadv_fd_init_func();
++	pos = lseek(fd,0,SEEK_CUR);
++	if (pos != (off_t)-1 && fstat(fd,&st) == 0) {
++		/* trail 1 MB behind in dropping. we do this to make sure that
++		   the same block or stripe does not have to be written twice */
++		pos -= 1024*1024;
++		if (fadv_fd_stat[fd].st_dev != st.st_dev
++		 || fadv_fd_stat[fd].st_ino != st.st_ino) {
++			/* fd refers to a different file than last time: restart tracking */
++			fadv_fd_stat[fd].st_dev = st.st_dev;
++			fadv_fd_stat[fd].st_ino = st.st_ino;
++			fadv_fd_pos[fd] = 0;
++		} else if (fadv_fd_pos[fd] < pos - 16*1024*1024) {
++			if (sync)	/* costly, but unflushed data would make the advice a no-op */
++				fdatasync(fd);
++			posix_fadvise64(fd, 0, pos, POSIX_FADV_DONTNEED);
++			fadv_fd_pos[fd] = pos;
++		}
++	}
++	errno = saved_errno;
++}
++
++/* write() that may drop cache; failed writes skip it so errno (EINTR) survives */
++ssize_t fadv_write(int fd, const void *buf, size_t count)
++{
++	ssize_t ret = write(fd, buf, count);
++	if (drop_cache && ret > 0)
++		fadv_drop(fd,1);
++	return ret;
++}
++
++/* read() that may drop cache; failed reads skip it so errno (EINTR) survives */
++ssize_t fadv_read(int fd, void *buf, size_t count)
++{
++	ssize_t ret = read(fd, buf, count);
++	if (drop_cache && ret > 0)
++		fadv_drop(fd,0);
++	return ret;
++}
++
++/* close() wrapper.  With --drop-cache: flush (unflushed data is not released),
++ * advise DONTNEED for the whole file, and clear the fd's tracking slot. */
++int fadv_close(int fd){
++	if (drop_cache) {
++		fdatasync(fd);
++		posix_fadvise64(fd, 0, 0,POSIX_FADV_DONTNEED);
++		if (fd >= 0 && fd < 255) {	/* a reused fd number must start afresh */
++			fadv_fd_stat[fd].st_dev = fadv_fd_stat[fd].st_ino = 0;
++			fadv_fd_pos[fd] = 0;
++		}
++	}
++	return close(fd);
++}
++
+ /* Set a fd into nonblocking mode. */
+ void set_nonblocking(int fd)
+ {
+@@ -221,7 +304,7 @@ int full_write(int desc, const char *ptr
+
+ total_written = 0;
+ while (len > 0) {
+- int written = write(desc, ptr, len);
++ int written = fadv_write(desc, ptr, len);
+ if (written < 0) {
+ if (errno == EINTR)
+ continue;
+@@ -253,7 +336,7 @@ static int safe_read(int desc, char *ptr
+ return len;
+
+ do {
+- n_chars = read(desc, ptr, len);
++ n_chars = fadv_read(desc, ptr, len);
+ } while (n_chars < 0 && errno == EINTR);
+
+ return n_chars;
+@@ -284,32 +367,32 @@ int copy_file(const char *source, const
+ ofd = do_open(dest, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, mode);
+ if (ofd == -1) {
+ rsyserr(FERROR, errno, "open %s", full_fname(dest));
+- close(ifd);
++ fadv_close(ifd);
+ return -1;
+ }
+
+ while ((len = safe_read(ifd, buf, sizeof buf)) > 0) {
+ if (full_write(ofd, buf, len) < 0) {
+ rsyserr(FERROR, errno, "write %s", full_fname(dest));
+- close(ifd);
+- close(ofd);
++ fadv_close(ifd);
++ fadv_close(ofd);
+ return -1;
+ }
+ }
+
+ if (len < 0) {
+ rsyserr(FERROR, errno, "read %s", full_fname(source));
+- close(ifd);
+- close(ofd);
++ fadv_close(ifd);
++ fadv_close(ofd);
+ return -1;
+ }
+
+- if (close(ifd) < 0) {
++ if (fadv_close(ifd) < 0) {
+ rsyserr(FINFO, errno, "close failed on %s",
+ full_fname(source));
+ }
+
+- if (close(ofd) < 0) {
++ if (fadv_close(ofd) < 0) {
+ rsyserr(FERROR, errno, "close failed on %s",
+ full_fname(dest));
+ return -1;