Use "use warnings" rather than -w on the #! line.
[rsync/rsync-patches.git] / drop-cache.diff
CommitLineData
c966e8e8
WD
1From: Tobi Oetiker tobi{at}oetiker.ch
2Date: 2007-04-23
3
4I am using rsync for hard-link backup. I found that there is a
5major problem with frequent backup filling up the file system cache
6with all the data from the files being backed up. The effect is
7that all the other 'sensible' data in the cache gets thrown out in
8the process. This is rather unfortunate as the performance of the
9system becomes very bad after running rsync.
10
11Some research showed that
12
13 posix_fadvise64(fd, 0, 0,POSIX_FADV_DONTNEED);
14
15would tell the OS that it should not keep the file in cache. I
16have written a patch for rsync that adds the
17
18 --drop-cache
19
20option which activates posix_fadvise64.
21
22There are some caveats though:
23
24 * When calling posix_fadvise64 while writing a file, only the
25 part of the cache that has already been written to disk will
26 be released. This means we have to call fdatasync before
27 calling posix_fadvise64 and this will unfortunately slow down
28 operations considerably. On my test system I get 240 KByte/s.
29
30 The patch has been optimized, so that the impact on large files
31 will be considerably lowered by calling posix_fadvise64 only
32 after a few megabytes have been written.
33
34 * When reading a file which has been cached *before* rsync read
35 it, the content of the file will be released from cache
36 nevertheless, which may not be intended. I have unfortunately not
37 found a method for determining if a file is in cache or not
38 (ideas?)
39
40 I found that running rsync on an LVM snapshot is a good way
41 around this problem, since the snapshot data is cached
42 separately from the original. It has the additional benefit of
43 making the backups more consistent.
44
45 * I don't really know the rsync code, so it may be that the patch
46 is calling fadvise for files where this would not be necessary.
47
48 * The patch is tested only on Linux 2.6.18
49
50If you have any input on this, please let me know.
51
52You can get the latest edition of the patch from
53
54 http://tobi.oetiker.ch/patches/
55
56cheers
57tobi
58
e2e42a01 59Changes:
c966e8e8
WD
60
61 2007-04-23
62
63* pass --drop-cache on to the remote server
64* make test works now
65
cc3e685d
WD
66diff --git a/checksum.c b/checksum.c
67--- a/checksum.c
68+++ b/checksum.c
14317044 69@@ -26,6 +26,10 @@ extern int protocol_version;
c966e8e8 70
14317044 71 int csum_length = SHORT_SUM_LENGTH; /* initial value */
c966e8e8 72
14317044
WD
73+#ifdef HAVE_POSIX_FADVISE64
74+#define close(fd) fadv_close(fd)
75+#endif
76+
77 /*
78 a simple 32 bit checksum that can be upadted from either end
79 (inspired by Mark Adler's Adler-32 checksum)
cc3e685d
WD
80diff --git a/cleanup.c b/cleanup.c
81--- a/cleanup.c
82+++ b/cleanup.c
7f0bf1cb 83@@ -46,7 +46,13 @@ void close_all(void)
14317044
WD
84 int fd;
85 int ret;
86 STRUCT_STAT st;
87+#endif
88+
89+#ifdef HAVE_POSIX_FADVISE64
90+ fadv_close_all();
91+#endif
92
93+#ifdef SHUTDOWN_ALL_SOCKETS
94 max_fd = sysconf(_SC_OPEN_MAX) - 1;
95 for (fd = max_fd; fd >= 0; fd--) {
96 if ((ret = do_fstat(fd, &st)) == 0) {
cc3e685d
WD
97diff --git a/configure.in b/configure.in
98--- a/configure.in
99+++ b/configure.in
c0c7984e 100@@ -554,7 +554,7 @@ AC_CHECK_FUNCS(waitpid wait4 getcwd strdup chown chmod lchmod mknod mkfifo \
14317044
WD
101 strlcat strlcpy strtol mallinfo getgroups setgroups geteuid getegid \
102 setlocale setmode open64 lseek64 mkstemp64 mtrace va_copy __va_copy \
103 strerror putenv iconv_open locale_charset nl_langinfo getxattr \
7c4c2959
WD
104- extattr_get_link sigaction sigprocmask setattrlist)
105+ extattr_get_link sigaction sigprocmask setattrlist posix_fadvise64)
14317044 106
4c15e800
WD
107 dnl cygwin iconv.h defines iconv_open as libiconv_open
108 if test x"$ac_cv_func_iconv_open" != x"yes"; then
cc3e685d
WD
109diff --git a/fileio.c b/fileio.c
110--- a/fileio.c
111+++ b/fileio.c
14317044 112@@ -30,6 +30,12 @@ extern int sparse_files;
c966e8e8 113 static char last_byte;
abd3adb8 114 static OFF_T sparse_seek = 0;
c966e8e8 115
14317044
WD
116+#ifdef HAVE_POSIX_FADVISE64
117+#define close(fd) fadv_close(fd)
118+#define read(fd,buf,len) fadv_read(fd,buf,len)
119+#define write(fd,buf,len) fadv_write(fd,buf,len)
120+#endif
c966e8e8
WD
121+
122 int sparse_end(int f)
123 {
c8a8b4a7 124 int ret;
cc3e685d
WD
125diff --git a/generator.c b/generator.c
126--- a/generator.c
127+++ b/generator.c
4c107044 128@@ -115,6 +115,10 @@ static int need_retouch_dir_times;
c8a8b4a7 129 static int need_retouch_dir_perms;
14317044 130 static const char *solo_file = NULL;
c966e8e8 131
14317044
WD
132+#ifdef HAVE_POSIX_FADVISE64
133+#define close(fd) fadv_close(fd)
134+#endif
135+
136 /* For calling delete_item() and delete_dir_contents(). */
f9df736a 137 #define DEL_NO_UID_WRITE (1<<0) /* file/dir has our uid w/o write perm */
a5e6228a 138 #define DEL_RECURSE (1<<1) /* if dir, delete all contents */
cc3e685d
WD
139diff --git a/options.c b/options.c
140--- a/options.c
141+++ b/options.c
c0c7984e 142@@ -60,6 +60,7 @@ int preserve_uid = 0;
c8a8b4a7 143 int preserve_gid = 0;
c966e8e8 144 int preserve_times = 0;
c966e8e8
WD
145 int update_only = 0;
146+int drop_cache = 0;
147 int cvs_exclude = 0;
148 int dry_run = 0;
149 int do_xfers = 1;
abd3adb8 150@@ -327,6 +328,9 @@ void usage(enum logcode F)
c966e8e8
WD
151 rprintf(F," --backup-dir=DIR make backups into hierarchy based in DIR\n");
152 rprintf(F," --suffix=SUFFIX set backup suffix (default %s w/o --backup-dir)\n",BACKUP_SUFFIX);
153 rprintf(F," -u, --update skip files that are newer on the receiver\n");
14317044 154+#ifdef HAVE_POSIX_FADVISE64
c966e8e8 155+ rprintf(F," --drop-cache tell OS to drop caching of file data\n");
14317044 156+#endif
c966e8e8
WD
157 rprintf(F," --inplace update destination files in-place (SEE MAN PAGE)\n");
158 rprintf(F," --append append data onto shorter files\n");
c8a8b4a7 159 rprintf(F," --append-verify like --append, but with old data in file checksum\n");
abd3adb8 160@@ -535,6 +539,9 @@ static struct poptOption long_options[] = {
85096e5e
WD
161 {"no-one-file-system",'x',POPT_ARG_VAL, &one_file_system, 0, 0, 0 },
162 {"no-x", 'x', POPT_ARG_VAL, &one_file_system, 0, 0, 0 },
c966e8e8 163 {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 },
14317044 164+#ifdef HAVE_POSIX_FADVISE64
c966e8e8 165+ {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 },
14317044 166+#endif
c966e8e8
WD
167 {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 },
168 {"ignore-non-existing",0,POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 },
169 {"ignore-existing", 0, POPT_ARG_NONE, &ignore_existing, 0, 0, 0 },
abd3adb8 170@@ -1720,6 +1727,11 @@ void server_options(char **args, int *argc_p)
c966e8e8
WD
171 if (!am_sender)
172 args[ac++] = "--sender";
173
14317044 174+#ifdef HAVE_POSIX_FADVISE64
c966e8e8
WD
175+ if (drop_cache)
176+ args[ac++] = "--drop-cache";
14317044 177+#endif
c966e8e8
WD
178+
179 x = 1;
180 argstr[0] = '-';
c8a8b4a7 181
cc3e685d
WD
182diff --git a/receiver.c b/receiver.c
183--- a/receiver.c
184+++ b/receiver.c
963ca808 185@@ -63,6 +63,10 @@ static flist_ndx_list batch_redo_list;
14317044 186 /* We're either updating the basis file or an identical copy: */
e2b0842a 187 static int updating_basis_or_equiv;
c966e8e8 188
14317044
WD
189+#ifdef HAVE_POSIX_FADVISE64
190+#define close(fd) fadv_close(fd)
191+#endif
192+
193 /*
194 * get_tmpname() - create a tmp filename for a given filename
195 *
cc3e685d
WD
196diff --git a/rsync.yo b/rsync.yo
197--- a/rsync.yo
198+++ b/rsync.yo
abd3adb8 199@@ -356,6 +356,7 @@ to the detailed description below for a complete description. verb(
c966e8e8
WD
200 --super receiver attempts super-user activities
201 --fake-super store/recover privileged attrs using xattrs
202 -S, --sparse handle sparse files efficiently
203+ --drop-cache tell OS to drop caching of file data
e2b0842a 204 -n, --dry-run perform a trial run with no changes made
f2863bc0 205 -W, --whole-file copy files whole (w/o delta-xfer algorithm)
c966e8e8 206 -x, --one-file-system don't cross filesystem boundaries
abd3adb8 207@@ -1053,6 +1054,10 @@ NOTE: Don't use this option when the destination is a Solaris "tmpfs"
c966e8e8
WD
208 filesystem. It doesn't seem to handle seeks over null regions
209 correctly and ends up corrupting the files.
210
211+dit(bf(--drop-cache)) Tell the OS to drop the caching of the file data. This
212+prevents rsync from filling up the filesystem cache. This can sometimes help
213+to make a system perform better by keeping non-rsync files in the disk cache.
214+
e2b0842a
WD
215 dit(bf(-n, --dry-run)) This makes rsync perform a trial run that doesn't
216 make any changes (and produces mostly the same output as a real run). It
217 is most commonly used in combination with the bf(-v, --verbose) and/or
cc3e685d
WD
218diff --git a/sender.c b/sender.c
219--- a/sender.c
220+++ b/sender.c
14317044
WD
221@@ -46,6 +46,10 @@ extern int write_batch;
222 extern struct stats stats;
c8a8b4a7 223 extern struct file_list *cur_flist, *first_flist, *dir_flist;
c966e8e8 224
14317044
WD
225+#ifdef HAVE_POSIX_FADVISE64
226+#define close(fd) fadv_close(fd)
227+#endif
228+
229 /**
230 * @file
231 *
cc3e685d
WD
232diff --git a/t_unsafe.c b/t_unsafe.c
233--- a/t_unsafe.c
234+++ b/t_unsafe.c
c966e8e8
WD
235@@ -28,6 +28,7 @@ int am_root = 0;
236 int read_only = 0;
237 int list_only = 0;
238 int verbose = 0;
239+int drop_cache = 0;
240 int preserve_perms = 0;
a5e6228a 241 int preserve_executability = 0;
c966e8e8 242
cc3e685d
WD
243diff --git a/util.c b/util.c
244--- a/util.c
245+++ b/util.c
c8a8b4a7 246@@ -26,6 +26,7 @@
c966e8e8
WD
247 extern int verbose;
248 extern int dry_run;
c966e8e8 249 extern int module_id;
14317044 250+extern int drop_cache;
c966e8e8
WD
251 extern int modify_window;
252 extern int relative_paths;
14317044 253 extern int human_readable;
91270139 254@@ -42,6 +43,131 @@ char curr_dir[MAXPATHLEN];
c966e8e8
WD
255 unsigned int curr_dir_len;
256 int curr_dir_depth; /* This is only set for a sanitizing daemon. */
257
14317044
WD
258+#ifdef HAVE_POSIX_FADVISE64
259+#define FADV_BUFFER_SIZE 1024*1024*16
c966e8e8 260+
14317044
WD
261+static struct stat fadv_fd_stat[1024];
262+static off_t fadv_fd_pos[1024];
263+static int fadv_fd_init = 0;
264+static int fadv_max_fd = 0;
265+static int fadv_close_ring_tail = 0;
266+static int fadv_close_ring_head = 0;
267+static int fadv_close_ring_size = 0;
268+static int fadv_close_ring[1024];
269+static int fadv_close_buffer_size = 0;
c966e8e8 270+
14317044
WD
271+static void fadv_fd_init_func(void)
272+{
273+ if (fadv_fd_init == 0) {
274+ int i;
275+ fadv_fd_init = 1;
276+ if (fadv_max_fd == 0){
277+ fadv_max_fd = sysconf(_SC_OPEN_MAX) - 20;
278+ if (fadv_max_fd < 0)
279+ fadv_max_fd = 1;
280+ if (fadv_max_fd > 1000)
281+ fadv_max_fd = 1000;
282+ }
283+ for (i = 0; i < fadv_max_fd; i++) {
284+ fadv_fd_pos[i] = 0;
285+ fadv_fd_stat[i].st_dev = 0;
286+ fadv_fd_stat[i].st_ino = 0;
287+ }
288+ }
c966e8e8 289+}
14317044
WD
290+
291+static void fadv_drop(int fd, int sync)
292+{
293+ struct stat sb;
294+ int pos;
295+
296+ /* Trail 1 MB behind in dropping. we do this to make
297+ * sure that the same block or stripe does not have
298+ * to be written twice. */
299+ if (fd > fadv_max_fd)
300+ return;
301+ pos = lseek(fd, 0, SEEK_CUR) - 1024*1024;
c966e8e8 302+ fadv_fd_init_func();
14317044
WD
303+ fstat(fd, &sb);
304+ if (fadv_fd_stat[fd].st_dev == sb.st_dev
305+ && fadv_fd_stat[fd].st_ino == sb.st_ino) {
306+ if (fadv_fd_pos[fd] < pos - FADV_BUFFER_SIZE) {
307+ if (sync) {
308+ /* If the file is not flushed to disk before calling fadvise,
309+ * then the Cache will not be freed and the advise gets ignored
310+ * this does give a severe hit on performance. If only there
311+ * was a way to mark cache so that it gets release once the data
312+ * is written to disk. */
313+ fdatasync(fd);
314+ }
315+ posix_fadvise64(fd, 0, pos, POSIX_FADV_DONTNEED);
316+ fadv_fd_pos[fd] = pos;
317+ }
318+ } else {
319+ fadv_fd_stat[fd].st_dev = sb.st_dev;
320+ fadv_fd_stat[fd].st_ino = sb.st_ino;
321+ fadv_fd_pos[fd] = 0;
322+ }
c966e8e8 323+}
14317044 324+
c966e8e8
WD
325+ssize_t fadv_write(int fd, const void *buf, size_t count)
326+{
14317044
WD
327+ int ret = write(fd, buf, count);
328+ if (drop_cache)
329+ fadv_drop(fd, 1);
330+ return ret;
c966e8e8
WD
331+}
332+
333+ssize_t fadv_read(int fd, void *buf, size_t count)
334+{
14317044
WD
335+ int ret = read(fd, buf, count);
336+ if (drop_cache)
337+ fadv_drop(fd, 0);
338+ return ret;
339+}
340+
341+void fadv_close_all(void)
342+{
343+ while (fadv_close_ring_size > 0){
344+ fdatasync(fadv_close_ring[fadv_close_ring_tail]);
345+ posix_fadvise64(fadv_close_ring[fadv_close_ring_tail], 0, 0,POSIX_FADV_DONTNEED);
346+ fadv_close_ring_size--;
347+ close(fadv_close_ring[fadv_close_ring_tail]);
348+ fadv_close_ring_tail = (fadv_close_ring_tail + 1) % fadv_max_fd;
349+ fadv_close_buffer_size = 0;
350+ }
c966e8e8
WD
351+}
352+
14317044
WD
353+int fadv_close(int fd)
354+{
355+ if (drop_cache) {
356+ /* If the file is not flushed to disk before calling fadvise,
357+ * then the Cache will not be freed and the advise gets ignored
358+ * this does give a severe hit on performance. So instead of doing
359+ * it right away, we save us a copy of the filehandle and do it
360+ * some time before we are out of filehandles. This speeds
361+ * up operation for small files massively. It is directly
362+ * related to the number of spare file handles you have. */
363+ int newfd = dup(fd);
364+ int pos = lseek(fd, 0, SEEK_CUR);
365+ fadv_fd_init_func();
366+ fadv_close_buffer_size += pos - fadv_fd_pos[fd];
367+ fadv_close_ring[fadv_close_ring_head] = newfd;
e2e42a01 368+ fadv_close_ring_head = (fadv_close_ring_head + 1) % fadv_max_fd;
14317044
WD
369+ fadv_close_ring_size ++;
370+ if (fadv_close_ring_size == fadv_max_fd || fadv_close_buffer_size > 1024*1024 ){
371+ /* it seems fastest to drop things 'in groups' */
372+ fadv_close_all();
e2e42a01 373+ }
14317044
WD
374+ }
375+ return close(fd);
c966e8e8 376+}
14317044
WD
377+
378+#define close(fd) fadv_close(fd)
379+#define read(fd,buf,len) fadv_read(fd,buf,len)
380+#define write(fd,buf,len) fadv_write(fd,buf,len)
381+#endif
c966e8e8
WD
382+
383 /* Set a fd into nonblocking mode. */
384 void set_nonblocking(int fd)
385 {