Added a manpage entry and made a few more tweaks.
[rsync/rsync-patches.git] / drop-cache.diff
CommitLineData
c966e8e8
WD
1From: Tobi Oetiker tobi{at}oetiker.ch
2Date: 2007-04-23
3
4I am using rsync for hard-link backup. I found that there is a
5major problem with frequent backup filling up the file system cache
6with all the data from the files being backed up. The effect is
7that all the other 'sensible' data in the cache gets thrown out in
8the process. This is rather unfortunate as the performance of the
9system becomes very bad after running rsync.
10
11Some research showed that
12
13 posix_fadvise64(fd, 0, 0,POSIX_FADV_DONTNEED);
14
15would tell the OS that it should not keep the file in cache. I
16have written a patch for rsync that adds the
17
18 --drop-cache
19
20option which activates posix_fadvise64.
21
22There are some caveats though:
23
24 * When calling posix_fadvise64 while writing a file, only the
25 part of the cache that has already been written to disk will
26 be released. This means we have to call fdatasync before
27 calling posix_fadvise64, and this will unfortunately slow down
28 operations considerably. On my test system I get 240 KByte/s.
29
30 The patch has been optimized, so that the impact on large files
31 will be considerably lowered by calling posix_fadvise64 only
32 after a few megabytes have been written.
33
34 * When reading a file which was already cached *before* rsync read
35 it, the content of the file will be released from cache
36 nevertheless, which may not be intended. I have unfortunately not
37 found a method for determining whether a file is in cache or not
38 (ideas?)
39
40 I found that running rsync on an LVM snapshot is a good way
41 around this problem, since the snapshot data is cached
42 separately from the original. It has the additional benefit of
43 making the backups more consistent.
44
45 * I don't really know the rsync code, so it may be that the patch
46 is calling fadvise for files where this would not be necessary.
47
48 * The patch is tested only on Linux 2.6.18
49
50If you have any input on this, please let me know.
51
52You can get the latest edition of the patch from
53
54 http://tobi.oetiker.ch/patches/
55
56cheers
57tobi
58
59Changes:
60
61 2007-04-23
62
63* pass --drop-cache on to the remote server
64* make test works now
65
66--- old/checksum.c
67+++ new/checksum.c
14317044 68@@ -26,6 +26,10 @@ extern int protocol_version;
c966e8e8 69
14317044 70 int csum_length = SHORT_SUM_LENGTH; /* initial value */
c966e8e8 71
14317044
WD
72+#ifdef HAVE_POSIX_FADVISE64
73+#define close(fd) fadv_close(fd)
74+#endif
75+
76 /*
77 a simple 32 bit checksum that can be upadted from either end
78 (inspired by Mark Adler's Adler-32 checksum)
79--- old/cleanup.c
80+++ new/cleanup.c
81@@ -46,7 +46,13 @@ void close_all(void)
82 int fd;
83 int ret;
84 STRUCT_STAT st;
85+#endif
86+
87+#ifdef HAVE_POSIX_FADVISE64
88+ fadv_close_all();
89+#endif
90
91+#ifdef SHUTDOWN_ALL_SOCKETS
92 max_fd = sysconf(_SC_OPEN_MAX) - 1;
93 for (fd = max_fd; fd >= 0; fd--) {
94 if ((ret = do_fstat(fd, &st)) == 0) {
95--- old/configure.in
96+++ new/configure.in
97@@ -561,7 +561,7 @@ AC_CHECK_FUNCS(waitpid wait4 getcwd strd
98 strlcat strlcpy strtol mallinfo getgroups setgroups geteuid getegid \
99 setlocale setmode open64 lseek64 mkstemp64 mtrace va_copy __va_copy \
100 strerror putenv iconv_open locale_charset nl_langinfo getxattr \
101- extattr_get_link sigaction sigprocmask)
102+ extattr_get_link sigaction sigprocmask posix_fadvise64)
103
104 AC_CHECK_FUNCS(getpgrp tcgetpgrp)
105 if test $ac_cv_func_getpgrp = yes; then
c966e8e8
WD
106--- old/fileio.c
107+++ new/fileio.c
14317044 108@@ -30,6 +30,12 @@ extern int sparse_files;
c966e8e8
WD
109 static char last_byte;
110 static int last_sparse;
111
14317044
WD
112+#ifdef HAVE_POSIX_FADVISE64
113+#define close(fd) fadv_close(fd)
114+#define read(fd,buf,len) fadv_read(fd,buf,len)
115+#define write(fd,buf,len) fadv_write(fd,buf,len)
116+#endif
c966e8e8
WD
117+
118 int sparse_end(int f)
119 {
120 if (last_sparse) {
c966e8e8
WD
121--- old/generator.c
122+++ new/generator.c
14317044
WD
123@@ -115,6 +115,10 @@ static int dir_tweaking;
124 static int need_retouch_dir_times;
125 static const char *solo_file = NULL;
c966e8e8 126
14317044
WD
127+#ifdef HAVE_POSIX_FADVISE64
128+#define close(fd) fadv_close(fd)
129+#endif
130+
131 /* For calling delete_item() and delete_dir_contents(). */
132 #define DEL_RECURSE (1<<1) /* recurse */
133 #define DEL_DIR_IS_EMPTY (1<<2) /* internal delete_FUNCTIONS use only */
c966e8e8
WD
134--- old/options.c
135+++ new/options.c
136@@ -57,6 +57,7 @@ int preserve_gid = 0;
137 int preserve_times = 0;
138 int omit_dir_times = 0;
139 int update_only = 0;
140+int drop_cache = 0;
141 int cvs_exclude = 0;
142 int dry_run = 0;
143 int do_xfers = 1;
14317044 144@@ -310,6 +311,9 @@ void usage(enum logcode F)
c966e8e8
WD
145 rprintf(F," --backup-dir=DIR make backups into hierarchy based in DIR\n");
146 rprintf(F," --suffix=SUFFIX set backup suffix (default %s w/o --backup-dir)\n",BACKUP_SUFFIX);
147 rprintf(F," -u, --update skip files that are newer on the receiver\n");
14317044 148+#ifdef HAVE_POSIX_FADVISE64
c966e8e8 149+ rprintf(F," --drop-cache tell OS to drop caching of file data\n");
14317044 150+#endif
c966e8e8
WD
151 rprintf(F," --inplace update destination files in-place (SEE MAN PAGE)\n");
152 rprintf(F," --append append data onto shorter files\n");
153 rprintf(F," -d, --dirs transfer directories without recursing\n");
14317044 154@@ -506,6 +510,9 @@ static struct poptOption long_options[]
c966e8e8
WD
155 {"size-only", 0, POPT_ARG_NONE, &size_only, 0, 0, 0 },
156 {"one-file-system", 'x', POPT_ARG_NONE, 0, 'x', 0, 0 },
157 {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 },
14317044 158+#ifdef HAVE_POSIX_FADVISE64
c966e8e8 159+ {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 },
14317044 160+#endif
c966e8e8
WD
161 {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 },
162 {"ignore-non-existing",0,POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 },
163 {"ignore-existing", 0, POPT_ARG_NONE, &ignore_existing, 0, 0, 0 },
14317044 164@@ -1603,6 +1610,11 @@ void server_options(char **args,int *arg
c966e8e8
WD
165 if (!am_sender)
166 args[ac++] = "--sender";
167
14317044 168+#ifdef HAVE_POSIX_FADVISE64
c966e8e8
WD
169+ if (drop_cache)
170+ args[ac++] = "--drop-cache";
14317044 171+#endif
c966e8e8
WD
172+
173 x = 1;
174 argstr[0] = '-';
175 for (i = 0; i < verbose; i++)
176--- old/receiver.c
177+++ new/receiver.c
14317044
WD
178@@ -62,6 +62,10 @@ static int phase = 0, redoing = 0;
179 /* We're either updating the basis file or an identical copy: */
180 static int updating_basis;
c966e8e8 181
14317044
WD
182+#ifdef HAVE_POSIX_FADVISE64
183+#define close(fd) fadv_close(fd)
184+#endif
185+
186 /*
187 * get_tmpname() - create a tmp filename for a given filename
188 *
c966e8e8
WD
189--- old/rsync.yo
190+++ new/rsync.yo
191@@ -335,6 +335,7 @@ to the detailed description below for a
192 --super receiver attempts super-user activities
193 --fake-super store/recover privileged attrs using xattrs
194 -S, --sparse handle sparse files efficiently
195+ --drop-cache tell OS to drop caching of file data
196 -n, --dry-run show what would have been transferred
197 -W, --whole-file copy files whole (without rsync algorithm)
198 -x, --one-file-system don't cross filesystem boundaries
199@@ -956,6 +957,10 @@ NOTE: Don't use this option when the des
200 filesystem. It doesn't seem to handle seeks over null regions
201 correctly and ends up corrupting the files.
202
203+dit(bf(--drop-cache)) Tell the OS to drop the caching of the file data. This
204+prevents rsync from filling up the filesystem cache. This can sometimes help
205+to make a system perform better by keeping non-rsync files in the disk cache.
206+
207 dit(bf(-n, --dry-run)) This tells rsync to not do any file transfers,
208 instead it will just report the actions it would have taken.
209
210--- old/sender.c
211+++ new/sender.c
14317044
WD
212@@ -46,6 +46,10 @@ extern int write_batch;
213 extern struct stats stats;
214 extern struct file_list *cur_flist, *first_flist;
c966e8e8 215
14317044
WD
216+#ifdef HAVE_POSIX_FADVISE64
217+#define close(fd) fadv_close(fd)
218+#endif
219+
220 /**
221 * @file
222 *
c966e8e8
WD
223--- old/t_unsafe.c
224+++ new/t_unsafe.c
225@@ -28,6 +28,7 @@ int am_root = 0;
226 int read_only = 0;
227 int list_only = 0;
228 int verbose = 0;
229+int drop_cache = 0;
230 int preserve_perms = 0;
231
232 int
233--- old/util.c
234+++ new/util.c
14317044 235@@ -25,6 +25,7 @@
c966e8e8
WD
236 extern int verbose;
237 extern int dry_run;
c966e8e8 238 extern int module_id;
14317044 239+extern int drop_cache;
c966e8e8
WD
240 extern int modify_window;
241 extern int relative_paths;
14317044
WD
242 extern int human_readable;
243@@ -39,6 +40,131 @@ char curr_dir[MAXPATHLEN];
c966e8e8
WD
244 unsigned int curr_dir_len;
245 int curr_dir_depth; /* This is only set for a sanitizing daemon. */
246
14317044
WD
247+#ifdef HAVE_POSIX_FADVISE64
248+#define FADV_BUFFER_SIZE 1024*1024*16 /* 16 MB: how far an fd must advance past its last drop point before fadv_drop() drops again */
c966e8e8 249+
14317044
WD
250+static struct stat fadv_fd_stat[1024]; /* st_dev/st_ino last recorded for each fd, indexed by fd */
251+static off_t fadv_fd_pos[1024]; /* offset up to which each fd's cache was last dropped, indexed by fd */
252+static int fadv_fd_init = 0; /* non-zero once fadv_fd_init_func() has run */
253+static int fadv_max_fd = 0; /* table/ring capacity; set by fadv_fd_init_func(), at most 1000 */
254+static int fadv_close_ring_tail = 0; /* ring index of the oldest queued descriptor */
255+static int fadv_close_ring_head = 0; /* ring index where the next descriptor is stored */
256+static int fadv_close_ring_size = 0; /* number of descriptors currently queued */
257+static int fadv_close_ring[1024]; /* dup()ed descriptors waiting to be synced, dropped and closed */
258+static int fadv_close_buffer_size = 0; /* bytes accounted to queued descriptors since the last fadv_close_all() */
c966e8e8 259+
14317044
WD
260+static void fadv_fd_init_func(void) /* One-time setup of fadv_max_fd and the per-fd tables; later calls do nothing. */
261+{
262+ if (fadv_fd_init == 0) {
263+ int i;
264+ fadv_fd_init = 1;
265+ if (fadv_max_fd == 0){
266+ fadv_max_fd = sysconf(_SC_OPEN_MAX) - 20; /* stay 20 below the open-file limit (presumably headroom for other descriptors) */
267+ if (fadv_max_fd < 0) /* NOTE(review): a result of exactly 0 is not caught, yet fadv_max_fd is later used as a modulus */
268+ fadv_max_fd = 1;
269+ if (fadv_max_fd > 1000) /* keep indices inside the 1024-entry static arrays */
270+ fadv_max_fd = 1000;
271+ }
272+ for (i = 0; i < fadv_max_fd; i++) {
273+ fadv_fd_pos[i] = 0;
274+ fadv_fd_stat[i].st_dev = 0;
275+ fadv_fd_stat[i].st_ino = 0;
276+ }
277+ }
c966e8e8 278+}
14317044
WD
279+
280+static void fadv_drop(int fd, int sync) /* Advise the OS to drop fd's cached data up to 1 MB behind its current offset; fdatasync first when sync is non-zero. */
281+{
282+ struct stat sb;
283+ int pos;
284+
285+ /* Trail 1 MB behind the current file offset when dropping. We do
286+ * this so that the same block or stripe does not have to be
287+ * written twice. */
288+ if (fd > fadv_max_fd) /* NOTE(review): checked before fadv_fd_init_func(), so fadv_max_fd may still be 0 here */
289+ return;
290+ pos = lseek(fd, 0, SEEK_CUR) - 1024*1024; /* NOTE(review): the off_t offset is narrowed to int */
c966e8e8 291+ fadv_fd_init_func();
14317044
WD
292+ fstat(fd, &sb);
293+ if (fadv_fd_stat[fd].st_dev == sb.st_dev
294+ && fadv_fd_stat[fd].st_ino == sb.st_ino) {
295+ if (fadv_fd_pos[fd] < pos - FADV_BUFFER_SIZE) { /* act only once more than FADV_BUFFER_SIZE lies between the last drop point and pos */
296+ if (sync) {
297+ /* If the file is not flushed to disk before calling fadvise,
298+ * the cache will not be freed and the advice is ignored.
299+ * Syncing does give a severe hit on performance. If only there
300+ * were a way to mark cache so that it gets released once the
301+ * data is written to disk. */
302+ fdatasync(fd);
303+ }
304+ posix_fadvise64(fd, 0, pos, POSIX_FADV_DONTNEED);
305+ fadv_fd_pos[fd] = pos;
306+ }
307+ } else { /* fd refers to a different file than recorded: store its identity and restart tracking at 0 */
308+ fadv_fd_stat[fd].st_dev = sb.st_dev;
309+ fadv_fd_stat[fd].st_ino = sb.st_ino;
310+ fadv_fd_pos[fd] = 0;
311+ }
c966e8e8 312+}
14317044 313+
c966e8e8
WD
314+ssize_t fadv_write(int fd, const void *buf, size_t count) /* write() wrapper: when drop_cache is set, also lets fadv_drop() sync and drop fd's cached data. Returns write()'s result. */
315+{
14317044
WD
316+ int ret = write(fd, buf, count); /* the real write(): the redirecting macro is defined only further down */
317+ if (drop_cache)
318+ fadv_drop(fd, 1); /* 1 = fdatasync before dropping */
319+ return ret;
c966e8e8
WD
320+}
321+
322+ssize_t fadv_read(int fd, void *buf, size_t count) /* read() wrapper: when drop_cache is set, also lets fadv_drop() drop fd's cached data. Returns read()'s result. */
323+{
14317044
WD
324+ int ret = read(fd, buf, count); /* the real read(): the redirecting macro is defined only further down */
325+ if (drop_cache)
326+ fadv_drop(fd, 0); /* 0 = no fdatasync before dropping */
327+ return ret;
328+}
329+
330+void fadv_close_all(void) /* Drain the close ring: sync, drop the cache of, and close every queued descriptor. */
331+{
332+ while (fadv_close_ring_size > 0){
333+ fdatasync(fadv_close_ring[fadv_close_ring_tail]);
334+ posix_fadvise64(fadv_close_ring[fadv_close_ring_tail], 0, 0,POSIX_FADV_DONTNEED); /* offset 0, len 0: len 0 means through end of file per posix_fadvise */
335+ fadv_close_ring_size--;
336+ close(fadv_close_ring[fadv_close_ring_tail]); /* the real close(): the redirecting macro is defined only further down */
337+ fadv_close_ring_tail = (fadv_close_ring_tail + 1) % fadv_max_fd;
338+ fadv_close_buffer_size = 0; /* pending-bytes counter restarts after a flush */
339+ }
c966e8e8
WD
340+}
341+
14317044
WD
342+int fadv_close(int fd) /* close() wrapper: with drop_cache set, queues a dup of fd so its cache can be synced and dropped later in a batch. Returns close()'s result. */
343+{
344+ if (drop_cache) {
345+ /* If the file is not flushed to disk before calling fadvise,
346+ * the cache will not be freed and the advice is ignored, but
347+ * syncing gives a severe hit on performance. So instead of doing
348+ * it right away, we keep a copy of the file handle and do it
349+ * some time before we run out of file handles. This speeds
350+ * up operation for small files massively. It is directly
351+ * related to the number of spare file handles you have. */
352+ int newfd = dup(fd); /* NOTE(review): dup() result is not checked for -1 */
353+ int pos = lseek(fd, 0, SEEK_CUR);
354+ fadv_fd_init_func();
355+ fadv_close_buffer_size += pos - fadv_fd_pos[fd]; /* NOTE(review): fadv_fd_pos[] is indexed by fd without a bounds check */
356+ fadv_close_ring[fadv_close_ring_head] = newfd;
357+ fadv_close_ring_head = (fadv_close_ring_head + 1) % fadv_max_fd;
358+ fadv_close_ring_size ++;
359+ if (fadv_close_ring_size == fadv_max_fd || fadv_close_buffer_size > 1024*1024 ){ /* flush when the ring is full or more than 1 MB is pending */
360+ /* it seems fastest to drop things 'in groups' */
361+ fadv_close_all();
362+ }
363+ }
364+ return close(fd); /* the real close(): the redirecting macro is defined only after this function */
c966e8e8 365+}
14317044
WD
366+
367+#define close(fd) fadv_close(fd)
368+#define read(fd,buf,len) fadv_read(fd,buf,len)
369+#define write(fd,buf,len) fadv_write(fd,buf,len)
370+#endif
c966e8e8
WD
371+
372 /* Set a fd into nonblocking mode. */
373 void set_nonblocking(int fd)
374 {