1 From: Tobi Oetiker tobi{at}oetiker.ch
4 I am using rsync for hard-link backups. I found that there is a
5 major problem with frequent backups filling up the file system cache
6 with all the data from the files being backed up. The effect is
7 that all the other 'sensible' data in the cache gets thrown out in
8 the process. This is rather unfortunate, as the performance of the
9 system becomes very bad after running rsync.
11 Some research showed that
13 posix_fadvise64(fd, 0, 0,POSIX_FADV_DONTNEED);
15 would tell the OS that it should not keep the file in cache. I
16 have written a patch for rsync that adds the --drop-cache
20 option, which activates posix_fadvise64.
22 There are some caveats though:
24 * When calling posix_fadvise64 while writing a file, only the
25 part of the cache that has already been written to disk will
26 be released. This means we have to call fdatasync before
27 calling posix_fadvise64, and this will unfortunately slow down
28 operations considerably. On my test system I get 240 KByte/s.
30 The patch has been optimized, so that the impact on large files
31 will be considerably lowered by calling posix_fadvise64 only
32 after a few megabytes have been written.
34 * When reading a file which had been cached *before* rsync read
35 it, the content of the file will be released from the cache
36 nevertheless, which may not be intended. I have unfortunately not
37 found a method for determining whether a file is in the cache or not.
40 I found that running rsync on an LVM snapshot is a good way
41 around this problem, since the snapshot data is cached
42 separately from the original. It has the additional benefit of
43 making the backups more consistent.
45 * I don't really know the rsync code, so it may be that the patch
46 is calling fadvise for files where this would not be necessary.
48 * The patch is tested only on Linux 2.6.18
50 If you have any input on this, please let me know.
52 You can get the latest edition of the patch from
54 http://tobi.oetiker.ch/patches/
63 * pass --drop-cache on to the remote server
66 To use this patch, run these commands for a successful build:
68 patch -p1 <patches/drop-cache.diff
69 ./configure (optional if already run)
72 based-on: 3b8f8192227b14e708bf535072485e50f4362270
73 diff --git a/checksum.c b/checksum.c
77 extern int checksum_seed;
78 extern int protocol_version;
80 +#ifdef HAVE_POSIX_FADVISE64
81 +#define close(fd) fadv_close(fd)
85 a simple 32 bit checksum that can be upadted from either end
86 (inspired by Mark Adler's Adler-32 checksum)
87 diff --git a/cleanup.c b/cleanup.c
90 @@ -47,7 +47,13 @@ void close_all(void)
96 +#ifdef HAVE_POSIX_FADVISE64
100 +#ifdef SHUTDOWN_ALL_SOCKETS
101 max_fd = sysconf(_SC_OPEN_MAX) - 1;
102 for (fd = max_fd; fd >= 0; fd--) {
103 if ((ret = do_fstat(fd, &st)) == 0) {
104 diff --git a/configure.in b/configure.in
107 @@ -574,7 +574,7 @@ AC_CHECK_FUNCS(waitpid wait4 getcwd strdup chown chmod lchmod mknod mkfifo \
108 setlocale setmode open64 lseek64 mkstemp64 mtrace va_copy __va_copy \
109 strerror putenv iconv_open locale_charset nl_langinfo getxattr \
110 extattr_get_link sigaction sigprocmask setattrlist getgrouplist \
111 - initgroups utimensat)
112 + initgroups utimensat posix_fadvise64)
114 dnl cygwin iconv.h defines iconv_open as libiconv_open
115 if test x"$ac_cv_func_iconv_open" != x"yes"; then
116 diff --git a/fileio.c b/fileio.c
119 @@ -31,6 +31,12 @@ extern int sparse_files;
120 static char last_byte;
121 static OFF_T sparse_seek = 0;
123 +#ifdef HAVE_POSIX_FADVISE64
124 +#define close(fd) fadv_close(fd)
125 +#define read(fd,buf,len) fadv_read(fd,buf,len)
126 +#define write(fd,buf,len) fadv_write(fd,buf,len)
129 int sparse_end(int f)
132 diff --git a/generator.c b/generator.c
135 @@ -113,6 +113,10 @@ static int need_retouch_dir_times;
136 static int need_retouch_dir_perms;
137 static const char *solo_file = NULL;
139 +#ifdef HAVE_POSIX_FADVISE64
140 +#define close(fd) fadv_close(fd)
144 TYPE_DIR, TYPE_SPECIAL, TYPE_DEVICE, TYPE_SYMLINK
146 diff --git a/options.c b/options.c
149 @@ -60,6 +60,7 @@ int preserve_uid = 0;
150 int preserve_gid = 0;
151 int preserve_times = 0;
157 @@ -671,6 +672,9 @@ void usage(enum logcode F)
158 rprintf(F," --backup-dir=DIR make backups into hierarchy based in DIR\n");
159 rprintf(F," --suffix=SUFFIX set backup suffix (default %s w/o --backup-dir)\n",BACKUP_SUFFIX);
160 rprintf(F," -u, --update skip files that are newer on the receiver\n");
161 +#ifdef HAVE_POSIX_FADVISE64
162 + rprintf(F," --drop-cache tell OS to drop caching of file data\n");
164 rprintf(F," --inplace update destination files in-place (SEE MAN PAGE)\n");
165 rprintf(F," --append append data onto shorter files\n");
166 rprintf(F," --append-verify like --append, but with old data in file checksum\n");
167 @@ -892,6 +896,9 @@ static struct poptOption long_options[] = {
168 {"no-one-file-system",'x',POPT_ARG_VAL, &one_file_system, 0, 0, 0 },
169 {"no-x", 'x', POPT_ARG_VAL, &one_file_system, 0, 0, 0 },
170 {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 },
171 +#ifdef HAVE_POSIX_FADVISE64
172 + {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 },
174 {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 },
175 {"ignore-non-existing",0,POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 },
176 {"ignore-existing", 0, POPT_ARG_NONE, &ignore_existing, 0, 0, 0 },
177 @@ -2253,6 +2260,11 @@ void server_options(char **args, int *argc_p)
179 args[ac++] = "--sender";
181 +#ifdef HAVE_POSIX_FADVISE64
183 + args[ac++] = "--drop-cache";
189 diff --git a/receiver.c b/receiver.c
192 @@ -64,6 +64,10 @@ static flist_ndx_list batch_redo_list;
193 /* We're either updating the basis file or an identical copy: */
194 static int updating_basis_or_equiv;
196 +#ifdef HAVE_POSIX_FADVISE64
197 +#define close(fd) fadv_close(fd)
200 #define TMPNAME_SUFFIX ".XXXXXX"
201 #define TMPNAME_SUFFIX_LEN ((int)sizeof TMPNAME_SUFFIX - 1)
202 #define MAX_UNIQUE_NUMBER 999999
203 diff --git a/rsync.yo b/rsync.yo
206 @@ -359,6 +359,7 @@ to the detailed description below for a complete description. verb(
207 --super receiver attempts super-user activities
208 --fake-super store/recover privileged attrs using xattrs
209 -S, --sparse handle sparse files efficiently
210 + --drop-cache tell OS to drop caching of file data
211 -n, --dry-run perform a trial run with no changes made
212 -W, --whole-file copy files whole (w/o delta-xfer algorithm)
213 -x, --one-file-system don't cross filesystem boundaries
214 @@ -1127,6 +1128,10 @@ NOTE: Don't use this option when the destination is a Solaris "tmpfs"
215 filesystem. It seems to have problems seeking over null regions,
216 and ends up corrupting the files.
218 +dit(bf(--drop-cache)) Tell the OS to drop the caching of the file data. This
219 +prevents rsync from filling up the filesystem cache. This can sometimes help
220 +to make a system perform better by keeping non-rsync files in the disk cache.
222 dit(bf(-n, --dry-run)) This makes rsync perform a trial run that doesn't
223 make any changes (and produces mostly the same output as a real run). It
224 is most commonly used in combination with the bf(-v, --verbose) and/or
225 diff --git a/sender.c b/sender.c
228 @@ -47,6 +47,10 @@ extern struct file_list *cur_flist, *first_flist, *dir_flist;
230 BOOL extra_flist_sending_enabled;
232 +#ifdef HAVE_POSIX_FADVISE64
233 +#define close(fd) fadv_close(fd)
239 diff --git a/t_unsafe.c b/t_unsafe.c
242 @@ -28,6 +28,7 @@ int am_root = 0;
247 int human_readable = 0;
248 int preserve_perms = 0;
249 int preserve_executability = 0;
250 diff --git a/util.c b/util.c
256 extern int module_id;
257 +extern int drop_cache;
258 extern int modify_window;
259 extern int relative_paths;
260 extern int preserve_xattrs;
261 @@ -42,6 +43,131 @@ char curr_dir[MAXPATHLEN];
262 unsigned int curr_dir_len;
263 int curr_dir_depth; /* This is only set for a sanitizing daemon. */
265 +#ifdef HAVE_POSIX_FADVISE64
266 +#define FADV_BUFFER_SIZE 1024*1024*16
268 +static struct stat fadv_fd_stat[1024];
269 +static off_t fadv_fd_pos[1024];
270 +static int fadv_fd_init = 0;
271 +static int fadv_max_fd = 0;
272 +static int fadv_close_ring_tail = 0;
273 +static int fadv_close_ring_head = 0;
274 +static int fadv_close_ring_size = 0;
275 +static int fadv_close_ring[1024];
276 +static int fadv_close_buffer_size = 0;
278 +static void fadv_fd_init_func(void)
280 + if (fadv_fd_init == 0) {
283 + if (fadv_max_fd == 0){
284 + fadv_max_fd = sysconf(_SC_OPEN_MAX) - 20;
285 + if (fadv_max_fd < 0)
287 + if (fadv_max_fd > 1000)
288 + fadv_max_fd = 1000;
290 + for (i = 0; i < fadv_max_fd; i++) {
291 + fadv_fd_pos[i] = 0;
292 + fadv_fd_stat[i].st_dev = 0;
293 + fadv_fd_stat[i].st_ino = 0;
298 +static void fadv_drop(int fd, int sync)
303 + /* Trail 1 MB behind in dropping. We do this to make
304 + * sure that the same block or stripe does not have
305 + * to be written twice. */
306 + if (fd > fadv_max_fd)
308 + pos = lseek(fd, 0, SEEK_CUR) - 1024*1024;
309 + fadv_fd_init_func();
311 + if (fadv_fd_stat[fd].st_dev == sb.st_dev
312 + && fadv_fd_stat[fd].st_ino == sb.st_ino) {
313 + if (fadv_fd_pos[fd] < pos - FADV_BUFFER_SIZE) {
315 + /* If the file is not flushed to disk before calling fadvise,
316 + * then the cache will not be freed and the advice gets ignored.
317 + * Flushing does give a severe hit on performance. If only there
318 + * was a way to mark cache so that it gets released once the data
319 + * is written to disk. */
322 + posix_fadvise64(fd, 0, pos, POSIX_FADV_DONTNEED);
323 + fadv_fd_pos[fd] = pos;
326 + fadv_fd_stat[fd].st_dev = sb.st_dev;
327 + fadv_fd_stat[fd].st_ino = sb.st_ino;
328 + fadv_fd_pos[fd] = 0;
332 +ssize_t fadv_write(int fd, const void *buf, size_t count)
334 + int ret = write(fd, buf, count);
340 +ssize_t fadv_read(int fd, void *buf, size_t count)
342 + int ret = read(fd, buf, count);
348 +void fadv_close_all(void)
350 + while (fadv_close_ring_size > 0){
351 + fdatasync(fadv_close_ring[fadv_close_ring_tail]);
352 + posix_fadvise64(fadv_close_ring[fadv_close_ring_tail], 0, 0,POSIX_FADV_DONTNEED);
353 + fadv_close_ring_size--;
354 + close(fadv_close_ring[fadv_close_ring_tail]);
355 + fadv_close_ring_tail = (fadv_close_ring_tail + 1) % fadv_max_fd;
356 + fadv_close_buffer_size = 0;
360 +int fadv_close(int fd)
363 + /* If the file is not flushed to disk before calling fadvise,
364 + * then the cache will not be freed and the advice gets ignored.
365 + * Flushing does give a severe hit on performance, so instead of doing
366 + * it right away, we save ourselves a copy of the file handle and do it
367 + * some time before we are out of file handles. This speeds
368 + * up operation for small files massively. It is directly
369 + * related to the number of spare file handles you have. */
370 + int newfd = dup(fd);
371 + int pos = lseek(fd, 0, SEEK_CUR);
372 + fadv_fd_init_func();
373 + fadv_close_buffer_size += pos - fadv_fd_pos[fd];
374 + fadv_close_ring[fadv_close_ring_head] = newfd;
375 + fadv_close_ring_head = (fadv_close_ring_head + 1) % fadv_max_fd;
376 + fadv_close_ring_size ++;
377 + if (fadv_close_ring_size == fadv_max_fd || fadv_close_buffer_size > 1024*1024 ){
378 + /* it seems fastest to drop things 'in groups' */
385 +#define close(fd) fadv_close(fd)
386 +#define read(fd,buf,len) fadv_read(fd,buf,len)
387 +#define write(fd,buf,len) fadv_write(fd,buf,len)
390 /* Set a fd into nonblocking mode. */
391 void set_nonblocking(int fd)