1 From: Tobi Oetiker tobi{at}oetiker.ch
4 I am using rsync for hard-link backups. I found that there is a
5 major problem with frequent backups filling up the file system cache
6 with all the data from the files being backed up. The effect is
7 that all the other 'useful' data in the cache gets thrown out in
8 the process. This is rather unfortunate as the performance of the
9 system becomes very bad after running rsync.
11 Some research showed that
13 posix_fadvise64(fd, 0, 0,POSIX_FADV_DONTNEED);
15 would tell the OS that it should not keep the file in cache. I
16 have written a patch for rsync that adds the --drop-cache
20 option, which activates posix_fadvise64.
22 There are some caveats though:
24 * When calling posix_fadvise64 while writing a file, only the
25 part of the cache that has already been written to disk will be
26 released. This means we have to call fdatasync before
27 calling posix_fadvise64, and this will unfortunately slow down
28 operations considerably. On my test system I get 240 KByte/s.
30 The patch has been optimized, so that the impact on large files
31 will be considerably lowered by calling posix_fadvise64 only
32 after a few megabytes have been written.
34 * When reading a file which has been cached *before* rsync read
35 it, the content of the file will be released from cache
36 nevertheless, which may not be intended. I have unfortunately not
37 found a method for determining if a file is in cache or not.
40 I found that running rsync on an LVM snapshot is a good way
41 around this problem, since the snapshot data is cached
42 separately from the original. It has the additional benefit of
43 making the backups more consistent.
45 * I don't really know the rsync code, so it may be that the patch
46 is calling fadvise for files where this would not be necessary.
48 * The patch has been tested only on Linux 2.6.18.
50 If you have any input on this, please let me know.
52 You can get the latest edition of the patch from
54 http://tobi.oetiker.ch/patches/
63 * pass --drop-cache on to the remote server
68 @@ -148,7 +148,7 @@ void file_checksum(char *fname, char *su
69 mdfour_result(&m, (uchar *)sum);
82 extern int sparse_files;
84 static char last_byte;
85 static int last_sparse;
87 +extern int drop_cache;
94 do_lseek(f,-1,SEEK_CUR);
95 - return (write(f,&last_byte,1) == 1 ? 0 : -1);
96 + return (fadv_write(f,&last_byte,1) == 1 ? 0 : -1);
100 @@ -61,7 +64,7 @@ static int write_sparse(int f,char *buf,
104 - ret = write(f, buf + l1, len - (l1+l2));
105 + ret = fadv_write(f, buf + l1, len - (l1+l2));
106 if (ret == -1 || ret == 0)
108 else if (ret != (int) (len - (l1+l2)))
109 @@ -84,7 +87,7 @@ int flush_write_file(int f)
110 char *bp = wf_writeBuf;
112 while (wf_writeBufCnt > 0) {
113 - if ((ret = write(f, bp, wf_writeBufCnt)) < 0) {
114 + if ((ret = fadv_write(f, bp, wf_writeBufCnt)) < 0) {
118 @@ -235,7 +238,7 @@ char *map_ptr(struct map_struct *map, OF
119 map->p_len = window_size;
121 while (read_size > 0) {
122 - nread = read(map->fd, map->p + read_offset, read_size);
123 + nread = fadv_read(map->fd, map->p + read_offset, read_size);
126 map->status = nread ? errno : ENODATA;
129 @@ -1614,18 +1614,18 @@ static void recv_generator(char *fname,
131 if (inplace && make_backups > 0 && fnamecmp_type == FNAMECMP_FNAME) {
132 if (!(backupptr = get_backup_name(fname))) {
137 if (!(back_file = make_file(fname, NULL, NULL, 0, NO_FILTERS))) {
140 goto pretend_missing;
142 if (robust_unlink(backupptr) && errno != ENOENT) {
143 rsyserr(FERROR, errno, "unlink %s",
144 full_fname(backupptr));
145 unmake_file(back_file);
150 if ((f_copy = do_open(backupptr,
151 @@ -1633,7 +1633,7 @@ static void recv_generator(char *fname,
152 rsyserr(FERROR, errno, "open %s",
153 full_fname(backupptr));
154 unmake_file(back_file);
159 fnamecmp_type = FNAMECMP_BACKUP;
160 @@ -1695,7 +1695,7 @@ static void recv_generator(char *fname,
161 generate_and_send_sums(fd, sx.st.st_size, f_out, f_copy);
165 + fadv_close(f_copy);
166 set_file_attrs(backupptr, back_file, NULL, NULL, 0);
168 rprintf(FINFO, "backed up %s to %s\n",
169 @@ -1704,7 +1704,7 @@ static void recv_generator(char *fname,
170 unmake_file(back_file);
180 @@ -57,6 +57,7 @@ int preserve_gid = 0;
181 int preserve_times = 0;
182 int omit_dir_times = 0;
188 @@ -310,6 +311,7 @@ void usage(enum logcode F)
189 rprintf(F," --backup-dir=DIR make backups into hierarchy based in DIR\n");
190 rprintf(F," --suffix=SUFFIX set backup suffix (default %s w/o --backup-dir)\n",BACKUP_SUFFIX);
191 rprintf(F," -u, --update skip files that are newer on the receiver\n");
192 + rprintf(F," --drop-cache tell OS to drop caching of file data\n");
193 rprintf(F," --inplace update destination files in-place (SEE MAN PAGE)\n");
194 rprintf(F," --append append data onto shorter files\n");
195 rprintf(F," -d, --dirs transfer directories without recursing\n");
196 @@ -506,6 +508,7 @@ static struct poptOption long_options[]
197 {"size-only", 0, POPT_ARG_NONE, &size_only, 0, 0, 0 },
198 {"one-file-system", 'x', POPT_ARG_NONE, 0, 'x', 0, 0 },
199 {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 },
200 + {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 },
201 {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 },
202 {"ignore-non-existing",0,POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 },
203 {"ignore-existing", 0, POPT_ARG_NONE, &ignore_existing, 0, 0, 0 },
204 @@ -1603,6 +1606,9 @@ void server_options(char **args,int *arg
206 args[ac++] = "--sender";
209 + args[ac++] = "--drop-cache";
213 for (i = 0; i < verbose; i++)
216 @@ -554,7 +554,7 @@ int recv_files(int f_in, char *local_nam
217 rsyserr(FERROR, errno, "fstat %s failed",
218 full_fname(fnamecmp));
219 discard_receive_data(f_in, F_LENGTH(file));
223 send_msg_int(MSG_NO_SEND, ndx);
225 @@ -569,14 +569,14 @@ int recv_files(int f_in, char *local_nam
226 rprintf(FERROR,"recv_files: %s is a directory\n",
227 full_fname(fnamecmp));
228 discard_receive_data(f_in, F_LENGTH(file));
232 send_msg_int(MSG_NO_SEND, ndx);
236 if (fd1 != -1 && !S_ISREG(st.st_mode)) {
242 @@ -604,7 +604,7 @@ int recv_files(int f_in, char *local_nam
244 discard_receive_data(f_in, F_LENGTH(file));
249 send_msg_int(MSG_NO_SEND, ndx);
251 @@ -613,7 +613,7 @@ int recv_files(int f_in, char *local_nam
252 if (!get_tmpname(fnametmp,fname)) {
253 discard_receive_data(f_in, F_LENGTH(file));
258 send_msg_int(MSG_NO_SEND, ndx);
260 @@ -641,7 +641,7 @@ int recv_files(int f_in, char *local_nam
261 full_fname(fnametmp));
262 discard_receive_data(f_in, F_LENGTH(file));
267 send_msg_int(MSG_NO_SEND, ndx);
269 @@ -663,8 +663,8 @@ int recv_files(int f_in, char *local_nam
270 log_item(log_code, file, &initial_stats, iflags, NULL);
274 - if (close(fd2) < 0) {
276 + if (fadv_close(fd2) < 0) {
277 rsyserr(FERROR, errno, "close failed on %s",
278 full_fname(fnametmp));
279 exit_cleanup(RERR_FILEIO);
282 @@ -335,6 +335,7 @@ to the detailed description below for a
283 --super receiver attempts super-user activities
284 --fake-super store/recover privileged attrs using xattrs
285 -S, --sparse handle sparse files efficiently
286 + --drop-cache tell OS to drop caching of file data
287 -n, --dry-run show what would have been transferred
288 -W, --whole-file copy files whole (without rsync algorithm)
289 -x, --one-file-system don't cross filesystem boundaries
290 @@ -956,6 +957,10 @@ NOTE: Don't use this option when the des
291 filesystem. It doesn't seem to handle seeks over null regions
292 correctly and ends up corrupting the files.
294 +dit(bf(--drop-cache)) Tell the OS to drop the caching of the file data. This
295 +prevents rsync from filling up the filesystem cache. This can sometimes help
296 +to make a system perform better by keeping non-rsync files in the disk cache.
298 dit(bf(-n, --dry-run)) This tells rsync to not do any file transfers,
299 instead it will just report the actions it would have taken.
303 @@ -307,7 +307,7 @@ void send_files(int f_in, int f_out)
304 io_error |= IOERR_GENERAL;
305 rsyserr(FERROR, errno, "fstat failed");
309 exit_cleanup(RERR_PROTOCOL);
312 @@ -351,7 +351,7 @@ void send_files(int f_in, int f_out)
323 @@ -28,6 +28,7 @@ int am_root = 0;
328 int preserve_perms = 0;
337 +extern int drop_cache;
338 extern int module_id;
339 extern int modify_window;
340 extern int relative_paths;
341 @@ -39,6 +40,88 @@ char curr_dir[MAXPATHLEN];
342 unsigned int curr_dir_len;
343 int curr_dir_depth; /* This is only set for a sanitizing daemon. */
345 +extern int drop_cache;
347 +static struct stat fadv_fd_stat[255];
348 +static off_t fadv_fd_pos[255];
349 +static int fadv_fd_init = 0;
351 +static void fadv_fd_init_func(void){
352 + if (fadv_fd_init ==0){
355 + for (i=0;i<255;i++){
356 + fadv_fd_pos[i] = 0;
357 + fadv_fd_stat[i].st_dev = 0;
358 + fadv_fd_stat[i].st_ino = 0;
363 +static void fadv_drop(int fd, int sync){
365 + /* trail 1 MB behind in dropping. we do this to make
366 + sure that the same block or stripe does not have
367 + to be written twice */
368 + int pos = lseek(fd,0,SEEK_CUR) - 1024*1024;
372 + fadv_fd_init_func();
374 + if ( fadv_fd_stat[fd].st_dev == stat.st_dev
375 + && fadv_fd_stat[fd].st_ino == stat.st_ino ) {
376 + if ( fadv_fd_pos[fd] < pos - 16*1024*1024 ) {
378 + /* if the file is not flushed to disk before calling fadvise,
379 + then the Cache will not be freed and the advise gets ignored
380 + this does give a severe hit on performance. If only there
381 + was a way to mark cache so that it gets release once the data
382 + is written to disk. */
385 + posix_fadvise64(fd, 0, pos, POSIX_FADV_DONTNEED);
386 + fadv_fd_pos[fd] = pos;
389 + fadv_fd_stat[fd].st_dev = stat.st_dev;
390 + fadv_fd_stat[fd].st_ino = stat.st_ino;
391 + fadv_fd_pos[fd] = 0;
395 +ssize_t fadv_write(int fd, const void *buf, size_t count)
397 + int ret = write(fd, buf, count);
404 +ssize_t fadv_read(int fd, void *buf, size_t count)
406 + int ret = read(fd, buf, count);
413 +int fadv_close(int fd){
415 + /* drop everything after we are done */
416 + /* if the file is not flushed to disk before calling fadvise,
417 + then the Cache will not be freed and the advise gets ignored
418 + this does give a severe hit on performance. If only there
419 + was a way to mark cache so that it gets release once the data
420 + is written to disk. */
422 + posix_fadvise64(fd, 0, 0,POSIX_FADV_DONTNEED);
427 /* Set a fd into nonblocking mode. */
428 void set_nonblocking(int fd)
430 @@ -221,7 +304,7 @@ int full_write(int desc, const char *ptr
434 - int written = write(desc, ptr, len);
435 + int written = fadv_write(desc, ptr, len);
439 @@ -253,7 +336,7 @@ static int safe_read(int desc, char *ptr
443 - n_chars = read(desc, ptr, len);
444 + n_chars = fadv_read(desc, ptr, len);
445 } while (n_chars < 0 && errno == EINTR);
448 @@ -284,32 +367,32 @@ int copy_file(const char *source, const
449 ofd = do_open(dest, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, mode);
451 rsyserr(FERROR, errno, "open %s", full_fname(dest));
457 while ((len = safe_read(ifd, buf, sizeof buf)) > 0) {
458 if (full_write(ofd, buf, len) < 0) {
459 rsyserr(FERROR, errno, "write %s", full_fname(dest));
469 rsyserr(FERROR, errno, "read %s", full_fname(source));
477 - if (close(ifd) < 0) {
478 + if (fadv_close(ifd) < 0) {
479 rsyserr(FINFO, errno, "close failed on %s",
483 - if (close(ofd) < 0) {
484 + if (fadv_close(ofd) < 0) {
485 rsyserr(FERROR, errno, "close failed on %s",