- Made the new options work with a daemon-mode transfer.
[rsync/rsync-patches.git] / drop-cache.diff
CommitLineData
c966e8e8
WD
1From: Tobi Oetiker tobi{at}oetiker.ch
2Date: 2007-04-23
3
4I am using rsync for hard-link backup. I found that there is a
5major problem with frequent backup filling up the file system cache
6with all the data from the files being backed up. The effect is
7that all the other 'sensible' data in the cache gets thrown out in
8the process. This is rather unfortunate as the performance of the
9system becomes very bad after running rsync.
10
11Some research showed that
12
13 posix_fadvise64(fd, 0, 0,POSIX_FADV_DONTNEED);
14
15would tell the OS that it should not keep the file in cache. I
16have written a patch for rsync that adds the
17
18 --drop-cache
19
20option which activates posix_fadvise64.
21
22There are some caveats though:
23
24 * When calling posix_fadvise64 while writing a file, only the
25 part of the cache will be released which has already been
26 written to disk. This means we have to call fdatasync before
27 calling posix_fadvise64 and this will unfortunately slow down
28 operations considerably. On my test system I get 240 KByte/s.
29
30 The patch has been optimized, so that the impact on large files
31 will be considerably lowered by calling posix_fadvise64 only
32 after a few megabytes have been written.
33
34 * When reading a file which was already cached *before* rsync read
35 it, the content of the file will be released from the cache
36 nevertheless, which may not be intended. I have unfortunately not
37 found a method for determining if a file is in cache or not
38 (ideas?)
39
40 I found that running rsync on an LVM snapshot is a good way
41 around this problem, since the snapshot data is cached
42 separately from the original. It has the additional benefit of
43 making the backups more consistent.
44
45 * I don't really know the rsync code, so it may be that the patch
46 is calling fadvise for files where this would not be necessary.
47
48 * The patch is tested only on Linux 2.6.18
49
50If you have any input on this, please let me know.
51
52You can get the latest edition of the patch from
53
54 http://tobi.oetiker.ch/patches/
55
56cheers
57tobi
58
59Changes:
60
61 2007-04-23
62
63* pass --drop-cache on to the remote server
64* make test works now
65
66--- old/checksum.c
67+++ new/checksum.c
68@@ -148,7 +148,7 @@ void file_checksum(char *fname, char *su
69 mdfour_result(&m, (uchar *)sum);
70 }
71
72- close(fd);
73+ fadv_close(fd);
74 unmap_file(buf);
75 }
76
77--- old/fileio.c
78+++ new/fileio.c
79@@ -26,15 +26,18 @@
80 #endif
81
82 extern int sparse_files;
83-
84 static char last_byte;
85 static int last_sparse;
86
87+extern int drop_cache;
88+
89+
90+
91 int sparse_end(int f)
92 {
93 if (last_sparse) {
94 do_lseek(f,-1,SEEK_CUR);
95- return (write(f,&last_byte,1) == 1 ? 0 : -1);
96+ return (fadv_write(f,&last_byte,1) == 1 ? 0 : -1);
97 }
98 last_sparse = 0;
99 return 0;
100@@ -61,7 +64,7 @@ static int write_sparse(int f,char *buf,
101 if (l1 == len)
102 return len;
103
104- ret = write(f, buf + l1, len - (l1+l2));
105+ ret = fadv_write(f, buf + l1, len - (l1+l2));
106 if (ret == -1 || ret == 0)
107 return ret;
108 else if (ret != (int) (len - (l1+l2)))
109@@ -84,7 +87,7 @@ int flush_write_file(int f)
110 char *bp = wf_writeBuf;
111
112 while (wf_writeBufCnt > 0) {
113- if ((ret = write(f, bp, wf_writeBufCnt)) < 0) {
114+ if ((ret = fadv_write(f, bp, wf_writeBufCnt)) < 0) {
115 if (errno == EINTR)
116 continue;
117 return ret;
118@@ -235,7 +238,7 @@ char *map_ptr(struct map_struct *map, OF
119 map->p_len = window_size;
120
121 while (read_size > 0) {
122- nread = read(map->fd, map->p + read_offset, read_size);
123+ nread = fadv_read(map->fd, map->p + read_offset, read_size);
124 if (nread <= 0) {
125 if (!map->status)
126 map->status = nread ? errno : ENODATA;
127--- old/generator.c
128+++ new/generator.c
129@@ -1614,18 +1614,18 @@ static void recv_generator(char *fname,
130
131 if (inplace && make_backups > 0 && fnamecmp_type == FNAMECMP_FNAME) {
132 if (!(backupptr = get_backup_name(fname))) {
133- close(fd);
134+ fadv_close(fd);
135 goto cleanup;
136 }
137 if (!(back_file = make_file(fname, NULL, NULL, 0, NO_FILTERS))) {
138- close(fd);
139+ fadv_close(fd);
140 goto pretend_missing;
141 }
142 if (robust_unlink(backupptr) && errno != ENOENT) {
143 rsyserr(FERROR, errno, "unlink %s",
144 full_fname(backupptr));
145 unmake_file(back_file);
146- close(fd);
147+ fadv_close(fd);
148 goto cleanup;
149 }
150 if ((f_copy = do_open(backupptr,
151@@ -1633,7 +1633,7 @@ static void recv_generator(char *fname,
152 rsyserr(FERROR, errno, "open %s",
153 full_fname(backupptr));
154 unmake_file(back_file);
155- close(fd);
156+ fadv_close(fd);
157 goto cleanup;
158 }
159 fnamecmp_type = FNAMECMP_BACKUP;
160@@ -1695,7 +1695,7 @@ static void recv_generator(char *fname,
161 generate_and_send_sums(fd, sx.st.st_size, f_out, f_copy);
162
163 if (f_copy >= 0) {
164- close(f_copy);
165+ fadv_close(f_copy);
166 set_file_attrs(backupptr, back_file, NULL, NULL, 0);
167 if (verbose > 1) {
168 rprintf(FINFO, "backed up %s to %s\n",
169@@ -1704,7 +1704,7 @@ static void recv_generator(char *fname,
170 unmake_file(back_file);
171 }
172
173- close(fd);
174+ fadv_close(fd);
175
176 cleanup:
177 #ifdef SUPPORT_ACLS
178--- old/options.c
179+++ new/options.c
180@@ -57,6 +57,7 @@ int preserve_gid = 0;
181 int preserve_times = 0;
182 int omit_dir_times = 0;
183 int update_only = 0;
184+int drop_cache = 0;
185 int cvs_exclude = 0;
186 int dry_run = 0;
187 int do_xfers = 1;
188@@ -310,6 +311,7 @@ void usage(enum logcode F)
189 rprintf(F," --backup-dir=DIR make backups into hierarchy based in DIR\n");
190 rprintf(F," --suffix=SUFFIX set backup suffix (default %s w/o --backup-dir)\n",BACKUP_SUFFIX);
191 rprintf(F," -u, --update skip files that are newer on the receiver\n");
192+ rprintf(F," --drop-cache tell OS to drop caching of file data\n");
193 rprintf(F," --inplace update destination files in-place (SEE MAN PAGE)\n");
194 rprintf(F," --append append data onto shorter files\n");
195 rprintf(F," -d, --dirs transfer directories without recursing\n");
196@@ -506,6 +508,7 @@ static struct poptOption long_options[]
197 {"size-only", 0, POPT_ARG_NONE, &size_only, 0, 0, 0 },
198 {"one-file-system", 'x', POPT_ARG_NONE, 0, 'x', 0, 0 },
199 {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 },
200+ {"drop-cache", 0, POPT_ARG_NONE, &drop_cache, 0, 0, 0 },
201 {"existing", 0, POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 },
202 {"ignore-non-existing",0,POPT_ARG_NONE, &ignore_non_existing, 0, 0, 0 },
203 {"ignore-existing", 0, POPT_ARG_NONE, &ignore_existing, 0, 0, 0 },
204@@ -1603,6 +1606,9 @@ void server_options(char **args,int *arg
205 if (!am_sender)
206 args[ac++] = "--sender";
207
208+ if (drop_cache)
209+ args[ac++] = "--drop-cache";
210+
211 x = 1;
212 argstr[0] = '-';
213 for (i = 0; i < verbose; i++)
214--- old/receiver.c
215+++ new/receiver.c
216@@ -554,7 +554,7 @@ int recv_files(int f_in, char *local_nam
217 rsyserr(FERROR, errno, "fstat %s failed",
218 full_fname(fnamecmp));
219 discard_receive_data(f_in, F_LENGTH(file));
220- close(fd1);
221+ fadv_close(fd1);
222 if (inc_recurse)
223 send_msg_int(MSG_NO_SEND, ndx);
224 continue;
225@@ -569,14 +569,14 @@ int recv_files(int f_in, char *local_nam
226 rprintf(FERROR,"recv_files: %s is a directory\n",
227 full_fname(fnamecmp));
228 discard_receive_data(f_in, F_LENGTH(file));
229- close(fd1);
230+ fadv_close(fd1);
231 if (inc_recurse)
232 send_msg_int(MSG_NO_SEND, ndx);
233 continue;
234 }
235
236 if (fd1 != -1 && !S_ISREG(st.st_mode)) {
237- close(fd1);
238+ fadv_close(fd1);
239 fd1 = -1;
240 }
241
242@@ -604,7 +604,7 @@ int recv_files(int f_in, char *local_nam
243 full_fname(fname));
244 discard_receive_data(f_in, F_LENGTH(file));
245 if (fd1 != -1)
246- close(fd1);
247+ fadv_close(fd1);
248 if (inc_recurse)
249 send_msg_int(MSG_NO_SEND, ndx);
250 continue;
251@@ -613,7 +613,7 @@ int recv_files(int f_in, char *local_nam
252 if (!get_tmpname(fnametmp,fname)) {
253 discard_receive_data(f_in, F_LENGTH(file));
254 if (fd1 != -1)
255- close(fd1);
256+ fadv_close(fd1);
257 if (inc_recurse)
258 send_msg_int(MSG_NO_SEND, ndx);
259 continue;
260@@ -641,7 +641,7 @@ int recv_files(int f_in, char *local_nam
261 full_fname(fnametmp));
262 discard_receive_data(f_in, F_LENGTH(file));
263 if (fd1 != -1)
264- close(fd1);
265+ fadv_close(fd1);
266 if (inc_recurse)
267 send_msg_int(MSG_NO_SEND, ndx);
268 continue;
269@@ -663,8 +663,8 @@ int recv_files(int f_in, char *local_nam
270 log_item(log_code, file, &initial_stats, iflags, NULL);
271
272 if (fd1 != -1)
273- close(fd1);
274- if (close(fd2) < 0) {
275+ fadv_close(fd1);
276+ if (fadv_close(fd2) < 0) {
277 rsyserr(FERROR, errno, "close failed on %s",
278 full_fname(fnametmp));
279 exit_cleanup(RERR_FILEIO);
280--- old/rsync.yo
281+++ new/rsync.yo
282@@ -335,6 +335,7 @@ to the detailed description below for a
283 --super receiver attempts super-user activities
284 --fake-super store/recover privileged attrs using xattrs
285 -S, --sparse handle sparse files efficiently
286+ --drop-cache tell OS to drop caching of file data
287 -n, --dry-run show what would have been transferred
288 -W, --whole-file copy files whole (without rsync algorithm)
289 -x, --one-file-system don't cross filesystem boundaries
290@@ -956,6 +957,10 @@ NOTE: Don't use this option when the des
291 filesystem. It doesn't seem to handle seeks over null regions
292 correctly and ends up corrupting the files.
293
294+dit(bf(--drop-cache)) Tell the OS to drop the caching of the file data. This
295+prevents rsync from filling up the filesystem cache. This can sometimes help
296+to make a system perform better by keeping non-rsync files in the disk cache.
297+
298 dit(bf(-n, --dry-run)) This tells rsync to not do any file transfers,
299 instead it will just report the actions it would have taken.
300
301--- old/sender.c
302+++ new/sender.c
303@@ -307,7 +307,7 @@ void send_files(int f_in, int f_out)
304 io_error |= IOERR_GENERAL;
305 rsyserr(FERROR, errno, "fstat failed");
306 free_sums(s);
307- close(fd);
308+ fadv_close(fd);
309 exit_cleanup(RERR_PROTOCOL);
310 }
311
312@@ -351,7 +351,7 @@ void send_files(int f_in, int f_out)
313 full_fname(fname));
314 }
315 }
316- close(fd);
317+ fadv_close(fd);
318
319 free_sums(s);
320
321--- old/t_unsafe.c
322+++ new/t_unsafe.c
323@@ -28,6 +28,7 @@ int am_root = 0;
324 int read_only = 0;
325 int list_only = 0;
326 int verbose = 0;
327+int drop_cache = 0;
328 int preserve_perms = 0;
329
330 int
331--- old/util.c
332+++ new/util.c
333@@ -24,6 +24,7 @@
334
335 extern int verbose;
336 extern int dry_run;
337+extern int drop_cache;
338 extern int module_id;
339 extern int modify_window;
340 extern int relative_paths;
341@@ -39,6 +40,88 @@ char curr_dir[MAXPATHLEN];
342 unsigned int curr_dir_len;
343 int curr_dir_depth; /* This is only set for a sanitizing daemon. */
344
345+extern int drop_cache;
346+
347+static struct stat fadv_fd_stat[255];
348+static off_t fadv_fd_pos[255];
349+static int fadv_fd_init = 0;
350+
351+static void fadv_fd_init_func(void){ /* one-time reset of the per-fd tracking tables */
352+ if (fadv_fd_init ==0){ /* only the first call does any work */
353+ int i;
354+ fadv_fd_init = 1;
355+ for (i=0;i<255;i++){ /* 255 == number of entries in both tracking arrays */
356+ fadv_fd_pos[i] = 0;
357+ fadv_fd_stat[i].st_dev = 0;
358+ fadv_fd_stat[i].st_ino = 0;
359+ }
360+ }
361+}
362+
363+static void fadv_drop(int fd, int sync){
364+ struct stat stat;
365+ /* Tell the OS to drop fd's cached data up to 1 MB behind the current
366+ offset; we trail so that the same block or stripe does not have to
367+ be written twice. off_t, not int, so large offsets are not truncated. */
368+ off_t pos = lseek(fd,0,SEEK_CUR) - 1024*1024;
369+ if (fd < 0 || fd >= 255){ /* the tracking arrays have 255 slots: 0..254 */
370+ return;
371+ }
372+ fadv_fd_init_func();
373+ if (fstat(fd,&stat) != 0) { return; } /* never compare or store an unfilled stat */
374+ if ( fadv_fd_stat[fd].st_dev == stat.st_dev
375+ && fadv_fd_stat[fd].st_ino == stat.st_ino ) {
376+ if ( fadv_fd_pos[fd] < pos - 16*1024*1024 ) { /* act only once 16 MB past the last drop */
377+ if (sync) {
378+ /* If the file is not flushed to disk before calling fadvise,
379+ the cache will not be freed and the advice gets ignored.
380+ Flushing is a severe hit on performance. If only there
381+ was a way to mark cache so that it gets released once the
382+ data is written to disk. */
383+ fdatasync(fd);
384+ }
385+ posix_fadvise64(fd, 0, pos, POSIX_FADV_DONTNEED);
386+ fadv_fd_pos[fd] = pos;
387+ }
388+ } else { /* fd refers to a different file than last time: restart tracking */
389+ fadv_fd_stat[fd].st_dev = stat.st_dev;
390+ fadv_fd_stat[fd].st_ino = stat.st_ino;
391+ fadv_fd_pos[fd] = 0;
392+ }
393+}
394+
395+ssize_t fadv_write(int fd, const void *buf, size_t count)
396+{ /* write() wrapper that also drops cached data when drop_cache is set */
397+ ssize_t ret = write(fd, buf, count); /* ssize_t: do not narrow the result through int */
398+ if (drop_cache && ret > 0) { /* skip on failure so write()'s errno reaches the caller */
399+ fadv_drop(fd,1);
400+ }
401+ return ret;
402+}
403+
404+ssize_t fadv_read(int fd, void *buf, size_t count)
405+{ /* read() wrapper that also drops cached data when drop_cache is set */
406+ ssize_t ret = read(fd, buf, count); /* ssize_t: do not narrow the result through int */
407+ if (drop_cache && ret > 0) { /* skip on failure/EOF so read()'s errno reaches the caller */
408+ fadv_drop(fd,0);
409+ }
410+ return ret;
411+}
412+
413+int fadv_close(int fd){ /* close() wrapper; returns close()'s result */
414+ if (drop_cache) {
415+ /* Drop everything after we are done. */
416+ /* If the file is not flushed to disk before calling fadvise,
417+ then the cache will not be freed and the advice gets ignored.
418+ This does give a severe hit on performance. If only there
419+ was a way to mark cache so that it gets released once the data
420+ is written to disk. */
421+ fdatasync(fd);
422+ posix_fadvise64(fd, 0, 0,POSIX_FADV_DONTNEED);
423+ }
424+ return close(fd);
425+}
426+
427 /* Set a fd into nonblocking mode. */
428 void set_nonblocking(int fd)
429 {
430@@ -221,7 +304,7 @@ int full_write(int desc, const char *ptr
431
432 total_written = 0;
433 while (len > 0) {
434- int written = write(desc, ptr, len);
435+ int written = fadv_write(desc, ptr, len);
436 if (written < 0) {
437 if (errno == EINTR)
438 continue;
439@@ -253,7 +336,7 @@ static int safe_read(int desc, char *ptr
440 return len;
441
442 do {
443- n_chars = read(desc, ptr, len);
444+ n_chars = fadv_read(desc, ptr, len);
445 } while (n_chars < 0 && errno == EINTR);
446
447 return n_chars;
448@@ -284,32 +367,32 @@ int copy_file(const char *source, const
449 ofd = do_open(dest, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, mode);
450 if (ofd == -1) {
451 rsyserr(FERROR, errno, "open %s", full_fname(dest));
452- close(ifd);
453+ fadv_close(ifd);
454 return -1;
455 }
456
457 while ((len = safe_read(ifd, buf, sizeof buf)) > 0) {
458 if (full_write(ofd, buf, len) < 0) {
459 rsyserr(FERROR, errno, "write %s", full_fname(dest));
460- close(ifd);
461- close(ofd);
462+ fadv_close(ifd);
463+ fadv_close(ofd);
464 return -1;
465 }
466 }
467
468 if (len < 0) {
469 rsyserr(FERROR, errno, "read %s", full_fname(source));
470- close(ifd);
471- close(ofd);
472+ fadv_close(ifd);
473+ fadv_close(ofd);
474 return -1;
475 }
476
477- if (close(ifd) < 0) {
478+ if (fadv_close(ifd) < 0) {
479 rsyserr(FINFO, errno, "close failed on %s",
480 full_fname(source));
481 }
482
483- if (close(ofd) < 0) {
484+ if (fadv_close(ofd) < 0) {
485 rsyserr(FERROR, errno, "close failed on %s",
486 full_fname(dest));
487 return -1;