Modified to work with the trunk's hash_search() optimization.
[rsync/rsync-patches.git] / inplace.diff
1 Patch from Mark Curtis to implement the --inplace option.
2 Improved by Wayne Davison.
3
4 Run these commands after applying the patch:
5
6     autoconf
7     autoheader
8     ./configure
9     make
10
11 Optional:
12
13     make rsync.1
14
15 --- configure.in        30 Apr 2004 18:03:33 -0000      1.196
16 +++ configure.in        3 Jul 2004 00:16:50 -0000
17 @@ -442,7 +442,7 @@ dnl AC_FUNC_MEMCMP
18  AC_FUNC_UTIME_NULL
19  AC_FUNC_ALLOCA
20  AC_CHECK_FUNCS(waitpid wait4 getcwd strdup strerror chown chmod mknod mkfifo \
21 -    fchmod fstat strchr readlink link utime utimes strftime mtrace \
22 +    fchmod fstat strchr readlink link utime utimes strftime mtrace ftruncate \
23      memmove lchown vsnprintf snprintf asprintf setsid glob strpbrk \
24      strlcat strlcpy strtol mallinfo getgroups setgroups geteuid getegid)
25  
26 --- match.c     2 Jul 2004 23:35:30 -0000       1.63
27 +++ match.c     3 Jul 2004 00:16:50 -0000
28 @@ -23,6 +23,7 @@ extern int verbose;
29  extern int am_server;
30  extern int do_progress;
31  extern int checksum_seed;
32 +extern int inplace;
33  
34  typedef unsigned short tag;
35  
36 @@ -52,9 +53,23 @@ static size_t *tag_table;
37  #define gettag2(s1,s2) (((s1) + (s2)) & 0xFFFF)
38  #define gettag(sum) gettag2((sum)&0xFFFF,(sum)>>16)
39  
40 +static struct sum_buf *current_sums;
41 +static int current_s2length;
42 +
43  static int compare_targets(struct target *t1,struct target *t2)
44  {
45 -       return (int)t1->t - (int)t2->t;
46 +       struct sum_buf *s1, *s2;
47 +
48 +       if (t1->t != t2->t)
49 +               return t1->t < t2->t ? -1 : 1;
50 +
51 +       s1 = &current_sums[t1->i];
52 +       s2 = &current_sums[t2->i];
53 +
54 +       if (s1->sum1 != s2->sum1)
55 +               return s1->sum1 < s2->sum1 ? -1 : 1;
56 +
57 +       return memcmp(s1->sum2, s2->sum2, current_s2length);
58  }
59  
60  
61 @@ -74,6 +89,8 @@ static void build_hash_table(struct sum_
62                 targets[i].t = gettag(s->sums[i].sum1);
63         }
64  
65 +       current_sums = s->sums;
66 +       current_s2length = s->s2length;
67         qsort(targets,s->count,sizeof(targets[0]),(int (*)())compare_targets);
68  
69         for (i = 0; i < TABLESIZE; i++)
70 @@ -192,12 +209,24 @@ static void hash_search(int f,struct sum
71                         unsigned int l;
72                         size_t i = targets[j].i;
73  
74 -                       if (sum != s->sums[i].sum1)
75 +                       if (sum != s->sums[i].sum1) {
76 +                               if (done_csum2)
77 +                                       break;
78                                 continue;
79 +                       }
80  
81                         /* also make sure the two blocks are the same length */
82                         l = MIN((OFF_T)s->blength, len-offset);
83 -                       if (l != s->sums[i].len)
84 +                       if (l != s->sums[i].len) {
85 +                               if (done_csum2)
86 +                                       break;
87 +                               continue;
88 +                       }
89 +
90 +                       /* inplace: ensure chunk's offset is either >= our
91 +                        * offset or that the data didn't move. */
92 +                       if (inplace && s->sums[i].offset < offset
93 +                           && s->sums[i].i >= 0)
94                                 continue;
95  
96                         if (verbose > 3)
97 @@ -215,15 +244,39 @@ static void hash_search(int f,struct sum
98                                 continue;
99                         }
100  
101 -                       /* we've found a match, but now check to see
102 -                        * if last_i can hint at a better match */
103 +                       /* We've found a match, but now check to see if last_i
104 +                        * can hint at a better match.  If inplace is enabled,
105 +                        * the best possible match is one with an identical
106 +                        * offset, so we prefer that over a last_i+1 match. */
107 +                       if (inplace) {
108 +                               for (; j < s->count && targets[j].t == t; j++) {
109 +                                       size_t i2 = targets[j].i;
110 +                                       if (s->sums[i2].offset != offset)
111 +                                               continue;
112 +                                       if (i2 != i) {
113 +                                               if (sum != s->sums[i2].sum1)
114 +                                                       break;
115 +                                               if (memcmp(sum2, s->sums[i2].sum2,
116 +                                                          s->s2length) != 0)
117 +                                                       break;
118 +                                               i = i2;
119 +                                       }
120 +                                       /* Use this as a flag to indicate that
121 +                                        * this chunk was at the same offset on
122 +                                        * both the sender and the receiver. */
123 +                                       s->sums[i].i = -1;
124 +                                       goto set_last_i;
125 +                               }
126 +                       }
127                         if (i != last_i + 1 && last_i + 1 < s->count
128 +                           && (!inplace || s->sums[last_i+1].offset >= offset || s->sums[last_i+1].i < 0)
129                             && sum == s->sums[last_i+1].sum1
130                             && memcmp(sum2, s->sums[last_i+1].sum2, s->s2length) == 0) {
131                                 /* we've found an adjacent match - the RLL coder
132                                  * will be happy */
133                                 i = last_i + 1;
134                         }
135 +                   set_last_i:
136                         last_i = i;
137  
138                         matched(f,s,buf,offset,i);
139 --- options.c   20 Jun 2004 19:47:05 -0000      1.157
140 +++ options.c   3 Jul 2004 00:16:50 -0000
141 @@ -94,6 +94,7 @@ int ignore_errors = 0;
142  int modify_window = 0;
143  int blocking_io = -1;
144  int checksum_seed = 0;
145 +int inplace = 0;
146  unsigned int block_size = 0;
147  
148  
149 @@ -149,6 +150,7 @@ char *bind_address;
150  static void print_rsync_version(enum logcode f)
151  {
152         char const *got_socketpair = "no ";
153 +       char const *have_inplace = "no ";
154         char const *hardlinks = "no ";
155         char const *links = "no ";
156         char const *ipv6 = "no ";
157 @@ -158,6 +160,10 @@ static void print_rsync_version(enum log
158         got_socketpair = "";
159  #endif
160  
161 +#if HAVE_FTRUNCATE
162 +       have_inplace = "";
163 +#endif
164 +
165  #if SUPPORT_HARD_LINKS
166         hardlinks = "";
167  #endif
168 @@ -183,8 +189,8 @@ static void print_rsync_version(enum log
169         /* Note that this field may not have type ino_t.  It depends
170          * on the complicated interaction between largefile feature
171          * macros. */
172 -       rprintf(f, "              %sIPv6, %d-bit system inums, %d-bit internal inums\n",
173 -               ipv6,
174 +       rprintf(f, "              %sinplace, %sIPv6, %d-bit system inums, %d-bit internal inums\n",
175 +               have_inplace, ipv6,
176                 (int) (sizeof dumstat->st_ino * 8),
177                 (int) (sizeof (uint64) * 8));
178  #ifdef MAINTAINER_MODE
179 @@ -234,6 +240,7 @@ void usage(enum logcode F)
180    rprintf(F,"     --backup-dir            make backups into this directory\n");
181    rprintf(F,"     --suffix=SUFFIX         backup suffix (default %s w/o --backup-dir)\n",BACKUP_SUFFIX);
182    rprintf(F," -u, --update                update only (don't overwrite newer files)\n");
183 +  rprintf(F,"     --inplace               update the destination file inplace (see man page)\n");
184    rprintf(F," -K, --keep-dirlinks         treat symlinked dir on receiver as dir\n");
185    rprintf(F," -l, --links                 copy symlinks as symlinks\n");
186    rprintf(F," -L, --copy-links            copy the referent of all symlinks\n");
187 @@ -341,6 +348,7 @@ static struct poptOption long_options[] 
188    {"sparse",          'S', POPT_ARG_NONE,   &sparse_files, 0, 0, 0 },
189    {"cvs-exclude",     'C', POPT_ARG_NONE,   &cvs_exclude, 0, 0, 0 },
190    {"update",          'u', POPT_ARG_NONE,   &update_only, 0, 0, 0 },
191 +  {"inplace",          0,  POPT_ARG_NONE,   &inplace, 0, 0, 0 },
192    {"keep-dirlinks",   'K', POPT_ARG_NONE,   &keep_dirlinks, 0, 0, 0 },
193    {"links",           'l', POPT_ARG_NONE,   &preserve_links, 0, 0, 0 },
194    {"copy-links",      'L', POPT_ARG_NONE,   &copy_links, 0, 0, 0 },
195 @@ -739,6 +747,18 @@ int parse_arguments(int *argc, const cha
196                         bwlimit_writemax = 512;
197         }
198  
199 +       if (inplace) {
200 +#if HAVE_FTRUNCATE
201 +               if (keep_partial)
202 +                       keep_partial = 0;
203 +#else
204 +               snprintf(err_buf, sizeof err_buf,
205 +                        "inplace is not supported on this %s\n",
206 +                        am_server ? "server" : "client");
207 +               return 0;
208 +#endif
209 +       }
210 +
211         if (files_from) {
212                 char *colon;
213                 if (*argc != 2 && !(am_server && am_sender && *argc == 1)) {
214 @@ -963,6 +983,9 @@ void server_options(char **args,int *arg
215         if (opt_ignore_existing && am_sender)
216                 args[ac++] = "--ignore-existing";
217  
218 +       if (inplace)
219 +               args[ac++] = "--inplace";
220 +
221         if (tmpdir) {
222                 args[ac++] = "--temp-dir";
223                 args[ac++] = tmpdir;
224 --- receiver.c  2 Jul 2004 18:23:57 -0000       1.86
225 +++ receiver.c  3 Jul 2004 00:16:50 -0000
226 @@ -48,6 +48,7 @@ extern int ignore_errors;
227  extern int orig_umask;
228  extern int keep_partial;
229  extern int checksum_seed;
230 +extern int inplace;
231  
232  static void delete_one(char *fn, int is_dir)
233  {
234 @@ -255,16 +256,30 @@ static int receive_data(int f_in,struct 
235                         sum_update(map,len);
236                 }
237  
238 -               if (fd != -1 && write_file(fd, map, len) != (int)len) {
239 -                       rsyserr(FERROR, errno, "write failed on %s",
240 -                               full_fname(fname));
241 -                       exit_cleanup(RERR_FILEIO);
242 +               if (!inplace || offset != offset2) {
243 +                       if (fd != -1 && write_file(fd, map, len) != (int)len) {
244 +                               rsyserr(FERROR, errno, "write failed on %s",
245 +                                       full_fname(fname));
246 +                               exit_cleanup(RERR_FILEIO);
247 +                       }
248 +               } else {
249 +                       flush_write_file(fd);
250 +                       if (do_lseek(fd,(OFF_T)len,SEEK_CUR) != offset+len) {
251 +                               rprintf(FERROR, "lseek failed on %s: %s, %lli, %lli, %i\n",
252 +                                       full_fname(fname), strerror(errno), do_lseek(fd,0,SEEK_CUR), (offset+len), i);
253 +                               exit_cleanup(RERR_FILEIO);
254 +                       }
255                 }
256                 offset += len;
257         }
258  
259         flush_write_file(fd);
260  
261 +#ifdef HAVE_FTRUNCATE
262 +       if (inplace)
263 +               ftruncate(fd, offset);
264 +#endif
265 +
266         if (do_progress)
267                 end_progress(total_size);
268  
269 @@ -414,44 +429,59 @@ int recv_files(int f_in,struct file_list
270                 } else
271                         mapbuf = NULL;
272  
273 -               if (!get_tmpname(fnametmp,fname)) {
274 -                       if (mapbuf)
275 -                               unmap_file(mapbuf);
276 -                       if (fd1 != -1)
277 -                               close(fd1);
278 -                       continue;
279 -               }
280 +               /* We now check to see if we are writing file "inplace" */
281 +               if (inplace)  {
282 +                       fd2 = do_open(fnamecmp, O_WRONLY|O_CREAT, 0);
283 +                       if (fd2 == -1) {
284 +                               rsyserr(FERROR, errno, "open %s failed",
285 +                                       full_fname(fnamecmp));
286 +                               receive_data(f_in,mapbuf,-1,NULL,file->length);
287 +                               if (mapbuf)
288 +                                       unmap_file(mapbuf);
289 +                               if (fd1 != -1)
290 +                                       close(fd1);
291 +                               continue;
292 +                       }
293 +               } else {
294 +                       if (!get_tmpname(fnametmp,fname)) {
295 +                               if (mapbuf)
296 +                                       unmap_file(mapbuf);
297 +                               if (fd1 != -1)
298 +                                       close(fd1);
299 +                               continue;
300 +                       }
301  
302 -               strlcpy(template, fnametmp, sizeof template);
303 +                       strlcpy(template, fnametmp, sizeof template);
304  
305 -               /* we initially set the perms without the
306 -                * setuid/setgid bits to ensure that there is no race
307 -                * condition. They are then correctly updated after
308 -                * the lchown. Thanks to snabb@epipe.fi for pointing
309 -                * this out.  We also set it initially without group
310 -                * access because of a similar race condition. */
311 -               fd2 = do_mkstemp(fnametmp, file->mode & INITACCESSPERMS);
312 -
313 -               /* in most cases parent directories will already exist
314 -                * because their information should have been previously
315 -                * transferred, but that may not be the case with -R */
316 -               if (fd2 == -1 && relative_paths && errno == ENOENT &&
317 -                   create_directory_path(fnametmp, orig_umask) == 0) {
318 -                       strlcpy(fnametmp, template, sizeof fnametmp);
319 +                       /* we initially set the perms without the
320 +                        * setuid/setgid bits to ensure that there is no race
321 +                        * condition. They are then correctly updated after
322 +                        * the lchown. Thanks to snabb@epipe.fi for pointing
323 +                        * this out.  We also set it initially without group
324 +                        * access because of a similar race condition. */
325                         fd2 = do_mkstemp(fnametmp, file->mode & INITACCESSPERMS);
326 -               }
327 -               if (fd2 == -1) {
328 -                       rsyserr(FERROR, errno, "mkstemp %s failed",
329 -                               full_fname(fnametmp));
330 -                       receive_data(f_in,mapbuf,-1,NULL,file->length);
331 -                       if (mapbuf)
332 -                               unmap_file(mapbuf);
333 -                       if (fd1 != -1)
334 -                               close(fd1);
335 -                       continue;
336 -               }
337  
338 -               cleanup_set(fnametmp, fname, file, mapbuf, fd1, fd2);
339 +                       /* in most cases parent directories will already exist
340 +                        * because their information should have been previously
341 +                        * transferred, but that may not be the case with -R */
342 +                       if (fd2 == -1 && relative_paths && errno == ENOENT
343 +                           && create_directory_path(fnametmp, orig_umask) == 0) {
344 +                               strlcpy(fnametmp, template, sizeof fnametmp);
345 +                               fd2 = do_mkstemp(fnametmp, file->mode & INITACCESSPERMS);
346 +                       }
347 +                       if (fd2 == -1) {
348 +                               rsyserr(FERROR, errno, "mkstemp %s failed",
349 +                                       full_fname(fnametmp));
350 +                               receive_data(f_in,mapbuf,-1,NULL,file->length);
351 +                               if (mapbuf)
352 +                                       unmap_file(mapbuf);
353 +                               if (fd1 != -1)
354 +                                       close(fd1);
355 +                               continue;
356 +                       }
357 +
358 +                       cleanup_set(fnametmp, fname, file, mapbuf, fd1, fd2);
359 +               }
360  
361                 if (!am_server && verbose)
362                         rprintf(FINFO, "%s\n", fname);
363 --- rsync.c     2 Jul 2004 18:13:53 -0000       1.142
364 +++ rsync.c     3 Jul 2004 00:16:51 -0000
365 @@ -34,6 +34,7 @@ extern int force_delete;
366  extern int recurse;
367  extern int make_backups;
368  extern char *backup_dir;
369 +extern int inplace;
370  
371  
372  /*
373 @@ -239,6 +240,13 @@ void finish_transfer(char *fname, char *
374         if (make_backups && !make_backup(fname))
375                 return;
376  
377 +       if (inplace) {
378 +               if (verbose > 2)
379 +                       rprintf(FINFO, "finishing %s\n", fname);
380 +               set_perms(fname, file, NULL, 0);
381 +               return;
382 +       }
383 +
384         /* move tmp file over real file */
385         if (verbose > 2)
386                 rprintf(FINFO, "renaming %s to %s\n", fnametmp, fname);
387 --- rsync.yo    5 Jun 2004 16:16:30 -0000       1.171
388 +++ rsync.yo    3 Jul 2004 00:16:52 -0000
389 @@ -289,6 +289,7 @@ verb(
390       --backup-dir            make backups into this directory
391       --suffix=SUFFIX         backup suffix (default ~ w/o --backup-dir)
392   -u, --update                update only (don't overwrite newer files)
393 +     --inplace               update the destination file inplace
394   -K, --keep-dirlinks         treat symlinked dir on receiver as dir
395   -l, --links                 copy symlinks as symlinks
396   -L, --copy-links            copy the referent of all symlinks
397 @@ -484,6 +485,17 @@ dit(bf(-K, --keep-dirlinks)) On the rece
398  pointing to a directory, it will be treated as matching a directory
399  from the sender.
400  
401 +dit(bf(--inplace)) This causes rsync not to create a new copy of the file
402 +and then move it into place.  Instead rsync will overwrite the existing
403 +file, meaning that the rsync algorithm can't extract the full amount of
404 +network reduction it might otherwise achieve.
405 +
406 +This option is useful for transferring large files with block-based changes
407 +and also on systems that are disk-bound rather than network-bound.
408 +
409 +WARNING: If the transfer is interrupted, you will have an inconsistent file
410 +and the transfer should be run again.
411 +
412  dit(bf(-l, --links)) When symlinks are encountered, recreate the
413  symlink on the destination.
414