Decided that we don't need to limit the block size after all now
[rsync/rsync.git] / generator.c
1 /* -*- c-file-style: "linux" -*-
2
3    rsync -- fast file replication program
4
5    Copyright (C) 1996-2000 by Andrew Tridgell
6    Copyright (C) Paul Mackerras 1996
7    Copyright (C) 2002 by Martin Pool <mbp@samba.org>
8
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 2 of the License, or
12    (at your option) any later version.
13
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 */
23
24 #include "rsync.h"
25
26 extern int verbose;
27 extern int dry_run;
28 extern int relative_paths;
29 extern int keep_dirlinks;
30 extern int preserve_links;
31 extern int am_root;
32 extern int preserve_devices;
33 extern int preserve_hard_links;
34 extern int preserve_perms;
35 extern int preserve_uid;
36 extern int preserve_gid;
37 extern int update_only;
38 extern int opt_ignore_existing;
39 extern int csum_length;
40 extern int ignore_times;
41 extern int size_only;
42 extern int io_timeout;
43 extern int protocol_version;
44 extern int always_checksum;
45 extern char *partial_dir;
46 extern char *compare_dest;
47 extern int link_dest;
48 extern int whole_file;
49 extern int local_server;
50 extern int list_only;
51 extern int read_batch;
52 extern int only_existing;
53 extern int orig_umask;
54 extern int safe_symlinks;
55 extern unsigned int block_size;
56
57 extern struct exclude_list_struct server_exclude_list;
58
59
60 /* choose whether to skip a particular file */
61 static int skip_file(char *fname, struct file_struct *file, STRUCT_STAT *st)
62 {
63         if (st->st_size != file->length)
64                 return 0;
65         if (link_dest) {
66                 if (preserve_perms
67                     && (st->st_mode & CHMOD_BITS) != (file->mode & CHMOD_BITS))
68                         return 0;
69
70                 if (am_root && preserve_uid && st->st_uid != file->uid)
71                         return 0;
72
73                 if (preserve_gid && file->gid != GID_NONE
74                     && st->st_gid != file->gid)
75                         return 0;
76         }
77
78         /* if always checksum is set then we use the checksum instead
79            of the file time to determine whether to sync */
80         if (always_checksum && S_ISREG(st->st_mode)) {
81                 char sum[MD4_SUM_LENGTH];
82                 file_checksum(fname,sum,st->st_size);
83                 return memcmp(sum, file->u.sum, protocol_version < 21 ? 2
84                                                         : MD4_SUM_LENGTH) == 0;
85         }
86
87         if (size_only)
88                 return 1;
89
90         if (ignore_times)
91                 return 0;
92
93         return cmp_modtime(st->st_mtime, file->modtime) == 0;
94 }
95
96
97 /*
98  * NULL sum_struct means we have no checksums
99  */
100 void write_sum_head(int f, struct sum_struct *sum)
101 {
102         static struct sum_struct null_sum;
103
104         if (sum == NULL)
105                 sum = &null_sum;
106
107         write_int(f, sum->count);
108         write_int(f, sum->blength);
109         if (protocol_version >= 27)
110                 write_int(f, sum->s2length);
111         write_int(f, sum->remainder);
112 }
113
114 /*
115  * set (initialize) the size entries in the per-file sum_struct
116  * calculating dynamic block and checksum sizes.
117  *
118  * This is only called from generate_and_send_sums() but is a separate
119  * function to encapsulate the logic.
120  *
121  * The block size is a rounded square root of file length.
122  *
123  * The checksum size is determined according to:
124  *     blocksum_bits = BLOCKSUM_EXP + 2*log2(file_len) - log2(block_len)
125  * provided by Donovan Baarda which gives a probability of rsync
126  * algorithm corrupting data and falling back using the whole md4
127  * checksums.
128  *
129  * This might be made one of several selectable heuristics.
130  */
131
132 static void sum_sizes_sqroot(struct sum_struct *sum, uint64 len)
133 {
134         unsigned int blength;
135         int s2length;
136         uint32 c;
137         uint64 l;
138
139         if (block_size) {
140                 blength = block_size;
141         } else if (len <= BLOCK_SIZE * BLOCK_SIZE) {
142                 blength = BLOCK_SIZE;
143         } else {
144                 l = len;
145                 c = 1;
146                 while (l >>= 2) {
147                         c <<= 1;
148                 }
149                 blength = 0;
150                 do {
151                         blength |= c;
152                         if (len < (uint64)blength * blength)
153                                 blength &= ~c;
154                         c >>= 1;
155                 } while (c >= 8);       /* round to multiple of 8 */
156                 blength = MAX(blength, BLOCK_SIZE);
157         }
158
159         if (protocol_version < 27) {
160                 s2length = csum_length;
161         } else if (csum_length == SUM_LENGTH) {
162                 s2length = SUM_LENGTH;
163         } else {
164                 int b = BLOCKSUM_BIAS;
165                 l = len;
166                 while (l >>= 1) {
167                         b += 2;
168                 }
169                 c = blength;
170                 while (c >>= 1 && b) {
171                         b--;
172                 }
173                 s2length = (b + 1 - 32 + 7) / 8; /* add a bit,
174                                                   * subtract rollsum,
175                                                   * round up
176                                                   *    --optimize in compiler--
177                                                   */
178                 s2length = MAX(s2length, csum_length);
179                 s2length = MIN(s2length, SUM_LENGTH);
180         }
181
182         sum->flength    = len;
183         sum->blength    = blength;
184         sum->s2length   = s2length;
185         sum->count      = (len + (blength - 1)) / blength;
186         sum->remainder  = (len % blength);
187
188         if (sum->count && verbose > 2) {
189                 rprintf(FINFO, "count=%.0f rem=%u blength=%u s2length=%d flength=%.0f\n",
190                         (double)sum->count, sum->remainder, sum->blength,
191                         sum->s2length, (double)sum->flength);
192         }
193 }
194
195
196 /*
197  * Generate and send a stream of signatures/checksums that describe a buffer
198  *
199  * Generate approximately one checksum every block_len bytes.
200  */
201 static void generate_and_send_sums(int fd, OFF_T len, int f_out)
202 {
203         size_t i;
204         struct map_struct *mapbuf;
205         struct sum_struct sum;
206         OFF_T offset = 0;
207
208         sum_sizes_sqroot(&sum, len);
209
210         if (len > 0)
211                 mapbuf = map_file(fd, len, MAX_MAP_SIZE, sum.blength);
212         else
213                 mapbuf = NULL;
214
215         write_sum_head(f_out, &sum);
216
217         for (i = 0; i < sum.count; i++) {
218                 unsigned int n1 = MIN(len, sum.blength);
219                 char *map = map_ptr(mapbuf, offset, n1);
220                 uint32 sum1 = get_checksum1(map, n1);
221                 char sum2[SUM_LENGTH];
222
223                 get_checksum2(map, n1, sum2);
224
225                 if (verbose > 3) {
226                         rprintf(FINFO,
227                                 "chunk[%.0f] offset=%.0f len=%u sum1=%08lx\n",
228                                 (double)i, (double)offset, n1,
229                                 (unsigned long)sum1);
230                 }
231                 write_int(f_out, sum1);
232                 write_buf(f_out, sum2, sum.s2length);
233                 len -= n1;
234                 offset += n1;
235         }
236
237         if (mapbuf)
238                 unmap_file(mapbuf);
239 }
240
241
242
243 /*
244  * Acts on file number @p i from @p flist, whose name is @p fname.
245  *
246  * First fixes up permissions, then generates checksums for the file.
247  *
248  * @note This comment was added later by mbp who was trying to work it
249  * out.  It might be wrong.
250  */
251 static void recv_generator(char *fname, struct file_struct *file, int i,
252                            int f_out)
253 {
254         int fd;
255         STRUCT_STAT st;
256         int statret, stat_errno;
257         char *fnamecmp;
258         char fnamecmpbuf[MAXPATHLEN];
259
260         if (list_only)
261                 return;
262
263         if (verbose > 2)
264                 rprintf(FINFO, "recv_generator(%s,%d)\n", safe_fname(fname), i);
265
266         if (server_exclude_list.head
267             && check_exclude(&server_exclude_list, fname,
268                              S_ISDIR(file->mode)) < 0) {
269                 if (verbose) {
270                         rprintf(FINFO, "skipping server-excluded file \"%s\"\n",
271                                 safe_fname(fname));
272                 }
273                 return;
274         }
275
276         statret = link_stat(fname, &st, keep_dirlinks && S_ISDIR(file->mode));
277         stat_errno = errno;
278
279         if (only_existing && statret == -1 && stat_errno == ENOENT) {
280                 /* we only want to update existing files */
281                 if (verbose > 1) {
282                         rprintf(FINFO, "not creating new file \"%s\"\n",
283                                 safe_fname(fname));
284                 }
285                 return;
286         }
287
288         if (statret == 0 && !preserve_perms
289             && S_ISDIR(st.st_mode) == S_ISDIR(file->mode)) {
290                 /* if the file exists already and we aren't perserving
291                  * permissions then act as though the remote end sent
292                  * us the file permissions we already have */
293                 file->mode = (file->mode & ~CHMOD_BITS)
294                            | (st.st_mode & CHMOD_BITS);
295         }
296
297         if (S_ISDIR(file->mode)) {
298                 /* The file to be received is a directory, so we need
299                  * to prepare appropriately.  If there is already a
300                  * file of that name and it is *not* a directory, then
301                  * we need to delete it.  If it doesn't exist, then
302                  * recursively create it. */
303
304                 if (dry_run)
305                         return; /* TODO: causes inaccuracies -- fix */
306                 if (statret == 0 && !S_ISDIR(st.st_mode)) {
307                         if (robust_unlink(fname) != 0) {
308                                 rsyserr(FERROR, errno,
309                                         "recv_generator: unlink %s to make room for directory",
310                                         full_fname(fname));
311                                 return;
312                         }
313                         statret = -1;
314                 }
315                 if (statret != 0 && do_mkdir(fname,file->mode) != 0 && errno != EEXIST) {
316                         if (!(relative_paths && errno == ENOENT
317                             && create_directory_path(fname, orig_umask) == 0
318                             && do_mkdir(fname, file->mode) == 0)) {
319                                 rsyserr(FERROR, errno,
320                                         "recv_generator: mkdir %s failed",
321                                         full_fname(fname));
322                         }
323                 }
324                 /* f_out is set to -1 when doing final directory-permission
325                  * and modification-time repair. */
326                 if (set_perms(fname, file, statret ? NULL : &st, 0)
327                     && verbose && f_out != -1)
328                         rprintf(FINFO, "%s/\n", safe_fname(fname));
329                 return;
330         }
331
332         if (preserve_links && S_ISLNK(file->mode)) {
333 #if SUPPORT_LINKS
334                 char lnk[MAXPATHLEN];
335                 int l;
336
337                 if (safe_symlinks && unsafe_symlink(file->u.link, fname)) {
338                         if (verbose) {
339                                 rprintf(FINFO, "ignoring unsafe symlink %s -> \"%s\"\n",
340                                         full_fname(fname), file->u.link);
341                         }
342                         return;
343                 }
344                 if (statret == 0) {
345                         l = readlink(fname,lnk,MAXPATHLEN-1);
346                         if (l > 0) {
347                                 lnk[l] = 0;
348                                 /* A link already pointing to the
349                                  * right place -- no further action
350                                  * required. */
351                                 if (strcmp(lnk,file->u.link) == 0) {
352                                         set_perms(fname, file, &st,
353                                                   PERMS_REPORT);
354                                         return;
355                                 }
356                         }
357                         /* Not a symlink, so delete whatever's
358                          * already there and put a new symlink
359                          * in place. */
360                         delete_file(fname);
361                 }
362                 if (do_symlink(file->u.link,fname) != 0) {
363                         rsyserr(FERROR, errno, "symlink %s -> \"%s\" failed",
364                                 full_fname(fname), safe_fname(file->u.link));
365                 } else {
366                         set_perms(fname,file,NULL,0);
367                         if (verbose) {
368                                 rprintf(FINFO, "%s -> %s\n", safe_fname(fname),
369                                         safe_fname(file->u.link));
370                         }
371                 }
372 #endif
373                 return;
374         }
375
376 #ifdef HAVE_MKNOD
377         if (am_root && preserve_devices && IS_DEVICE(file->mode)) {
378                 if (statret != 0 ||
379                     st.st_mode != file->mode ||
380                     st.st_rdev != file->u.rdev) {
381                         delete_file(fname);
382                         if (verbose > 2) {
383                                 rprintf(FINFO,"mknod(%s,0%o,0x%x)\n",
384                                         safe_fname(fname),
385                                         (int)file->mode, (int)file->u.rdev);
386                         }
387                         if (do_mknod(fname,file->mode,file->u.rdev) != 0) {
388                                 rsyserr(FERROR, errno, "mknod %s failed",
389                                         full_fname(fname));
390                         } else {
391                                 set_perms(fname,file,NULL,0);
392                                 if (verbose) {
393                                         rprintf(FINFO, "%s\n",
394                                                 safe_fname(fname));
395                                 }
396                         }
397                 } else {
398                         set_perms(fname, file, &st, PERMS_REPORT);
399                 }
400                 return;
401         }
402 #endif
403
404         if (preserve_hard_links && hard_link_check(file, HL_CHECK_MASTER))
405                 return;
406
407         if (!S_ISREG(file->mode)) {
408                 rprintf(FINFO, "skipping non-regular file \"%s\"\n",
409                         safe_fname(fname));
410                 return;
411         }
412
413         fnamecmp = fname;
414
415         if (statret == -1 && compare_dest != NULL) {
416                 /* try the file at compare_dest instead */
417                 pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, compare_dest, fname);
418                 if (link_stat(fnamecmpbuf, &st, 0) == 0
419                     && S_ISREG(st.st_mode)) {
420 #if HAVE_LINK
421                         if (link_dest && !dry_run) {
422                                 if (do_link(fnamecmpbuf, fname) < 0) {
423                                         if (verbose) {
424                                                 rsyserr(FINFO, errno,
425                                                         "link %s => %s",
426                                                         fnamecmpbuf,
427                                                         safe_fname(fname));
428                                         }
429                                         fnamecmp = fnamecmpbuf;
430                                 }
431                         } else
432 #endif
433                                 fnamecmp = fnamecmpbuf;
434                         statret = 0;
435                 }
436         }
437
438         if (statret == 0 && !S_ISREG(st.st_mode)) {
439                 if (delete_file(fname) != 0)
440                         return;
441                 statret = -1;
442                 stat_errno = ENOENT;
443         }
444
445         if (statret == -1) {
446                 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
447                         return;
448                 if (stat_errno == ENOENT) {
449                         write_int(f_out,i);
450                         if (!dry_run && !read_batch)
451                                 write_sum_head(f_out, NULL);
452                 } else if (verbose > 1) {
453                         rsyserr(FERROR, stat_errno,
454                                 "recv_generator: failed to stat %s",
455                                 full_fname(fname));
456                 }
457                 return;
458         }
459
460         if (opt_ignore_existing && fnamecmp == fname) {
461                 if (verbose > 1)
462                         rprintf(FINFO, "%s exists\n", safe_fname(fname));
463                 return;
464         }
465
466         if (update_only && fnamecmp == fname
467             && cmp_modtime(st.st_mtime, file->modtime) > 0) {
468                 if (verbose > 1)
469                         rprintf(FINFO, "%s is newer\n", safe_fname(fname));
470                 return;
471         }
472
473         if (skip_file(fnamecmp, file, &st)) {
474                 if (fnamecmp == fname)
475                         set_perms(fname, file, &st, PERMS_REPORT);
476                 return;
477         }
478
479         if (dry_run || read_batch) {
480                 write_int(f_out,i);
481                 return;
482         }
483
484         if (whole_file > 0) {
485                 write_int(f_out,i);
486                 write_sum_head(f_out, NULL);
487                 return;
488         }
489
490         if (partial_dir) {
491                 STRUCT_STAT st2;
492                 char *partialptr = partial_dir_fname(fname);
493                 if (partialptr && link_stat(partialptr, &st2, 0) == 0
494                     && S_ISREG(st2.st_mode)) {
495                         st = st2;
496                         fnamecmp = partialptr;
497                 }
498         }
499
500         /* open the file */
501         fd = do_open(fnamecmp, O_RDONLY, 0);
502
503         if (fd == -1) {
504                 rsyserr(FERROR, errno, "failed to open %s, continuing",
505                         full_fname(fnamecmp));
506                 /* pretend the file didn't exist */
507                 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
508                         return;
509                 write_int(f_out,i);
510                 write_sum_head(f_out, NULL);
511                 return;
512         }
513
514         if (verbose > 3) {
515                 rprintf(FINFO, "gen mapped %s of size %.0f\n",
516                         safe_fname(fnamecmp), (double)st.st_size);
517         }
518
519         if (verbose > 2)
520                 rprintf(FINFO, "generating and sending sums for %d\n", i);
521
522         write_int(f_out,i);
523         generate_and_send_sums(fd, st.st_size, f_out);
524
525         close(fd);
526 }
527
528
529 void generate_files(int f_out, struct file_list *flist, char *local_name)
530 {
531         int i;
532         int phase = 0;
533         char fbuf[MAXPATHLEN];
534
535         if (verbose > 2) {
536                 rprintf(FINFO, "generator starting pid=%ld count=%d\n",
537                         (long)getpid(), flist->count);
538         }
539
540         if (verbose >= 2) {
541                 rprintf(FINFO,
542                         whole_file > 0
543                         ? "delta-transmission disabled for local transfer or --whole-file\n"
544                         : "delta transmission enabled\n");
545         }
546
547         /* we expect to just sit around now, so don't exit on a
548            timeout. If we really get a timeout then the other process should
549            exit */
550         io_timeout = 0;
551
552         for (i = 0; i < flist->count; i++) {
553                 struct file_struct *file = flist->files[i];
554                 struct file_struct copy;
555
556                 if (!file->basename)
557                         continue;
558                 /* we need to ensure that any directories we create have writeable
559                    permissions initially so that we can create the files within
560                    them. This is then fixed after the files are transferred */
561                 if (!am_root && S_ISDIR(file->mode) && !(file->mode & S_IWUSR)) {
562                         copy = *file;
563                         /* XXX: Could this be causing a problem on SCO?  Perhaps their
564                          * handling of permissions is strange? */
565                         copy.mode |= S_IWUSR; /* user write */
566                         file = &copy;
567                 }
568
569                 recv_generator(local_name ? local_name : f_name_to(file, fbuf),
570                                file, i, f_out);
571         }
572
573         phase++;
574         csum_length = SUM_LENGTH;
575         ignore_times = 1;
576
577         if (verbose > 2)
578                 rprintf(FINFO,"generate_files phase=%d\n",phase);
579
580         write_int(f_out, -1);
581
582         /* files can cycle through the system more than once
583          * to catch initial checksum errors */
584         while ((i = get_redo_num()) != -1) {
585                 struct file_struct *file = flist->files[i];
586                 recv_generator(local_name ? local_name : f_name_to(file, fbuf),
587                                file, i, f_out);
588         }
589
590         phase++;
591         if (verbose > 2)
592                 rprintf(FINFO,"generate_files phase=%d\n",phase);
593
594         write_int(f_out, -1);
595
596         if (preserve_hard_links)
597                 do_hard_links();
598
599         /* now we need to fix any directory permissions that were
600          * modified during the transfer */
601         for (i = 0; i < flist->count; i++) {
602                 struct file_struct *file = flist->files[i];
603                 if (!file->basename || !S_ISDIR(file->mode))
604                         continue;
605                 recv_generator(local_name ? local_name : f_name(file),
606                                file, i, -1);
607         }
608
609         if (verbose > 2)
610                 rprintf(FINFO,"generate_files finished\n");
611 }