Added the --max-size option.
[rsync/rsync.git] / generator.c
1 /* -*- c-file-style: "linux" -*-
2
3    rsync -- fast file replication program
4
5    Copyright (C) 1996-2000 by Andrew Tridgell
6    Copyright (C) Paul Mackerras 1996
7    Copyright (C) 2002 by Martin Pool <mbp@samba.org>
8
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 2 of the License, or
12    (at your option) any later version.
13
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 */
23
24 #include "rsync.h"
25
26 extern int verbose;
27 extern int dry_run;
28 extern int relative_paths;
29 extern int keep_dirlinks;
30 extern int preserve_links;
31 extern int am_root;
32 extern int preserve_devices;
33 extern int preserve_hard_links;
34 extern int preserve_perms;
35 extern int preserve_uid;
36 extern int preserve_gid;
37 extern int update_only;
38 extern int opt_ignore_existing;
39 extern int inplace;
40 extern int make_backups;
41 extern int csum_length;
42 extern int ignore_times;
43 extern int size_only;
44 extern int io_timeout;
45 extern int protocol_version;
46 extern int always_checksum;
47 extern char *partial_dir;
48 extern char *compare_dest;
49 extern int link_dest;
50 extern int whole_file;
51 extern int local_server;
52 extern int list_only;
53 extern int read_batch;
54 extern int only_existing;
55 extern int orig_umask;
56 extern int safe_symlinks;
57 extern unsigned int block_size;
58
59 extern struct exclude_list_struct server_exclude_list;
60
61
62 /* choose whether to skip a particular file */
63 static int skip_file(char *fname, struct file_struct *file, STRUCT_STAT *st)
64 {
65         if (st->st_size != file->length)
66                 return 0;
67         if (link_dest) {
68                 if (preserve_perms
69                     && (st->st_mode & CHMOD_BITS) != (file->mode & CHMOD_BITS))
70                         return 0;
71
72                 if (am_root && preserve_uid && st->st_uid != file->uid)
73                         return 0;
74
75                 if (preserve_gid && file->gid != GID_NONE
76                     && st->st_gid != file->gid)
77                         return 0;
78         }
79
80         /* if always checksum is set then we use the checksum instead
81            of the file time to determine whether to sync */
82         if (always_checksum && S_ISREG(st->st_mode)) {
83                 char sum[MD4_SUM_LENGTH];
84                 file_checksum(fname,sum,st->st_size);
85                 return memcmp(sum, file->u.sum, protocol_version < 21 ? 2
86                                                         : MD4_SUM_LENGTH) == 0;
87         }
88
89         if (size_only)
90                 return 1;
91
92         if (ignore_times)
93                 return 0;
94
95         return cmp_modtime(st->st_mtime, file->modtime) == 0;
96 }
97
98
99 /*
100  * NULL sum_struct means we have no checksums
101  */
102 void write_sum_head(int f, struct sum_struct *sum)
103 {
104         static struct sum_struct null_sum;
105
106         if (sum == NULL)
107                 sum = &null_sum;
108
109         write_int(f, sum->count);
110         write_int(f, sum->blength);
111         if (protocol_version >= 27)
112                 write_int(f, sum->s2length);
113         write_int(f, sum->remainder);
114 }
115
116 /*
117  * set (initialize) the size entries in the per-file sum_struct
118  * calculating dynamic block and checksum sizes.
119  *
120  * This is only called from generate_and_send_sums() but is a separate
121  * function to encapsulate the logic.
122  *
123  * The block size is a rounded square root of file length.
124  *
125  * The checksum size is determined according to:
126  *     blocksum_bits = BLOCKSUM_EXP + 2*log2(file_len) - log2(block_len)
127  * provided by Donovan Baarda which gives a probability of rsync
128  * algorithm corrupting data and falling back using the whole md4
129  * checksums.
130  *
131  * This might be made one of several selectable heuristics.
132  */
133
134 static void sum_sizes_sqroot(struct sum_struct *sum, uint64 len)
135 {
136         unsigned int blength;
137         int s2length;
138         uint32 c;
139         uint64 l;
140
141         if (block_size) {
142                 blength = block_size;
143         } else if (len <= BLOCK_SIZE * BLOCK_SIZE) {
144                 blength = BLOCK_SIZE;
145         } else {
146                 l = len;
147                 c = 1;
148                 while (l >>= 2) {
149                         c <<= 1;
150                 }
151                 blength = 0;
152                 do {
153                         blength |= c;
154                         if (len < (uint64)blength * blength)
155                                 blength &= ~c;
156                         c >>= 1;
157                 } while (c >= 8);       /* round to multiple of 8 */
158                 blength = MAX(blength, BLOCK_SIZE);
159         }
160
161         if (protocol_version < 27) {
162                 s2length = csum_length;
163         } else if (csum_length == SUM_LENGTH) {
164                 s2length = SUM_LENGTH;
165         } else {
166                 int b = BLOCKSUM_BIAS;
167                 l = len;
168                 while (l >>= 1) {
169                         b += 2;
170                 }
171                 c = blength;
172                 while (c >>= 1 && b) {
173                         b--;
174                 }
175                 s2length = (b + 1 - 32 + 7) / 8; /* add a bit,
176                                                   * subtract rollsum,
177                                                   * round up
178                                                   *    --optimize in compiler--
179                                                   */
180                 s2length = MAX(s2length, csum_length);
181                 s2length = MIN(s2length, SUM_LENGTH);
182         }
183
184         sum->flength    = len;
185         sum->blength    = blength;
186         sum->s2length   = s2length;
187         sum->count      = (len + (blength - 1)) / blength;
188         sum->remainder  = (len % blength);
189
190         if (sum->count && verbose > 2) {
191                 rprintf(FINFO, "count=%.0f rem=%u blength=%u s2length=%d flength=%.0f\n",
192                         (double)sum->count, sum->remainder, sum->blength,
193                         sum->s2length, (double)sum->flength);
194         }
195 }
196
197
198 /*
199  * Generate and send a stream of signatures/checksums that describe a buffer
200  *
201  * Generate approximately one checksum every block_len bytes.
202  */
203 static void generate_and_send_sums(int fd, OFF_T len, int f_out, int f_copy)
204 {
205         size_t i;
206         struct map_struct *mapbuf;
207         struct sum_struct sum;
208         OFF_T offset = 0;
209
210         sum_sizes_sqroot(&sum, len);
211
212         if (len > 0)
213                 mapbuf = map_file(fd, len, MAX_MAP_SIZE, sum.blength);
214         else
215                 mapbuf = NULL;
216
217         write_sum_head(f_out, &sum);
218
219         for (i = 0; i < sum.count; i++) {
220                 unsigned int n1 = MIN(len, sum.blength);
221                 char *map = map_ptr(mapbuf, offset, n1);
222                 uint32 sum1 = get_checksum1(map, n1);
223                 char sum2[SUM_LENGTH];
224
225                 if (f_copy >= 0)
226                         full_write(f_copy, map, n1);
227
228                 get_checksum2(map, n1, sum2);
229
230                 if (verbose > 3) {
231                         rprintf(FINFO,
232                                 "chunk[%.0f] offset=%.0f len=%u sum1=%08lx\n",
233                                 (double)i, (double)offset, n1,
234                                 (unsigned long)sum1);
235                 }
236                 write_int(f_out, sum1);
237                 write_buf(f_out, sum2, sum.s2length);
238                 len -= n1;
239                 offset += n1;
240         }
241
242         if (mapbuf)
243                 unmap_file(mapbuf);
244 }
245
246
247
248 /*
249  * Acts on file number @p i from @p flist, whose name is @p fname.
250  *
251  * First fixes up permissions, then generates checksums for the file.
252  *
253  * @note This comment was added later by mbp who was trying to work it
254  * out.  It might be wrong.
255  */
256 static void recv_generator(char *fname, struct file_struct *file, int i,
257                            int f_out, int f_out_name)
258 {
259         int fd = -1, f_copy = -1;
260         STRUCT_STAT st, partial_st;
261         struct file_struct *back_file = NULL;
262         int statret, stat_errno;
263         char *fnamecmp, *partialptr, *backupptr = NULL;
264         char fnamecmpbuf[MAXPATHLEN];
265         uchar fnamecmp_type;
266
267         if (list_only)
268                 return;
269
270         if (verbose > 2)
271                 rprintf(FINFO, "recv_generator(%s,%d)\n", safe_fname(fname), i);
272
273         if (server_exclude_list.head
274             && check_exclude(&server_exclude_list, fname,
275                              S_ISDIR(file->mode)) < 0) {
276                 if (verbose) {
277                         rprintf(FINFO, "skipping server-excluded file \"%s\"\n",
278                                 safe_fname(fname));
279                 }
280                 return;
281         }
282
283         if (dry_run > 1) {
284                 statret = -1;
285                 stat_errno = ENOENT;
286         } else {
287                 statret = link_stat(fname, &st,
288                                     keep_dirlinks && S_ISDIR(file->mode));
289                 stat_errno = errno;
290         }
291
292         if (only_existing && statret == -1 && stat_errno == ENOENT) {
293                 /* we only want to update existing files */
294                 if (verbose > 1) {
295                         rprintf(FINFO, "not creating new file \"%s\"\n",
296                                 safe_fname(fname));
297                 }
298                 return;
299         }
300
301         if (statret == 0 && !preserve_perms
302             && S_ISDIR(st.st_mode) == S_ISDIR(file->mode)) {
303                 /* if the file exists already and we aren't perserving
304                  * permissions then act as though the remote end sent
305                  * us the file permissions we already have */
306                 file->mode = (file->mode & ~CHMOD_BITS)
307                            | (st.st_mode & CHMOD_BITS);
308         }
309
310         if (S_ISDIR(file->mode)) {
311                 /* The file to be received is a directory, so we need
312                  * to prepare appropriately.  If there is already a
313                  * file of that name and it is *not* a directory, then
314                  * we need to delete it.  If it doesn't exist, then
315                  * recursively create it. */
316
317                 if (dry_run)
318                         return; /* TODO: causes inaccuracies -- fix */
319                 if (statret == 0 && !S_ISDIR(st.st_mode)) {
320                         if (robust_unlink(fname) != 0) {
321                                 rsyserr(FERROR, errno,
322                                         "recv_generator: unlink %s to make room for directory",
323                                         full_fname(fname));
324                                 return;
325                         }
326                         statret = -1;
327                 }
328                 if (statret != 0 && do_mkdir(fname,file->mode) != 0 && errno != EEXIST) {
329                         if (!(relative_paths && errno == ENOENT
330                             && create_directory_path(fname, orig_umask) == 0
331                             && do_mkdir(fname, file->mode) == 0)) {
332                                 rsyserr(FERROR, errno,
333                                         "recv_generator: mkdir %s failed",
334                                         full_fname(fname));
335                         }
336                 }
337                 /* f_out is set to -1 when doing final directory-permission
338                  * and modification-time repair. */
339                 if (set_perms(fname, file, statret ? NULL : &st, 0)
340                     && verbose && f_out != -1)
341                         rprintf(FINFO, "%s/\n", safe_fname(fname));
342                 return;
343         }
344
345         if (preserve_links && S_ISLNK(file->mode)) {
346 #if SUPPORT_LINKS
347                 char lnk[MAXPATHLEN];
348                 int l;
349
350                 if (safe_symlinks && unsafe_symlink(file->u.link, fname)) {
351                         if (verbose) {
352                                 rprintf(FINFO, "ignoring unsafe symlink %s -> \"%s\"\n",
353                                         full_fname(fname), file->u.link);
354                         }
355                         return;
356                 }
357                 if (statret == 0) {
358                         l = readlink(fname,lnk,MAXPATHLEN-1);
359                         if (l > 0) {
360                                 lnk[l] = 0;
361                                 /* A link already pointing to the
362                                  * right place -- no further action
363                                  * required. */
364                                 if (strcmp(lnk,file->u.link) == 0) {
365                                         set_perms(fname, file, &st,
366                                                   PERMS_REPORT);
367                                         return;
368                                 }
369                         }
370                         /* Not a symlink, so delete whatever's
371                          * already there and put a new symlink
372                          * in place. */
373                         delete_file(fname);
374                 }
375                 if (do_symlink(file->u.link,fname) != 0) {
376                         rsyserr(FERROR, errno, "symlink %s -> \"%s\" failed",
377                                 full_fname(fname), safe_fname(file->u.link));
378                 } else {
379                         set_perms(fname,file,NULL,0);
380                         if (verbose) {
381                                 rprintf(FINFO, "%s -> %s\n", safe_fname(fname),
382                                         safe_fname(file->u.link));
383                         }
384                 }
385 #endif
386                 return;
387         }
388
389         if (am_root && preserve_devices && IS_DEVICE(file->mode)) {
390                 if (statret != 0 ||
391                     st.st_mode != file->mode ||
392                     st.st_rdev != file->u.rdev) {
393                         delete_file(fname);
394                         if (verbose > 2) {
395                                 rprintf(FINFO,"mknod(%s,0%o,0x%x)\n",
396                                         safe_fname(fname),
397                                         (int)file->mode, (int)file->u.rdev);
398                         }
399                         if (do_mknod(fname,file->mode,file->u.rdev) != 0) {
400                                 rsyserr(FERROR, errno, "mknod %s failed",
401                                         full_fname(fname));
402                         } else {
403                                 set_perms(fname,file,NULL,0);
404                                 if (verbose) {
405                                         rprintf(FINFO, "%s\n",
406                                                 safe_fname(fname));
407                                 }
408                         }
409                 } else {
410                         set_perms(fname, file, &st, PERMS_REPORT);
411                 }
412                 return;
413         }
414
415         if (preserve_hard_links && hard_link_check(file, HL_CHECK_MASTER))
416                 return;
417
418         if (!S_ISREG(file->mode)) {
419                 rprintf(FINFO, "skipping non-regular file \"%s\"\n",
420                         safe_fname(fname));
421                 return;
422         }
423
424         fnamecmp = fname;
425         fnamecmp_type = FNAMECMP_FNAME;
426
427         if (statret == -1 && compare_dest != NULL) {
428                 /* try the file at compare_dest instead */
429                 pathjoin(fnamecmpbuf, sizeof fnamecmpbuf, compare_dest, fname);
430                 if (link_stat(fnamecmpbuf, &st, 0) == 0
431                     && S_ISREG(st.st_mode)) {
432 #if HAVE_LINK
433                         if (link_dest && !dry_run) {
434                                 if (do_link(fnamecmpbuf, fname) < 0) {
435                                         if (verbose) {
436                                                 rsyserr(FINFO, errno,
437                                                         "link %s => %s",
438                                                         fnamecmpbuf,
439                                                         safe_fname(fname));
440                                         }
441                                         fnamecmp = fnamecmpbuf;
442                                         fnamecmp_type = FNAMECMP_CMPDEST;
443                                 }
444                         } else
445 #endif
446                         {
447                                 fnamecmp = fnamecmpbuf;
448                                 fnamecmp_type = FNAMECMP_CMPDEST;
449                         }
450                         statret = 0;
451                 }
452         }
453
454         if (statret == 0 && !S_ISREG(st.st_mode)) {
455                 if (delete_file(fname) != 0)
456                         return;
457                 statret = -1;
458                 stat_errno = ENOENT;
459         }
460
461         if (partial_dir && (partialptr = partial_dir_fname(fname))
462             && link_stat(partialptr, &partial_st, 0) == 0
463             && S_ISREG(partial_st.st_mode)) {
464                 if (statret == -1)
465                         goto prepare_to_open;
466         } else
467                 partialptr = NULL;
468
469         if (statret == -1) {
470                 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
471                         return;
472                 if (stat_errno == ENOENT)
473                         goto notify_others;
474                 if (verbose > 1) {
475                         rsyserr(FERROR, stat_errno,
476                                 "recv_generator: failed to stat %s",
477                                 full_fname(fname));
478                 }
479                 return;
480         }
481
482         if (opt_ignore_existing && fnamecmp_type == FNAMECMP_FNAME) {
483                 if (verbose > 1)
484                         rprintf(FINFO, "%s exists\n", safe_fname(fname));
485                 return;
486         }
487
488         if (update_only && fnamecmp_type == FNAMECMP_FNAME
489             && cmp_modtime(st.st_mtime, file->modtime) > 0) {
490                 if (verbose > 1)
491                         rprintf(FINFO, "%s is newer\n", safe_fname(fname));
492                 return;
493         }
494
495         if (skip_file(fnamecmp, file, &st)) {
496                 if (fnamecmp_type == FNAMECMP_FNAME)
497                         set_perms(fname, file, &st, PERMS_REPORT);
498                 return;
499         }
500
501 prepare_to_open:
502         if (dry_run || whole_file > 0) {
503                 statret = -1;
504                 goto notify_others;
505         }
506         if (read_batch)
507                 goto notify_others;
508
509         if (partialptr) {
510                 st = partial_st;
511                 fnamecmp = partialptr;
512                 fnamecmp_type = FNAMECMP_PARTIAL_DIR;
513         }
514
515         /* open the file */
516         fd = do_open(fnamecmp, O_RDONLY, 0);
517
518         if (fd == -1) {
519                 rsyserr(FERROR, errno, "failed to open %s, continuing",
520                         full_fname(fnamecmp));
521             pretend_missing:
522                 /* pretend the file didn't exist */
523                 if (preserve_hard_links && hard_link_check(file, HL_SKIP))
524                         return;
525                 statret = -1;
526                 goto notify_others;
527         }
528
529         if (inplace && make_backups) {
530                 if (!(backupptr = get_backup_name(fname))) {
531                         close(fd);
532                         return;
533                 }
534                 if (!(back_file = make_file(fname, NULL, NO_EXCLUDES))) {
535                         close(fd);
536                         goto pretend_missing;
537                 }
538                 if (robust_unlink(backupptr) && errno != ENOENT) {
539                         rsyserr(FERROR, errno, "unlink %s",
540                                 full_fname(backupptr));
541                         free(back_file);
542                         close(fd);
543                         return;
544                 }
545                 if ((f_copy = do_open(backupptr,
546                     O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0600)) < 0) {
547                         rsyserr(FERROR, errno, "open %s",
548                                 full_fname(backupptr));
549                         free(back_file);
550                         close(fd);
551                         return;
552                 }
553                 fnamecmp_type = FNAMECMP_BACKUP;
554         }
555
556         if (verbose > 3) {
557                 rprintf(FINFO, "gen mapped %s of size %.0f\n",
558                         safe_fname(fnamecmp), (double)st.st_size);
559         }
560
561         if (verbose > 2)
562                 rprintf(FINFO, "generating and sending sums for %d\n", i);
563
564 notify_others:
565         write_int(f_out, i);
566         if (f_out_name >= 0)
567                 write_byte(f_out_name, fnamecmp_type);
568
569         if (dry_run || read_batch)
570                 return;
571
572         if (statret == 0) {
573                 generate_and_send_sums(fd, st.st_size, f_out, f_copy);
574
575                 if (f_copy >= 0) {
576                         close(f_copy);
577                         set_perms(backupptr, back_file, NULL, 0);
578                         if (verbose > 1) {
579                                 rprintf(FINFO, "backed up %s to %s\n",
580                                         fname, backupptr);
581                         }
582                         free(back_file);
583                 }
584
585                 close(fd);
586         } else
587                 write_sum_head(f_out, NULL);
588 }
589
590
591 void generate_files(int f_out, struct file_list *flist, char *local_name,
592                     int f_out_name)
593 {
594         int i;
595         int phase = 0;
596         char fbuf[MAXPATHLEN];
597
598         if (verbose > 2) {
599                 rprintf(FINFO, "generator starting pid=%ld count=%d\n",
600                         (long)getpid(), flist->count);
601         }
602
603         if (verbose >= 2) {
604                 rprintf(FINFO,
605                         whole_file > 0
606                         ? "delta-transmission disabled for local transfer or --whole-file\n"
607                         : "delta transmission enabled\n");
608         }
609
610         /* we expect to just sit around now, so don't exit on a
611            timeout. If we really get a timeout then the other process should
612            exit */
613         io_timeout = 0;
614
615         for (i = 0; i < flist->count; i++) {
616                 struct file_struct *file = flist->files[i];
617                 struct file_struct copy;
618
619                 if (!file->basename)
620                         continue;
621                 /* we need to ensure that any directories we create have writeable
622                    permissions initially so that we can create the files within
623                    them. This is then fixed after the files are transferred */
624                 if (!am_root && S_ISDIR(file->mode) && !(file->mode & S_IWUSR)) {
625                         copy = *file;
626                         /* XXX: Could this be causing a problem on SCO?  Perhaps their
627                          * handling of permissions is strange? */
628                         copy.mode |= S_IWUSR; /* user write */
629                         file = &copy;
630                 }
631
632                 recv_generator(local_name ? local_name : f_name_to(file, fbuf),
633                                file, i, f_out, f_out_name);
634         }
635
636         phase++;
637         csum_length = SUM_LENGTH;
638         ignore_times = 1;
639
640         if (verbose > 2)
641                 rprintf(FINFO,"generate_files phase=%d\n",phase);
642
643         write_int(f_out, -1);
644
645         /* files can cycle through the system more than once
646          * to catch initial checksum errors */
647         while ((i = get_redo_num()) != -1) {
648                 struct file_struct *file = flist->files[i];
649                 recv_generator(local_name ? local_name : f_name_to(file, fbuf),
650                                file, i, f_out, f_out_name);
651         }
652
653         phase++;
654         if (verbose > 2)
655                 rprintf(FINFO,"generate_files phase=%d\n",phase);
656
657         write_int(f_out, -1);
658
659         if (preserve_hard_links)
660                 do_hard_links();
661
662         /* now we need to fix any directory permissions that were
663          * modified during the transfer */
664         for (i = 0; i < flist->count; i++) {
665                 struct file_struct *file = flist->files[i];
666                 if (!file->basename || !S_ISDIR(file->mode))
667                         continue;
668                 recv_generator(local_name ? local_name : f_name(file),
669                                file, i, -1, -1);
670         }
671
672         if (verbose > 2)
673                 rprintf(FINFO,"generate_files finished\n");
674 }