This adds a sender optimization feature that allows a cache of checksums to be used when the client specifies the --checksum option, and creates and/or updates the .rsyncsums files when --checksum-updating is specified. To use this patch, run these commands for a successful build: patch -p1 pool_boundary = ptr; } +/* The len count is the length of the basename + 1 for the null. */ +static void add_checksum(const char *dirname, const char *basename, int len, + OFF_T file_length, time_t mtime, time_t ctime, + const char *sum, const char *alt_sum, int flags) +{ + struct file_struct *file; + int alloc_len, extra_len; + char *bp; + + if (len == 10+1 && *basename == '.' && strcmp(basename, ".rsyncsums") == 0) + return; + + if (len < 0) + len = strlen(basename) + 1; + + extra_len = (file_extra_cnt + (file_length > 0xFFFFFFFFu) + + SUM_EXTRA_CNT + TIME_EXTRA_CNT) + * EXTRA_LEN; +#if EXTRA_ROUNDING > 0 + if (extra_len & (EXTRA_ROUNDING * EXTRA_LEN)) + extra_len = (extra_len | (EXTRA_ROUNDING * EXTRA_LEN)) + EXTRA_LEN; +#endif + alloc_len = FILE_STRUCT_LEN + extra_len + len + checksum_len*2 + 1; + bp = pool_alloc(checksum_flist->file_pool, alloc_len, "add_checksum"); + + memset(bp, 0, extra_len + FILE_STRUCT_LEN); + bp += extra_len; + file = (struct file_struct *)bp; + bp += FILE_STRUCT_LEN; + + memcpy(bp, basename, len); + if (alt_sum) + strlcpy(bp+len, alt_sum, checksum_len*2 + 1); + else { + memset(bp+len, '=', checksum_len*2); + bp[len+checksum_len*2] = '\0'; + } + + file->flags = flags; + file->mode = S_IFREG; + file->modtime = mtime; + file->len32 = (uint32)file_length; + if (file_length > 0xFFFFFFFFu) { + file->flags |= FLAG_LENGTH64; + OPT_EXTRA(file, 0)->unum = (uint32)(file_length >> 32); + } + file->dirname = dirname; + bp = F_SUM(file); + memcpy(bp, sum, checksum_len); +#if SIZEOF_TIME_T == 4 + OPT_EXTRA(file, LEN64_BUMP(file) + SUM_EXTRA_CNT)->num = ctime; +#else + memcpy(bp - SIZEOF_TIME_T, &ctime, SIZEOF_TIME_T); +#endif + + flist_expand(checksum_flist, 1); + checksum_flist->files[checksum_flist->count++] = file; + + checksum_flist->sorted = checksum_flist->files; +} + +static void write_checksums(const char *next_dirname, int whole_dir) +{ + static const char *dirname_save; + char fbuf[MAXPATHLEN]; + const char *dirname; + int count, new_entries, counts_match, no_skipped; + time_t latest_time = 0; + FILE *out_fp; + int i; + + dirname = dirname_save; + dirname_save = next_dirname; + + if (!dirname) + return; + + count = checksum_flist->count; + new_entries = checksum_updates != 0; + counts_match = count == checksum_matches; + no_skipped = whole_dir && regular_skipped == 0; + + clean_flist(checksum_flist, 0); + + checksum_flist->count = 0; + checksum_matches = 0; + checksum_updates = 0; + regular_skipped = 0; + + if (dry_run) + return; + + if (*dirname) { + if (pathjoin(fbuf, sizeof fbuf, dirname, ".rsyncsums") >= sizeof fbuf) + return; + } else + strlcpy(fbuf, ".rsyncsums", sizeof fbuf); + + if (checksum_flist->high - checksum_flist->low < 0 && no_skipped) { + unlink(fbuf); + return; + } + + if (!new_entries && (counts_match || !whole_dir)) + return; + + if (!(out_fp = fopen(fbuf, "w"))) + return; + + new_entries = 0; + for (i = checksum_flist->low; i <= checksum_flist->high; i++) { + struct file_struct *file = checksum_flist->sorted[i]; + const char *cp = F_SUM(file); + const char *end = cp + checksum_len; + const char *alt_sum = file->basename + strlen(file->basename) + 1; + time_t ctime; + if (whole_dir && !(file->flags & FLAG_SUM_KEEP)) + continue; +#if SIZEOF_TIME_T == 4 + ctime = OPT_EXTRA(file, LEN64_BUMP(file) + SUM_EXTRA_CNT)->num; +#else + memcpy(&ctime, cp - SIZEOF_TIME_T, SIZEOF_TIME_T); +#endif + if (protocol_version >= 30) + fprintf(out_fp, "%s ", alt_sum); + if (file->flags & FLAG_SUM_MISSING) { + new_entries++; + do { + fprintf(out_fp, "=="); + } while (++cp != end); + } else { + do { + fprintf(out_fp, "%02x", CVAL(cp, 0)); + } while (++cp != end); + } + if (protocol_version < 30) + fprintf(out_fp, " %s", alt_sum); + if (*alt_sum == '=') + new_entries++; + fprintf(out_fp, " %10.0f %10.0f %10.0f %s\n", + (double)F_LENGTH(file), (double)file->modtime, + (double)ctime, file->basename); + if (file->modtime > ctime) + ctime = file->modtime; + if (ctime > latest_time) + latest_time = ctime; + } + + fclose(out_fp); + + if (whole_dir && new_entries == 0) + set_modtime(fbuf, latest_time, latest_time); + else + set_modtime(fbuf, latest_time-1, latest_time-1); +} + +/* The direname value must remain unchanged during the lifespan of the + * created checksum_flist object because we use it directly. */ +static void read_checksums(const char *dirname) +{ + char line[MAXPATHLEN+1024], fbuf[MAXPATHLEN], sum[MAX_DIGEST_LEN]; + const char *alt_sum = NULL; + OFF_T file_length; + time_t mtime, ctime; + int len, dlen, i, flags; + char *cp; + FILE *fp; + + write_checksums(dirname, 0); + + if (checksum_flist) { + /* Reset the pool memory and empty the file-list array. */ + pool_free_old(checksum_flist->file_pool, + pool_boundary(checksum_flist->file_pool, 0)); + checksum_flist->count = 0; + } else + checksum_flist = flist_new(FLIST_TEMP, "read_checksums"); + + checksum_flist->low = 0; + checksum_flist->high = -1; + checksum_matches = 0; + checksum_updates = 0; + regular_skipped = 0; + + if (!dirname) + return; + + dlen = strlcpy(fbuf, dirname, sizeof fbuf); + if (dlen >= (int)sizeof fbuf) + return; + if (dlen) + fbuf[dlen++] = '/'; + else + dirname = NULL; + strlcpy(fbuf+dlen, ".rsyncsums", sizeof fbuf - dlen); + if (!(fp = fopen(fbuf, "r"))) + return; + + while (fgets(line, sizeof line, fp)) { + cp = line; + if (protocol_version >= 30) { + alt_sum = cp; + if (*cp == '=') + while (*++cp == '=') {} + else + while (isXDigit(cp)) cp++; + if (cp - alt_sum != MD4_DIGEST_LEN*2 || *cp != ' ') + break; + while (*++cp == ' ') {} + } + + if (*cp == '=') { + for (i = 0; i < checksum_len*2; i++, cp++) { + if (*cp != '=') { + cp = ""; + break; + } + } + memset(sum, 0, checksum_len); + flags = FLAG_SUM_MISSING; + } else { + for (i = 0; i < checksum_len*2; i++, cp++) { + int x; + if (isXDigit(cp)) { + if (isDigit(cp)) + x = *cp - '0'; + else + x = (*cp & 0xF) + 9; + } else { + cp = ""; + break; + } + if (i & 1) + sum[i/2] |= x; + else + sum[i/2] = x << 4; + } + flags = 0; + } + if (*cp != ' ') + break; + while (*++cp == ' ') {} + + if (protocol_version < 30) { + alt_sum = cp; + if (*cp == '=') + while (*++cp == '=') {} + else + while (isXDigit(cp)) cp++; + if (cp - alt_sum != MD5_DIGEST_LEN*2 || *cp != ' ') + break; + while (*++cp == ' ') {} + } + + file_length = 0; + while (isDigit(cp)) + file_length = file_length * 10 + *cp++ - '0'; + if (*cp != ' ') + break; + while (*++cp == ' ') {} + + mtime = 0; + while (isDigit(cp)) + mtime = mtime * 10 + *cp++ - '0'; + if (*cp != ' ') + break; + while (*++cp == ' ') {} + + ctime = 0; + while (isDigit(cp)) + ctime = ctime * 10 + *cp++ - '0'; + if (*cp != ' ') + break; + while (*++cp == ' ') {} + + len = strlen(cp); + while (len && (cp[len-1] == '\n' || cp[len-1] == '\r')) + len--; + if (!len) + break; + cp[len++] = '\0'; /* len now counts the null */ + if (strchr(cp, '/') || len > MAXPATHLEN) + break; + + strlcpy(fbuf+dlen, cp, sizeof fbuf - dlen); + if (is_excluded(fbuf, 0, ALL_FILTERS)) { + flags |= FLAG_SUM_KEEP; + checksum_matches++; + } + + add_checksum(dirname, cp, len, file_length, mtime, ctime, + sum, alt_sum, flags); + } + fclose(fp); + + clean_flist(checksum_flist, 0); +} + int push_pathname(const char *dir, int len) { if (dir == pathname) @@ -980,34 +1287,24 @@ static struct file_struct *recv_file_ent return file; } -/** - * Create a file_struct for a named file by reading its stat() - * information and performing extensive checks against global - * options. - * - * @return the new file, or NULL if there was an error or this file - * should be excluded. +/* Create a file_struct for a named file by reading its stat() information + * and performing extensive checks against global options. * - * @todo There is a small optimization opportunity here to avoid - * stat()ing the file in some circumstances, which has a certain cost. - * We are called immediately after doing readdir(), and so we may - * already know the d_type of the file. We could for example avoid - * statting directories if we're not recursing, but this is not a very - * important case. Some systems may not have d_type. - **/ + * Returns a pointer to the new file struct, or NULL if there was an error + * or this file should be excluded. */ struct file_struct *make_file(const char *fname, struct file_list *flist, STRUCT_STAT *stp, int flags, int filter_level) { static char *lastdir; - static int lastdir_len = -1; + static int lastdir_len = -2; struct file_struct *file; - STRUCT_STAT st; char thisname[MAXPATHLEN]; char linkname[MAXPATHLEN]; int alloc_len, basename_len, linkname_len; int extra_len = file_extra_cnt * EXTRA_LEN; const char *basename; alloc_pool_t *pool; + STRUCT_STAT st; char *bp; if (strlcpy(thisname, fname, sizeof thisname) >= sizeof thisname) { @@ -1086,6 +1383,8 @@ struct file_struct *make_file(const char if (is_excluded(thisname, S_ISDIR(st.st_mode) != 0, filter_level)) { if (ignore_perishable) non_perishable_cnt++; + if (S_ISREG(st.st_mode)) + regular_skipped++; return NULL; } @@ -1124,9 +1423,16 @@ struct file_struct *make_file(const char memcpy(lastdir, thisname, len); lastdir[len] = '\0'; lastdir_len = len; + if (always_checksum && am_sender && flist) + read_checksums(lastdir); } - } else + } else { basename = thisname; + if (always_checksum && am_sender && flist && lastdir_len == -2) { + lastdir_len = -1; + read_checksums(""); + } + } basename_len = strlen(basename) + 1; /* count the '\0' */ #ifdef SUPPORT_LINKS @@ -1202,11 +1508,44 @@ struct file_struct *make_file(const char } #endif - if (always_checksum && am_sender && S_ISREG(st.st_mode)) - file_checksum(thisname, tmp_sum, st.st_size); - F_PATHNAME(file) = pathname; + if (always_checksum && am_sender && S_ISREG(st.st_mode)) { + int j; + if (flist && (j = flist_find(checksum_flist, file)) >= 0) { + struct file_struct *fp = checksum_flist->sorted[j]; + time_t ctime; +#if SIZEOF_TIME_T == 4 + ctime = OPT_EXTRA(fp, LEN64_BUMP(fp) + SUM_EXTRA_CNT)->num; +#else + memcpy(&ctime, F_SUM(fp) - SIZEOF_TIME_T, SIZEOF_TIME_T); +#endif + if (F_LENGTH(fp) == st.st_size + && fp->modtime == st.st_mtime && ctime == st.st_ctime) { + if (fp->flags & FLAG_SUM_MISSING) { + fp->flags &= ~FLAG_SUM_MISSING; + file_checksum(thisname, tmp_sum, st.st_size); + memcpy(F_SUM(fp), tmp_sum, MAX_DIGEST_LEN); + } else { + checksum_matches++; + memcpy(tmp_sum, F_SUM(fp), MAX_DIGEST_LEN); + } + fp->flags |= FLAG_SUM_KEEP; + } else { + clear_file(fp); + goto compute_new_checksum; + } + } else { + compute_new_checksum: + file_checksum(thisname, tmp_sum, st.st_size); + if (checksum_updating && flist) { + add_checksum(file->dirname, basename, basename_len, + st.st_size, st.st_mtime, st.st_ctime, + tmp_sum, NULL, FLAG_SUM_KEEP); + } + } + } + /* This code is only used by the receiver when it is building * a list of files for a delete pass. */ if (keep_dirlinks && linkname_len && flist) { @@ -1499,6 +1838,9 @@ static void send_directory(int f, struct closedir(d); + if (checksum_updating && always_checksum && am_sender && f >= 0) + write_checksums(NULL, 1); + if (f >= 0 && recurse && !divert_dirs) { int i, end = flist->used - 1; /* send_if_directory() bumps flist->used, so use "end". */ @@ -1942,7 +2284,11 @@ struct file_list *send_file_list(int f, * file-list to check if this is a 1-file xfer. */ send_extra_file_list(f, 1); } - } + } else + flist_eof = 1; + + if (checksum_updating && always_checksum && flist_eof) + read_checksums(NULL); /* writes any last updates */ return flist; } @@ -2235,7 +2581,7 @@ void flist_free(struct file_list *flist) if (!flist->prev || !flist_cnt) pool_destroy(flist->file_pool); - else + else if (flist->pool_boundary) pool_free_old(flist->file_pool, flist->pool_boundary); if (flist->sorted && flist->sorted != flist->files) --- old/loadparm.c +++ new/loadparm.c @@ -152,6 +152,7 @@ typedef struct int syslog_facility; int timeout; + BOOL checksum_updating; BOOL fake_super; BOOL ignore_errors; BOOL ignore_nonreadable; @@ -200,6 +201,7 @@ static service sDefault = /* syslog_facility; */ LOG_DAEMON, /* timeout; */ 0, + /* checksum_updating; */ False, /* fake_super; */ False, /* ignore_errors; */ False, /* ignore_nonreadable; */ False, @@ -316,6 +318,7 @@ static struct parm_struct parm_table[] = {"lock file", P_STRING, P_LOCAL, &sDefault.lock_file, NULL,0}, {"log file", P_STRING, P_LOCAL, &sDefault.log_file, NULL,0}, {"log format", P_STRING, P_LOCAL, &sDefault.log_format, NULL,0}, + {"checksum updating", P_BOOL, P_LOCAL, &sDefault.checksum_updating, NULL,0}, {"max connections", P_INTEGER,P_LOCAL, &sDefault.max_connections, NULL,0}, {"max verbosity", P_INTEGER,P_LOCAL, &sDefault.max_verbosity, NULL,0}, {"name", P_STRING, P_LOCAL, &sDefault.name, NULL,0}, @@ -421,6 +424,7 @@ FN_LOCAL_BOOL(lp_fake_super, fake_super) FN_LOCAL_BOOL(lp_ignore_errors, ignore_errors) FN_LOCAL_BOOL(lp_ignore_nonreadable, ignore_nonreadable) FN_LOCAL_BOOL(lp_list, list) +FN_LOCAL_BOOL(lp_checksum_updating, checksum_updating) FN_LOCAL_BOOL(lp_read_only, read_only) FN_LOCAL_BOOL(lp_strict_modes, strict_modes) FN_LOCAL_BOOL(lp_transfer_logging, transfer_logging) --- old/options.c +++ new/options.c @@ -109,6 +109,7 @@ size_t bwlimit_writemax = 0; int ignore_existing = 0; int ignore_non_existing = 0; int need_messages_from_generator = 0; +int checksum_updating = 0; int max_delete = INT_MIN; OFF_T max_size = 0; OFF_T min_size = 0; @@ -303,6 +304,7 @@ void usage(enum logcode F) rprintf(F," -q, --quiet suppress non-error messages\n"); rprintf(F," --no-motd suppress daemon-mode MOTD (see manpage caveat)\n"); rprintf(F," -c, --checksum skip based on checksum, not mod-time & size\n"); + rprintf(F," --checksum-updating sender updates .rsyncsums files\n"); rprintf(F," -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X)\n"); rprintf(F," --no-OPTION turn off an implied OPTION (e.g. --no-D)\n"); rprintf(F," -r, --recursive recurse into directories\n"); @@ -544,6 +546,7 @@ static struct poptOption long_options[] {"checksum", 'c', POPT_ARG_VAL, &always_checksum, 1, 0, 0 }, {"no-checksum", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 }, {"no-c", 0, POPT_ARG_VAL, &always_checksum, 0, 0, 0 }, + {"checksum-updating",0, POPT_ARG_NONE, &checksum_updating, 0, 0, 0 }, {"block-size", 'B', POPT_ARG_LONG, &block_size, 0, 0, 0 }, {"compare-dest", 0, POPT_ARG_STRING, 0, OPT_COMPARE_DEST, 0, 0 }, {"copy-dest", 0, POPT_ARG_STRING, 0, OPT_COPY_DEST, 0, 0 }, @@ -1913,7 +1916,9 @@ void server_options(char **args,int *arg args[ac++] = basis_dir[i]; } } - } + } else if (checksum_updating) + args[ac++] = "--checksum-updating"; + if (append_mode) args[ac++] = "--append"; --- old/rsync.h +++ new/rsync.h @@ -592,6 +592,7 @@ extern int xattrs_ndx; #define DEV_EXTRA_CNT 2 #define DIRNODE_EXTRA_CNT 3 #define SUM_EXTRA_CNT ((MAX_DIGEST_LEN + EXTRA_LEN - 1) / EXTRA_LEN) +#define TIME_EXTRA_CNT ((SIZEOF_TIME_T + EXTRA_LEN - 1) / EXTRA_LEN) #define REQ_EXTRA(f,ndx) ((union file_extras*)(f) - (ndx)) #define OPT_EXTRA(f,bump) ((union file_extras*)(f) - file_extra_cnt - 1 - (bump)) @@ -1077,6 +1078,12 @@ isDigit(const char *ptr) } static inline int +isXDigit(const char *ptr) +{ + return isxdigit(*(unsigned char *)ptr); +} + +static inline int isPrint(const char *ptr) { return isprint(*(unsigned char *)ptr); --- old/rsync.yo +++ new/rsync.yo @@ -322,6 +322,7 @@ to the detailed description below for a -q, --quiet suppress non-error messages --no-motd suppress daemon-mode MOTD (see caveat) -c, --checksum skip based on checksum, not mod-time & size + --checksum-updating sender updates .rsyncsums files -a, --archive archive mode; equals -rlptgoD (no -H,-A,-X) --no-OPTION turn off an implied OPTION (e.g. --no-D) -r, --recursive recurse into directories @@ -518,9 +519,9 @@ uses a "quick check" that (by default) c of last modification match between the sender and receiver. This option changes this to compare a 128-bit MD4 checksum for each file that has a matching size. Generating the checksums means that both sides will expend -a lot of disk I/O reading all the data in the files in the transfer (and -this is prior to any reading that will be done to transfer changed files), -so this can slow things down significantly. +a lot of disk I/O reading the data in all the files in the transfer, so +this can slow things down significantly (and this is prior to any reading +that will be done to transfer the files that have changed). The sending side generates its checksums while it is doing the file-system scan that builds the list of the available files. The receiver generates @@ -528,12 +529,42 @@ its checksums when it is scanning for ch file that has the same size as the corresponding sender's file: files with either a changed size or a changed checksum are selected for transfer. +Starting with version 3.0.0, the sending side will look for a checksum +summary file and use a pre-generated checksum that it reads out of the file +(as long as it matches the file's size and modified time). This allows a +server to support the --checksum option to clients without having to +recompute the checksums for each client. See the bf(--checksum-updating) +option for a way to have rsync create/update these checksum files. + Note that rsync always verifies that each em(transferred) file was correctly reconstructed on the receiving side by checking a whole-file checksum that is generated when as the file is transferred, but that automatic after-the-transfer verification has nothing to do with this option's before-the-transfer "Does this file need to be updated?" check. +dit(bf(--checksum-updating)) This option tells the sending side to create +and/or update per-directory checksum files that are used by the +bf(--checksum) option. The file that is updated is named .rsyncsums. If +pre-transfer checksums are not being computed, this option has no effect. + +The checksum files stores the computed checksum, last-known size, +modification time, and name for each file in the current directory. If a +later transfer finds that a file matches its prior size and modification +time, the checksum is assumed to still be correct. Otherwise it is +recomputed and udpated in the file. + +To avoid transferring the system's checksum files, you can use an exclude +(e.g. bf(--exclude=.rsyncsums)). To make this easier to type, you can use +a popt alias. For instance, adding the following line in your ~/.popt file +defines a bf(-cc) option that enables checksum updating and excludes the +checksum files: + +verb( rsync alias --cc --checksum-updating --exclude=.rsyncsums) + +An rsync daemon does not allow the client to control this setting, so see +the "checksum updating" daemon config option for information on how to make +a daemon maintain these checksum files. + dit(bf(-a, --archive)) This is equivalent to bf(-rlptgoD). It is a quick way of saying you want recursion and want to preserve almost everything (with -H being a notable omission). --- old/rsyncd.conf.yo +++ new/rsyncd.conf.yo @@ -198,6 +198,20 @@ locking on this file to ensure that the exceeded for the modules sharing the lock file. The default is tt(/var/run/rsyncd.lock). +dit(bf(checksum updating)) This option tells rsync to update/create the +checksum information in the per-directory checksum files when users copy +files using the bf(--checksum) option. Any file that has changed since it +was last checksummed (or is not mentioned) has its data updated in the +.rsyncsums file. + +Note that this updating will occur even if the module is listed as being +read-only. If you want to hide these files (and you will almost always +want to do), add ".rsyncsums" to the module's exclude setting. + +Note also that the client's command-line option, bf(--checksum-updating), +has no effect on a daemon. A daemon will only update/create checksum files +if this config option is true. + dit(bf(read only)) The "read only" option determines whether clients will be able to upload files or not. If "read only" is true then any attempted uploads will fail. If "read only" is false then uploads will --- old/support/rsyncsums +++ new/support/rsyncsums @@ -0,0 +1,184 @@ +#!/usr/bin/perl -w +use strict; + +use Getopt::Long; +use Cwd qw(abs_path cwd); +use Digest::MD4; +use Digest::MD5; + +our $SUMS_FILE = '.rsyncsums'; + +our($recurse_opt, $force_reading, $help_opt); +our $verbosity = 0; + +&Getopt::Long::Configure('bundling'); +&usage if !&GetOptions( + 'recurse|r' => \$recurse_opt, + 'force|f' => \$force_reading, + 'verbose|v+' => \$verbosity, + 'help|h' => \$help_opt, +) || $help_opt; + +my $start_dir = cwd(); + +my @dirs = @ARGV; +@dirs = '.' unless @dirs; +foreach (@dirs) { + $_ = abs_path($_); +} + +$| = 1; + +my $md4 = Digest::MD4->new; +my $md5 = Digest::MD5->new; + +while (@dirs) { + my $dir = shift @dirs; + + if (!chdir($dir)) { + warn "Unable to chdir to $dir: $!\n"; + next; + } + if (!opendir(DP, '.')) { + warn "Unable to opendir $dir: $!\n"; + next; + } + + if ($verbosity) { + my $reldir = $dir; + $reldir =~ s#^$start_dir(/|$)# $1 ? '' : '.' #eo; + print "$reldir ... "; + } + + my $sums_mtime = (stat($SUMS_FILE))[9]; + my %cache; + my @subdirs; + my $cnt = 0; + my $latest_time = 0; + while (defined(my $fn = readdir(DP))) { + next if $fn =~ /^\.\.?$/ || $fn =~ /^\Q$SUMS_FILE\E$/o || -l $fn; + if (-d _) { + push(@subdirs, "$dir/$fn"); + next; + } + next unless -f _; + + my($size,$mtime,$ctime) = (stat(_))[7,9,10]; + + $cache{$fn} = [ $size, $mtime, $ctime ]; + $cnt++; + + $latest_time = $mtime if $mtime > $latest_time; + $latest_time = $ctime if $ctime > $latest_time; + } + + closedir DP; + + unshift(@dirs, sort @subdirs) if $recurse_opt; + + if (!$cnt) { + if (defined $sums_mtime) { + print "(removed $SUMS_FILE) " if $verbosity; + unlink($SUMS_FILE); + } + print "empty\n" if $verbosity; + next; + } + + if (defined($sums_mtime) && $sums_mtime == $latest_time && !$force_reading) { + print "OK\n" if $verbosity; + next; + } + + if (open(FP, '+<', $SUMS_FILE)) { + while () { + chomp; + my($sum4, $sum5, $size, $mtime, $ctime, $fn) = split(' ', $_, 6); + my $ref = $cache{$fn}; + if (defined $ref) { + if ($$ref[0] == $size + && $$ref[1] == $mtime && $$ref[2] == $ctime + && $sum4 !~ /=/ && $sum5 !~ /=/) { + $$ref[3] = $sum4; + $$ref[4] = $sum5; + $cnt--; + } else { + $$ref[3] = $$ref[4] = undef; + } + } else { + $cnt = -1; # Force rewrite due to removed line. + } + } + } else { + open(FP, '>', $SUMS_FILE) or die "Unable to write $dir/$SUMS_FILE: $!\n"; + $cnt = -1; + } + + if ($cnt) { + print "updating\n" if $verbosity; + while (my($fn, $ref) = each %cache) { + next if defined $$ref[3] && defined $$ref[4]; + if (!open(IN, $fn)) { + print STDERR "Unable to read $fn: $!\n"; + delete $cache{$fn}; + next; + } + + my($size,$mtime,$ctime) = (stat(IN))[7,9,10]; + my($sum4, $sum5); + + while (1) { + while (sysread(IN, $_, 64*1024)) { + $md4->add($_); + $md5->add($_); + } + $sum4 = $md4->hexdigest; + $sum5 = $md5->hexdigest; + print " $sum4 $sum5" if $verbosity > 2; + print " $fn\n" if $verbosity > 1; + my($size2,$mtime2,$ctime2) = (stat(IN))[7,9,10]; + last if $size == $size2 && $mtime == $mtime2 && $ctime == $ctime2; + $size = $size2; + $mtime = $mtime2; + $ctime = $ctime2; + sysseek(IN, 0, 0); + } + + close IN; + + $cache{$fn} = [ $size, $mtime, $ctime, $sum4, $sum5 ]; + } + + $latest_time = 0; + seek(FP, 0, 0); + foreach my $fn (sort keys %cache) { + my $ref = $cache{$fn}; + my($size, $mtime, $ctime, $sum4, $sum5) = @$ref; + printf FP '%s %s %10d %10d %10d %s' . "\n", $sum4, $sum5, $size, $mtime, $ctime, $fn; + + $latest_time = $mtime if $mtime > $latest_time; + $latest_time = $ctime if $ctime > $latest_time; + } + truncate(FP, tell(FP)); + } else { + print "OK.\n" if $verbosity; + } + + close FP; + + utime $latest_time, $latest_time, $SUMS_FILE; +} + +sub usage +{ + die <