From a3221d2ac14255c31109a617c4d62b949cd910de Mon Sep 17 00:00:00 2001 From: Wayne Davison Date: Fri, 16 Jul 2004 20:06:24 +0000 Subject: [PATCH] My version of Mark Curtis's --inplace option. --- match.c | 33 ++++++++++++++++ options.c | 26 ++++++++++++- receiver.c | 108 ++++++++++++++++++++++++++++++++++------------------- rsync.c | 8 ++++ rsync.h | 4 +- rsync.yo | 12 ++++++ sender.c | 28 +++++++------- 7 files changed, 164 insertions(+), 55 deletions(-) diff --git a/match.c b/match.c index d731aae7..f3858e5b 100644 --- a/match.c +++ b/match.c @@ -23,6 +23,7 @@ extern int verbose; extern int am_server; extern int do_progress; extern int checksum_seed; +extern int inplace; typedef unsigned short tag; @@ -200,6 +201,12 @@ static void hash_search(int f,struct sum_struct *s, if (l != s->sums[i].len) continue; + /* inplace: ensure chunk's offset is either >= our + * offset or that the data didn't move. */ + if (inplace && s->sums[i].offset < offset + && !(s->sums[i].flags & SUMFLG_SAME_OFFSET)) + continue; + if (verbose > 3) rprintf(FINFO,"potential match at %.0f target=%.0f %.0f sum=%08x\n", (double)offset,(double)j,(double)i,sum); @@ -215,15 +222,41 @@ static void hash_search(int f,struct sum_struct *s, continue; } + /* If inplace is enabled, the best possible match is + * one with an identical offset, so we prefer that over + * the following want_i optimization. */ + if (inplace) { + do { + size_t i2 = targets[j].i; + if (s->sums[i2].offset != offset) + continue; + if (i2 != i) { + if (sum != s->sums[i2].sum1) + break; + if (memcmp(sum2, s->sums[i2].sum2, + s->s2length) != 0) + break; + i = i2; + } + /* This chunk was at the same offset on + * both the sender and the receiver. */ + s->sums[i].flags |= SUMFLG_SAME_OFFSET; + goto set_want_i; + } while (++j < s->count && targets[j].t == t); + } + /* we've found a match, but now check to see * if want_i can hint at a better match. 
*/ if (i != want_i && want_i < s->count + && (!inplace || s->sums[want_i].offset >= offset + || s->sums[want_i].flags & SUMFLG_SAME_OFFSET) && sum == s->sums[want_i].sum1 && memcmp(sum2, s->sums[want_i].sum2, s->s2length) == 0) { /* we've found an adjacent match - the RLL coder * will be happy */ i = want_i; } + set_want_i: want_i = i + 1; matched(f,s,buf,offset,i); diff --git a/options.c b/options.c index 47bba714..5e04efe6 100644 --- a/options.c +++ b/options.c @@ -94,6 +94,7 @@ int ignore_errors = 0; int modify_window = 0; int blocking_io = -1; int checksum_seed = 0; +int inplace = 0; unsigned int block_size = 0; @@ -148,6 +149,7 @@ char *bind_address; static void print_rsync_version(enum logcode f) { char const *got_socketpair = "no "; + char const *have_inplace = "no "; char const *hardlinks = "no "; char const *links = "no "; char const *ipv6 = "no "; @@ -157,6 +159,10 @@ static void print_rsync_version(enum logcode f) got_socketpair = ""; #endif +#if HAVE_FTRUNCATE + have_inplace = ""; +#endif + #if SUPPORT_HARD_LINKS hardlinks = ""; #endif @@ -182,8 +188,8 @@ static void print_rsync_version(enum logcode f) /* Note that this field may not have type ino_t. It depends * on the complicated interaction between largefile feature * macros. 
*/ - rprintf(f, " %sIPv6, %d-bit system inums, %d-bit internal inums\n", - ipv6, + rprintf(f, " %sinplace, %sIPv6, %d-bit system inums, %d-bit internal inums\n", + have_inplace, ipv6, (int) (sizeof dumstat->st_ino * 8), (int) (sizeof (uint64) * 8)); #ifdef MAINTAINER_MODE @@ -233,6 +239,7 @@ void usage(enum logcode F) rprintf(F," --backup-dir make backups into this directory\n"); rprintf(F," --suffix=SUFFIX backup suffix (default %s w/o --backup-dir)\n",BACKUP_SUFFIX); rprintf(F," -u, --update update only (don't overwrite newer files)\n"); + rprintf(F," --inplace update the destination file inplace (see man page)\n"); rprintf(F," -K, --keep-dirlinks treat symlinked dir on receiver as dir\n"); rprintf(F," -l, --links copy symlinks as symlinks\n"); rprintf(F," -L, --copy-links copy the referent of all symlinks\n"); @@ -340,6 +347,7 @@ static struct poptOption long_options[] = { {"sparse", 'S', POPT_ARG_NONE, &sparse_files, 0, 0, 0 }, {"cvs-exclude", 'C', POPT_ARG_NONE, &cvs_exclude, 0, 0, 0 }, {"update", 'u', POPT_ARG_NONE, &update_only, 0, 0, 0 }, + {"inplace", 0, POPT_ARG_NONE, &inplace, 0, 0, 0 }, {"keep-dirlinks", 'K', POPT_ARG_NONE, &keep_dirlinks, 0, 0, 0 }, {"links", 'l', POPT_ARG_NONE, &preserve_links, 0, 0, 0 }, {"copy-links", 'L', POPT_ARG_NONE, &copy_links, 0, 0, 0 }, @@ -754,6 +762,17 @@ int parse_arguments(int *argc, const char ***argv, int frommain) bwlimit_writemax = 512; } + if (inplace) { +#if HAVE_FTRUNCATE + keep_partial = 0; +#else + snprintf(err_buf, sizeof err_buf, + "inplace is not supported on this %s\n", + am_server ? 
"server" : "client"); + return 0; +#endif + } + if (files_from) { char *colon; if (*argc != 2 && !(am_server && am_sender && *argc == 1)) { @@ -971,6 +990,9 @@ void server_options(char **args,int *argc) if (opt_ignore_existing && am_sender) args[ac++] = "--ignore-existing"; + if (inplace) + args[ac++] = "--inplace"; + if (tmpdir) { args[ac++] = "--temp-dir"; args[ac++] = tmpdir; diff --git a/receiver.c b/receiver.c index ac8f26bb..2eca4f6f 100644 --- a/receiver.c +++ b/receiver.c @@ -48,6 +48,7 @@ extern int ignore_errors; extern int orig_umask; extern int keep_partial; extern int checksum_seed; +extern int inplace; static void delete_one(char *fn, int is_dir) { @@ -255,16 +256,30 @@ static int receive_data(int f_in,struct map_struct *mapbuf,int fd,char *fname, sum_update(map,len); } - if (fd != -1 && write_file(fd, map, len) != (int)len) { - rsyserr(FERROR, errno, "write failed on %s", - full_fname(fname)); - exit_cleanup(RERR_FILEIO); + if (!inplace || offset != offset2) { + if (fd != -1 && write_file(fd, map, len) != (int)len) { + rsyserr(FERROR, errno, "write failed on %s", + full_fname(fname)); + exit_cleanup(RERR_FILEIO); + } + } else { + flush_write_file(fd); + if (do_lseek(fd,(OFF_T)len,SEEK_CUR) != offset+len) { + rprintf(FERROR, "lseek failed on %s: %s, %lli, %lli, %i\n", + full_fname(fname), strerror(errno), do_lseek(fd,0,SEEK_CUR), (offset+len), i); + exit_cleanup(RERR_FILEIO); + } } offset += len; } flush_write_file(fd); +#ifdef HAVE_FTRUNCATE + if (inplace) + ftruncate(fd, offset); +#endif + if (do_progress) end_progress(total_size); @@ -414,44 +429,59 @@ int recv_files(int f_in, struct file_list *flist, char *local_name) } else mapbuf = NULL; - if (!get_tmpname(fnametmp,fname)) { - if (mapbuf) - unmap_file(mapbuf); - if (fd1 != -1) - close(fd1); - continue; - } + /* We now check to see if we are writing file "inplace" */ + if (inplace) { + fd2 = do_open(fnamecmp, O_WRONLY|O_CREAT, 0); + if (fd2 == -1) { + rsyserr(FERROR, errno, "open %s failed", + 
full_fname(fnamecmp)); + receive_data(f_in,mapbuf,-1,NULL,file->length); + if (mapbuf) + unmap_file(mapbuf); + if (fd1 != -1) + close(fd1); + continue; + } + } else { + if (!get_tmpname(fnametmp,fname)) { + if (mapbuf) + unmap_file(mapbuf); + if (fd1 != -1) + close(fd1); + continue; + } + + strlcpy(template, fnametmp, sizeof template); - strlcpy(template, fnametmp, sizeof template); - - /* we initially set the perms without the - * setuid/setgid bits to ensure that there is no race - * condition. They are then correctly updated after - * the lchown. Thanks to snabb@epipe.fi for pointing - * this out. We also set it initially without group - * access because of a similar race condition. */ - fd2 = do_mkstemp(fnametmp, file->mode & INITACCESSPERMS); - - /* in most cases parent directories will already exist - * because their information should have been previously - * transferred, but that may not be the case with -R */ - if (fd2 == -1 && relative_paths && errno == ENOENT && - create_directory_path(fnametmp, orig_umask) == 0) { - strlcpy(fnametmp, template, sizeof fnametmp); + /* we initially set the perms without the + * setuid/setgid bits to ensure that there is no race + * condition. They are then correctly updated after + * the lchown. Thanks to snabb@epipe.fi for pointing + * this out. We also set it initially without group + * access because of a similar race condition. 
*/ fd2 = do_mkstemp(fnametmp, file->mode & INITACCESSPERMS); - } - if (fd2 == -1) { - rsyserr(FERROR, errno, "mkstemp %s failed", - full_fname(fnametmp)); - receive_data(f_in,mapbuf,-1,NULL,file->length); - if (mapbuf) - unmap_file(mapbuf); - if (fd1 != -1) - close(fd1); - continue; - } - cleanup_set(fnametmp, fname, file, mapbuf, fd1, fd2); + /* in most cases parent directories will already exist + * because their information should have been previously + * transferred, but that may not be the case with -R */ + if (fd2 == -1 && relative_paths && errno == ENOENT + && create_directory_path(fnametmp, orig_umask) == 0) { + strlcpy(fnametmp, template, sizeof fnametmp); + fd2 = do_mkstemp(fnametmp, file->mode & INITACCESSPERMS); + } + if (fd2 == -1) { + rsyserr(FERROR, errno, "mkstemp %s failed", + full_fname(fnametmp)); + receive_data(f_in,mapbuf,-1,NULL,file->length); + if (mapbuf) + unmap_file(mapbuf); + if (fd1 != -1) + close(fd1); + continue; + } + + cleanup_set(fnametmp, fname, file, mapbuf, fd1, fd2); + } if (!am_server && verbose) rprintf(FINFO, "%s\n", fname); diff --git a/rsync.c b/rsync.c index 125369bc..7a1c3f93 100644 --- a/rsync.c +++ b/rsync.c @@ -34,6 +34,7 @@ extern int force_delete; extern int recurse; extern int make_backups; extern char *backup_dir; +extern int inplace; /* @@ -239,6 +240,13 @@ void finish_transfer(char *fname, char *fnametmp, struct file_struct *file, if (make_backups && !make_backup(fname)) return; + if (inplace) { + if (verbose > 2) + rprintf(FINFO, "finishing %s\n", fname); + set_perms(fname, file, NULL, 0); + return; + } + /* move tmp file over real file */ if (verbose > 2) rprintf(FINFO, "renaming %s to %s\n", fnametmp, fname); diff --git a/rsync.h b/rsync.h index 9a165071..07f447b1 100644 --- a/rsync.h +++ b/rsync.h @@ -458,11 +458,13 @@ struct file_list { struct file_struct **files; }; +#define SUMFLG_SAME_OFFSET (1<<0) + struct sum_buf { OFF_T offset; /**< offset in file of this chunk */ unsigned int len; /**< length of chunk 
of file */ - int i; /**< index of this chunk */ uint32 sum1; /**< simple checksum */ + short flags; /**< flag bits */ char sum2[SUM_LENGTH]; /**< checksum */ }; diff --git a/rsync.yo b/rsync.yo index d18267a9..ad024e4a 100644 --- a/rsync.yo +++ b/rsync.yo @@ -289,6 +289,7 @@ verb( --backup-dir make backups into this directory --suffix=SUFFIX backup suffix (default ~ w/o --backup-dir) -u, --update update only (don't overwrite newer files) + --inplace update the destination file inplace -K, --keep-dirlinks treat symlinked dir on receiver as dir -l, --links copy symlinks as symlinks -L, --copy-links copy the referent of all symlinks @@ -484,6 +485,17 @@ dit(bf(-K, --keep-dirlinks)) On the receiving side, if a symlink is pointing to a directory, it will be treated as matching a directory from the sender. +dit(bf(--inplace)) This causes rsync not to create a new copy of the file +and then move it into place. Instead rsync will overwrite the existing +file, meaning that the rsync algorithm can't extract the full amount of +network reduction it might otherwise. + +This option is useful for transfer of large files with block based changes +and also on systems that are disk bound not network bound. + +WARNING: If the transfer is interrupted, you will have an inconsistent file +and the transfer should be run again. + dit(bf(-l, --links)) When symlinks are encountered, recreate the symlink on the destination. 
diff --git a/sender.c b/sender.c index 9fe5f623..688dff61 100644 --- a/sender.c +++ b/sender.c @@ -27,6 +27,7 @@ extern int dry_run; extern int am_server; extern int am_daemon; extern int protocol_version; +extern struct stats stats; /** @@ -62,8 +63,8 @@ static struct sum_struct *receive_sums(int f) int i; OFF_T offset = 0; - s = new(struct sum_struct); - if (!s) out_of_memory("receive_sums"); + if (!(s = new(struct sum_struct))) + out_of_memory("receive_sums"); read_sum_head(f, s); @@ -77,26 +78,28 @@ static struct sum_struct *receive_sums(int f) if (s->count == 0) return(s); - s->sums = new_array(struct sum_buf, s->count); - if (!s->sums) out_of_memory("receive_sums"); + if (!(s->sums = new_array(struct sum_buf, s->count))) + out_of_memory("receive_sums"); - for (i = 0; i < (int) s->count; i++) { + for (i = 0; i < (int)s->count; i++) { s->sums[i].sum1 = read_int(f); read_buf(f, s->sums[i].sum2, s->s2length); s->sums[i].offset = offset; - s->sums[i].i = i; + s->sums[i].flags = 0; - if (i == (int) s->count-1 && s->remainder != 0) { + if (i == (int)s->count-1 && s->remainder != 0) s->sums[i].len = s->remainder; - } else { + else s->sums[i].len = s->blength; - } offset += s->sums[i].len; - if (verbose > 3) - rprintf(FINFO, "chunk[%d] len=%d offset=%.0f sum1=%08x\n", - i, s->sums[i].len, (double)s->sums[i].offset, s->sums[i].sum1); + if (verbose > 3) { + rprintf(FINFO, + "chunk[%d] len=%d offset=%.0f sum1=%08x\n", + i, s->sums[i].len, (double)s->sums[i].offset, + s->sums[i].sum1); + } } s->flength = offset; @@ -116,7 +119,6 @@ void send_files(struct file_list *flist, int f_out, int f_in) int i; struct file_struct *file; int phase = 0; - extern struct stats stats; struct stats initial_stats; int j; -- 2.34.1