From b9f592fbf50b0dc9e3d1d33b8deb2bf9abad9ef6 Mon Sep 17 00:00:00 2001 From: Wayne Davison Date: Thu, 15 Jul 2004 02:20:08 +0000 Subject: [PATCH] My modified version of Chris Shoemaker's improved batch-file handling. --- batch.c | 263 +------------------------------------------------ clientserver.c | 3 +- compat.c | 11 ++- flist.c | 20 +--- generator.c | 4 + io.c | 38 +++++++ main.c | 122 +++++++++++++++-------- match.c | 3 - options.c | 24 ++--- pipe.c | 25 ++++- rsync.yo | 113 ++++++++++----------- sender.c | 149 ++++++++-------------------- token.c | 34 ------- 13 files changed, 267 insertions(+), 542 deletions(-) diff --git a/batch.c b/batch.c index 067f43cf..42a249c2 100644 --- a/batch.c +++ b/batch.c @@ -9,53 +9,6 @@ #include extern char *batch_prefix; -extern int csum_length; -extern int protocol_version; -extern struct stats stats; - -struct file_list *batch_flist; - -static char rsync_flist_file[] = ".rsync_flist"; -static char rsync_csums_file[] = ".rsync_csums"; -static char rsync_delta_file[] = ".rsync_delta"; -static char rsync_argvs_file[] = ".rsync_argvs"; - -static int f_csums = -1; -static int f_delta = -1; - -void write_batch_flist_info(int flist_count, struct file_struct **files) -{ - char filename[MAXPATHLEN]; - int i, f, save_pv; - int64 save_written; - - stringjoin(filename, sizeof filename, - batch_prefix, rsync_flist_file, NULL); - - f = do_open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); - if (f < 0) { - rsyserr(FERROR, errno, "Batch file %s open error", filename); - exit_cleanup(1); - } - - save_written = stats.total_written; - save_pv = protocol_version; - protocol_version = PROTOCOL_VERSION; - write_int(f, protocol_version); - write_int(f, flist_count); - - for (i = 0; i < flist_count; i++) { - send_file_entry(files[i], f, - files[i]->flags & FLAG_TOP_DIR ? 
XMIT_TOP_DIR : 0); - } - send_file_entry(NULL, f, 0); - - protocol_version = save_pv; - stats.total_written = save_written; - - close(f); -} - void write_batch_argvs_file(int argc, char *argv[]) { @@ -63,7 +16,7 @@ void write_batch_argvs_file(int argc, char *argv[]) char filename[MAXPATHLEN]; stringjoin(filename, sizeof filename, - batch_prefix, rsync_argvs_file, NULL); + batch_prefix, ".rsync_argvs", NULL); fd = do_open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR | S_IEXEC); if (fd < 0) { @@ -75,9 +28,11 @@ void write_batch_argvs_file(int argc, char *argv[]) for (i = 0; i < argc; i++) { if (i == argc - 2) /* Skip source directory on cmdline */ continue; + if (strncmp(argv[i], "--files-from=", 13) == 0) + continue; if (i != 0) write(fd, " ", 1); - if (!strncmp(argv[i], "--write-batch=", 14)) { + if (strncmp(argv[i], "--write-batch=", 14) == 0) { write(fd, "--read-batch=", 13); write(fd, batch_prefix, strlen(batch_prefix)); } else if (i == argc - 1) { @@ -99,216 +54,6 @@ void write_batch_argvs_file(int argc, char *argv[]) } } -struct file_list *create_flist_from_batch(void) -{ - char filename[MAXPATHLEN]; - unsigned short flags; - int i, f, save_pv; - int64 save_read; - - stringjoin(filename, sizeof filename, - batch_prefix, rsync_flist_file, NULL); - - f = do_open(filename, O_RDONLY, 0); - if (f < 0) { - rsyserr(FERROR, errno, "Batch file %s open error", filename); - exit_cleanup(1); - } - - batch_flist = flist_new(WITH_HLINK, "create_flist_from_batch"); - - save_read = stats.total_read; - save_pv = protocol_version; - protocol_version = read_int(f); - - batch_flist->count = read_int(f); - flist_expand(batch_flist); - - for (i = 0; (flags = read_byte(f)) != 0; i++) { - if (protocol_version >= 28 && (flags & XMIT_EXTENDED_FLAGS)) - flags |= read_byte(f) << 8; - receive_file_entry(&batch_flist->files[i], flags, batch_flist, f); - } - receive_file_entry(NULL, 0, NULL, 0); /* Signal that we're done. 
*/ - - protocol_version = save_pv; - stats.total_read = save_read; - - return batch_flist; -} - -void write_batch_csums_file(void *buff, int bytes_to_write) -{ - if (write(f_csums, buff, bytes_to_write) < 0) { - rsyserr(FERROR, errno, "Batch file write error"); - close(f_csums); - exit_cleanup(1); - } -} - -void close_batch_csums_file(void) -{ - close(f_csums); - f_csums = -1; -} - - -/** - * Write csum info to batch file - * - * @todo This will break if s->count is ever larger than maxint. The - * batch code should probably be changed to consistently use the - * variable-length integer routines, which is probably a compatible - * change. - **/ -void write_batch_csum_info(int *flist_entry, struct sum_struct *s) -{ - size_t i; - int int_count; - char filename[MAXPATHLEN]; - - if (f_csums < 0) { - stringjoin(filename, sizeof filename, - batch_prefix, rsync_csums_file, NULL); - - f_csums = do_open(filename, O_WRONLY | O_CREAT | O_TRUNC, - S_IRUSR | S_IWUSR); - if (f_csums < 0) { - rsyserr(FERROR, errno, "Batch file %s open error", - filename); - close(f_csums); - exit_cleanup(1); - } - } - - write_batch_csums_file(flist_entry, sizeof (int)); - int_count = s ? 
(int) s->count : 0; - write_batch_csums_file(&int_count, sizeof int_count); - - if (s) { - for (i = 0; i < s->count; i++) { - write_batch_csums_file(&s->sums[i].sum1, - sizeof (uint32)); - write_batch_csums_file(s->sums[i].sum2, csum_length); - } - } -} - -int read_batch_csums_file(char *buff, int len) -{ - int bytes_read; - - if ((bytes_read = read(f_csums, buff, len)) < 0) { - rsyserr(FERROR, errno, "Batch file read error"); - close(f_csums); - exit_cleanup(1); - } - return bytes_read; -} - -void read_batch_csum_info(int flist_entry, struct sum_struct *s, - int *checksums_match) -{ - int i; - int file_flist_entry; - int file_chunk_ct; - uint32 file_sum1; - char file_sum2[SUM_LENGTH]; - char filename[MAXPATHLEN]; - - if (f_csums < 0) { - stringjoin(filename, sizeof filename, - batch_prefix, rsync_csums_file, NULL); - - f_csums = do_open(filename, O_RDONLY, 0); - if (f_csums < 0) { - rsyserr(FERROR, errno, "Batch file %s open error", - filename); - close(f_csums); - exit_cleanup(1); - } - } - - read_batch_csums_file((char *) &file_flist_entry, sizeof (int)); - if (file_flist_entry != flist_entry) { - rprintf(FINFO, "file_flist_entry (%d) != flist_entry (%d)\n", - file_flist_entry, flist_entry); - close(f_csums); - exit_cleanup(1); - - } else { - read_batch_csums_file((char *) &file_chunk_ct, sizeof (int)); - *checksums_match = 1; - for (i = 0; i < file_chunk_ct; i++) { - read_batch_csums_file((char *) &file_sum1, - sizeof (uint32)); - read_batch_csums_file(file_sum2, csum_length); - - if ((s->sums[i].sum1 != file_sum1) - || memcmp(s->sums[i].sum2, file_sum2, csum_length)) - *checksums_match = 0; - } /* end for */ - } -} - -void write_batch_delta_file(char *buff, int bytes_to_write) -{ - char filename[MAXPATHLEN]; - - if (f_delta < 0) { - stringjoin(filename, sizeof filename, - batch_prefix, rsync_delta_file, NULL); - - f_delta = do_open(filename, O_WRONLY | O_CREAT | O_TRUNC, - S_IRUSR | S_IWUSR); - if (f_delta < 0) { - rsyserr(FERROR, errno, "Batch file %s open 
error", - filename); - exit_cleanup(1); - } - } - - if (write(f_delta, buff, bytes_to_write) < 0) { - rsyserr(FERROR, errno, "Batch file %s write error", filename); - close(f_delta); - exit_cleanup(1); - } -} - -void close_batch_delta_file(void) -{ - close(f_delta); - f_delta = -1; -} - -int read_batch_delta_file(char *buff, int len) -{ - int bytes_read; - char filename[MAXPATHLEN]; - - if (f_delta < 0) { - stringjoin(filename, sizeof filename, - batch_prefix, rsync_delta_file, NULL); - - f_delta = do_open(filename, O_RDONLY, 0); - if (f_delta < 0) { - rsyserr(FERROR, errno, "Batch file %s open error", - filename); - close(f_delta); - exit_cleanup(1); - } - } - - bytes_read = read(f_delta, buff, len); - if (bytes_read < 0) { - rsyserr(FERROR, errno, "Batch file %s read error", filename); - close(f_delta); - exit_cleanup(1); - } - - return bytes_read; -} - void show_flist(int index, struct file_struct **fptr) { /* for debugging show_flist(flist->count, flist->files * */ diff --git a/clientserver.c b/clientserver.c index 555c3154..a8104839 100644 --- a/clientserver.c +++ b/clientserver.c @@ -97,7 +97,8 @@ int start_socket_client(char *host, char *path, int argc, char *argv[]) return ret < 0? 
ret : client_run(fd, fd, -1, argc, argv); } -int start_inband_exchange(char *user, char *path, int f_in, int f_out, int argc) +int start_inband_exchange(char *user, char *path, int f_in, int f_out, + int argc) { int i; char *sargs[MAX_ARGS]; diff --git a/compat.c b/compat.c index 6bd1a57d..57563b0b 100644 --- a/compat.c +++ b/compat.c @@ -27,12 +27,12 @@ int remote_protocol = 0; +extern int verbose; extern int am_server; - +extern int am_sender; +extern int read_batch; extern int checksum_seed; - extern int protocol_version; -extern int verbose; void setup_protocol(int f_out,int f_in) { @@ -47,6 +47,11 @@ void setup_protocol(int f_out,int f_in) if (protocol_version > remote_protocol) protocol_version = remote_protocol; } + if (read_batch && remote_protocol > protocol_version) { + rprintf(FERROR, "The protocol version in the batch file is too new (%d > %d).\n", + remote_protocol, protocol_version); + exit_cleanup(RERR_PROTOCOL); + } if (verbose > 3) { rprintf(FINFO, "(%s) Protocol versions: remote=%d, negotiated=%d\n", diff --git a/flist.c b/flist.c index 0b600acf..ebb70e49 100644 --- a/flist.c +++ b/flist.c @@ -64,9 +64,6 @@ extern int delete_excluded; extern int orig_umask; extern int list_only; -extern int read_batch; -extern int write_batch; - extern struct exclude_list_struct exclude_list; extern struct exclude_list_struct server_exclude_list; extern struct exclude_list_struct local_exclude_list; @@ -950,9 +947,6 @@ void send_file_name(int f, struct file_list *flist, char *fname, flist_expand(flist); - if (write_batch) - file->flags |= FLAG_TOP_DIR; - if (file->basename[0]) { flist->files[flist->count++] = file; send_file_entry(file, f, base_flags); @@ -1231,8 +1225,6 @@ struct file_list *send_file_list(int f, int argc, char *argv[]) io_end_buffering(); stats.flist_size = stats.total_written - start_write; stats.num_files = flist->count; - if (write_batch) - write_batch_flist_info(flist->count, flist->files); } if (verbose > 3) @@ -1301,13 +1293,11 @@ struct 
file_list *recv_file_list(int f) * protocol version 15 */ recv_uid_list(f, flist); - if (!read_batch) { - /* Recv the io_error flag */ - if (lp_ignore_errors(module_id) || ignore_errors) - read_int(f); - else - io_error |= read_int(f); - } + /* Recv the io_error flag */ + if (lp_ignore_errors(module_id) || ignore_errors) + read_int(f); + else + io_error |= read_int(f); } if (verbose > 3) diff --git a/generator.c b/generator.c index 6eb65d24..ffd7a0d3 100644 --- a/generator.c +++ b/generator.c @@ -47,6 +47,7 @@ extern int link_dest; extern int whole_file; extern int local_server; extern int list_only; +extern int read_batch; extern int only_existing; extern int orig_umask; extern int safe_symlinks; @@ -326,6 +327,9 @@ static void recv_generator(char *fname, struct file_struct *file, int i, return; } + if (read_batch) + return; + if (preserve_links && S_ISLNK(file->mode)) { #if SUPPORT_LINKS char lnk[MAXPATHLEN]; diff --git a/io.c b/io.c index 421571a5..9f9c3823 100644 --- a/io.c +++ b/io.c @@ -54,11 +54,14 @@ extern int am_server; extern int am_daemon; extern int am_sender; extern int eol_nulls; +extern int checksum_seed; +extern int protocol_version; extern char *remote_filesfrom_file; extern struct stats stats; const char phase_unknown[] = "unknown"; int select_timeout = SELECT_TIMEOUT; +int batch_fd = -1; /** * The connection might be dropped at some point; perhaps because the @@ -83,6 +86,9 @@ int kludge_around_eof = False; int msg_fd_in = -1; int msg_fd_out = -1; +static int write_batch_monitor_in = -1; +static int write_batch_monitor_out = -1; + static int io_filesfrom_f_in = -1; static int io_filesfrom_f_out = -1; static char io_filesfrom_buf[2048]; @@ -674,6 +680,11 @@ static void readfd(int fd, char *buffer, size_t N) total += ret; } + if (fd == write_batch_monitor_in) { + if ((size_t)write(batch_fd, buffer, total) != total) + exit_cleanup(RERR_FILEIO); + } + stats.total_read += total; } @@ -951,6 +962,11 @@ static void writefd(int fd,char *buf,size_t len) 
exit_cleanup(RERR_PROTOCOL); } + if (fd == write_batch_monitor_out) { + if ((size_t)write(batch_fd, buf, len) != len) + exit_cleanup(RERR_FILEIO); + } + if (!io_buffer || fd != multiplex_out_fd) { writefd_unbuffered(fd, buf, len); return; @@ -1109,3 +1125,25 @@ void io_multiplexing_close(void) io_multiplexing_out = 0; } +void start_write_batch(int fd) +{ + /* Some communication has already taken place, but we don't + * enable batch writing until here so that we can write a + * canonical record of the communication even though the + * actual communication so far depends on whether a daemon + * is involved. */ + write_int(batch_fd, protocol_version); + write_int(batch_fd, checksum_seed); + stats.total_written -= sizeof (int) * 2; + + if (am_sender) + write_batch_monitor_out = fd; + else + write_batch_monitor_in = fd; +} + +void stop_write_batch(void) +{ + write_batch_monitor_out = -1; + write_batch_monitor_in = -1; +} diff --git a/main.c b/main.c index 0ec6dee2..b34127c1 100644 --- a/main.c +++ b/main.c @@ -52,13 +52,14 @@ extern int rsync_port; extern int whole_file; extern int read_batch; extern int write_batch; +extern int batch_fd; extern int filesfrom_fd; extern pid_t cleanup_child_pid; extern char *files_from; extern char *remote_filesfrom_file; extern char *rsync_path; extern char *shell_cmd; -extern struct file_list *batch_flist; +extern char *batch_prefix; /* there's probably never more than at most 2 outstanding child processes, @@ -107,8 +108,19 @@ void wait_process(pid_t pid, int *status) *status = WEXITSTATUS(*status); } +/* This function gets called from all 3 processes. We want the client side + * to actually output the text, but the sender is the only process that has + * all the stats we need. So, if we're a client sender, we do the report. + * If we're a server sender, we write the stats on the supplied fd. If + * we're the client receiver we read the stats from the supplied fd and do + * the report. 
All processes might also generate a set of debug stats, if + * the verbose level is high enough (this is the only thing that the + * generator process and the server receiver ever do here). */ static void report(int f) { + /* Cache two stats because the read/write code can change it. */ + int64 total_read = stats.total_read; + int64 total_written = stats.total_written; time_t t = time(NULL); if (do_stats && verbose > 1) { @@ -128,13 +140,9 @@ static void report(int f) if (am_server) { if (am_sender) { - int64 w; - /* store total_written in a temporary - * because write_longint changes it */ - w = stats.total_written; - write_longint(f,stats.total_read); - write_longint(f,w); - write_longint(f,stats.total_size); + write_longint(f, total_read); + write_longint(f, total_written); + write_longint(f, stats.total_size); } return; } @@ -142,12 +150,15 @@ static void report(int f) /* this is the client */ if (!am_sender) { - int64 r; - stats.total_written = read_longint(f); - /* store total_read in a temporary, read_longint changes it */ - r = read_longint(f); + total_written = read_longint(f); + total_read = read_longint(f); stats.total_size = read_longint(f); - stats.total_read = r; + } else if (write_batch) { + /* The --read-batch process is going to be a client + * receiver, so we need to give it the stats. 
*/ + write_longint(batch_fd, total_read); + write_longint(batch_fd, total_written); + write_longint(batch_fd, stats.total_size); } if (do_stats) { @@ -164,19 +175,19 @@ static void report(int f) (double)stats.matched_data); rprintf(FINFO,"File list size: %d\n", stats.flist_size); rprintf(FINFO,"Total bytes written: %.0f\n", - (double)stats.total_written); + (double)total_written); rprintf(FINFO,"Total bytes read: %.0f\n", - (double)stats.total_read); + (double)total_read); } if (verbose || do_stats) { - rprintf(FINFO,"\nwrote %.0f bytes read %.0f bytes %.2f bytes/sec\n", - (double)stats.total_written, - (double)stats.total_read, - (stats.total_written+stats.total_read)/(0.5 + (t-starttime))); - rprintf(FINFO,"total size is %.0f speedup is %.2f\n", + rprintf(FINFO, + "\nwrote %.0f bytes read %.0f bytes %.2f bytes/sec\n", + (double)total_written, (double)total_read, + (total_written + total_read)/(0.5 + (t - starttime))); + rprintf(FINFO, "total size is %.0f speedup is %.2f\n", (double)stats.total_size, - (1.0*stats.total_size)/(stats.total_written+stats.total_read)); + (double)stats.total_size / (total_written+total_read)); } fflush(stdout); @@ -301,8 +312,6 @@ static pid_t do_cmd(char *cmd, char *machine, char *user, char *path, } if (local_server) { - if (read_batch) - create_flist_from_batch(); /* sets batch_flist */ /* If the user didn't request --[no-]whole-file, force * it on, but only if we're not batch processing. 
*/ if (whole_file < 0 && !read_batch && !write_batch) @@ -478,6 +487,8 @@ static int do_recv(int f_in,int f_out,struct file_list *flist,char *local_name) } am_generator = 1; + if (write_batch) + stop_write_batch(); close(error_pipe[1]); if (f_in != f_out) @@ -548,10 +559,7 @@ static void do_server_recv(int f_in, int f_out, int argc,char *argv[]) filesfrom_fd = -1; } - if (read_batch) - flist = batch_flist; - else - flist = recv_file_list(f_in); + flist = recv_file_list(f_in); if (!flist) { rprintf(FERROR,"server_recv: recv_file_list error\n"); exit_cleanup(RERR_FILESELECT); @@ -590,11 +598,10 @@ void start_server(int f_in, int f_out, int argc, char *argv[]) if (am_sender) { keep_dirlinks = 0; /* Must be disabled on the sender. */ - if (!read_batch) { - recv_exclude_list(f_in); - if (cvs_exclude) - add_cvs_excludes(); - } + + recv_exclude_list(f_in); + if (cvs_exclude) + add_cvs_excludes(); do_server_sender(f_in, f_out, argc, argv); } else { do_server_recv(f_in, f_out, argc, argv); @@ -614,15 +621,23 @@ int client_run(int f_in, int f_out, pid_t pid, int argc, char *argv[]) char *local_name = NULL; cleanup_child_pid = pid; - if (read_batch) - flist = batch_flist; - - set_nonblocking(f_in); - set_nonblocking(f_out); + if (read_batch) { + /* This is the heart of the read_batch approach: + * Switcher-roo the file descriptors, and + * nobody's the wiser. 
*/ + close(f_in); + close(f_out); + f_in = batch_fd; + f_out = do_open("/dev/null", O_WRONLY, 0); + assert(am_sender == 0); + } else { + set_nonblocking(f_in); + set_nonblocking(f_out); + } setup_protocol(f_out,f_in); - if (protocol_version >= 23) + if (protocol_version >= 23 && !read_batch) io_start_multiplex_in(f_in); if (am_sender) { @@ -634,6 +649,11 @@ int client_run(int f_in, int f_out, pid_t pid, int argc, char *argv[]) send_exclude_list(f_out); if (remote_filesfrom_file) filesfrom_fd = f_in; + + if (write_batch) + start_write_batch(f_out); + /* Can be unconditional, but this is theoretically + * more efficent for read_batch case. */ if (!read_batch) /* don't write to pipe */ flist = send_file_list(f_out,argc,argv); if (verbose > 3) @@ -660,6 +680,8 @@ int client_run(int f_in, int f_out, pid_t pid, int argc, char *argv[]) if (argc == 0) list_only = 1; + /* Can be unconditional, but this is theoretically more + * efficient for the read_batch case. */ if (!read_batch) send_exclude_list(f_out); @@ -668,6 +690,8 @@ int client_run(int f_in, int f_out, pid_t pid, int argc, char *argv[]) filesfrom_fd = -1; } + if (write_batch) + start_write_batch(f_in); flist = recv_file_list(f_in); if (!flist || flist->count == 0) { rprintf(FINFO, "client: nothing to do: " @@ -731,7 +755,8 @@ static int start_client(int argc, char *argv[]) return rc; /* rsync:// always uses rsync server over direct socket connection */ - if (strncasecmp(URL_PREFIX, argv[0], strlen(URL_PREFIX)) == 0) { + if (strncasecmp(URL_PREFIX, argv[0], strlen(URL_PREFIX)) == 0 + && !read_batch) { char *host, *path; host = argv[0] + strlen(URL_PREFIX); @@ -844,9 +869,12 @@ static int start_client(int argc, char *argv[]) } argc--; } else { /* read_batch */ - am_sender = 1; local_server = 1; shell_path = argv[argc-1]; + if (find_colon(shell_path)) { + rprintf(FERROR, "remote destination is not allowed with --read-batch\n"); + exit_cleanup(RERR_SYNTAX); + } } if (shell_machine) { @@ -1042,8 +1070,18 @@ int 
main(int argc,char *argv[]) init_flist(); - if (write_batch && !am_server) { - write_batch_argvs_file(orig_argc, orig_argv); + if (write_batch || read_batch) { + if (write_batch) + write_batch_argvs_file(orig_argc, orig_argv); + + batch_fd = do_open(batch_prefix, + write_batch ? O_WRONLY | O_CREAT | O_TRUNC + : O_RDONLY, S_IRUSR | S_IWUSR); + if (batch_fd < 0) { + rsyserr(FERROR, errno, "Batch file %s open error", + batch_prefix); + exit_cleanup(RERR_FILEIO); + } } if (am_daemon && !am_server) diff --git a/match.c b/match.c index 32bab93f..d731aae7 100644 --- a/match.c +++ b/match.c @@ -285,7 +285,6 @@ static void hash_search(int f,struct sum_struct *s, void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len) { char file_sum[MD4_SUM_LENGTH]; - extern int write_batch; last_match = 0; false_alarms = 0; @@ -323,8 +322,6 @@ void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len) if (verbose > 2) rprintf(FINFO,"sending file_sum\n"); write_buf(f,file_sum,MD4_SUM_LENGTH); - if (write_batch) - write_batch_delta_file(file_sum, MD4_SUM_LENGTH); if (targets) { free(targets); diff --git a/options.c b/options.c index 26b26ab5..d11e6419 100644 --- a/options.c +++ b/options.c @@ -133,7 +133,6 @@ int quiet = 0; int always_checksum = 0; int list_only = 0; -#define FIXED_CHECKSUM_SEED 32761 #define MAX_BATCH_PREFIX_LEN 256 /* Must be less than MAXPATHLEN-13 */ char *batch_prefix = NULL; @@ -571,13 +570,11 @@ int parse_arguments(int *argc, const char ***argv, int frommain) case OPT_WRITE_BATCH: /* popt stores the filename in batch_prefix for us */ write_batch = 1; - checksum_seed = FIXED_CHECKSUM_SEED; break; case OPT_READ_BATCH: /* popt stores the filename in batch_prefix for us */ read_batch = 1; - checksum_seed = FIXED_CHECKSUM_SEED; break; case OPT_TIMEOUT: @@ -643,6 +640,14 @@ int parse_arguments(int *argc, const char ***argv, int frommain) "write-batch and read-batch can not be used together\n"); exit_cleanup(RERR_SYNTAX); } + if 
((write_batch || read_batch) && am_server) { + rprintf(FERROR, + "batch-mode is incompatible with server mode\n"); + /* We don't actually exit_cleanup(), so that we can still service + * older version clients that still send batch args to server. */ + read_batch = write_batch = 0; + batch_prefix = NULL; + } if (batch_prefix && strlen(batch_prefix) > MAX_BATCH_PREFIX_LEN) { rprintf(FERROR, "the batch-file prefix must be %d characters or less.\n", @@ -655,12 +660,6 @@ int parse_arguments(int *argc, const char ***argv, int frommain) exit_cleanup(RERR_SYNTAX); } - if (do_compression && (write_batch || read_batch)) { - rprintf(FERROR, - "compress can not be used with write-batch or read-batch\n"); - exit_cleanup(RERR_SYNTAX); - } - if (archive_mode) { if (!files_from) recurse = 1; @@ -884,13 +883,6 @@ void server_options(char **args,int *argc) args[ac++] = arg; } - if (batch_prefix) { - char *r_or_w = write_batch ? "write" : "read"; - if (asprintf(&arg, "--%s-batch=%s", r_or_w, batch_prefix) < 0) - goto oom; - args[ac++] = arg; - } - if (io_timeout) { if (asprintf(&arg, "--timeout=%d", io_timeout) < 0) goto oom; diff --git a/pipe.c b/pipe.c index 3a45dd11..90c7679f 100644 --- a/pipe.c +++ b/pipe.c @@ -26,6 +26,7 @@ extern int am_server; extern int blocking_io; extern int orig_umask; extern int read_batch; +extern int write_batch; extern int filesfrom_fd; /** @@ -94,7 +95,19 @@ pid_t piped_child(char **command, int *f_in, int *f_out) return pid; } -pid_t local_child(int argc, char **argv,int *f_in,int *f_out, +/* + * This function forks a child which calls child_main(). First, + * however, it has to establish communication paths to and from the + * newborn child. It creates two socket pairs -- one for writing to + * the child (from the parent) and one for reading from the child + * (writing to the parent). Since that's four socket ends, each + * process has to close the two ends it doesn't need. The remaining + * two socket ends are retained for reading and writing. 
In the + * child, the STDIN and STDOUT file descriptors refer to these + * sockets. In the parent, the function arguments f_in and f_out are + * set to refer to these sockets. + */ +pid_t local_child(int argc, char **argv, int *f_in, int *f_out, int (*child_main)(int, char*[])) { pid_t pid; @@ -107,16 +120,22 @@ pid_t local_child(int argc, char **argv,int *f_in,int *f_out, exit_cleanup(RERR_IPC); } - pid = do_fork(); + /* For read-batch, don't even fork. */ + pid = read_batch ? getpid() : do_fork(); + if (pid == -1) { rsyserr(FERROR, errno, "fork"); exit_cleanup(RERR_IPC); } if (pid == 0) { - am_sender = read_batch ? 0 : !am_sender; + am_sender = !am_sender; am_server = 1; + /* The server side never writes the batch, even if it + * is local (it makes the logic easier elsewhere). */ + write_batch = 0; + if (!am_sender) filesfrom_fd = -1; diff --git a/rsync.yo b/rsync.yo index 4805c010..0f07c46b 100644 --- a/rsync.yo +++ b/rsync.yo @@ -32,7 +32,7 @@ report that accompanies this package. Some of the additional features of rsync are: itemize( - it() support for copying links, devices, owners, groups and permissions + it() support for copying links, devices, owners, groups, and permissions it() exclude and exclude-from options similar to GNU tar it() a CVS exclude mode for ignoring the same files that CVS would ignore it() can use any transparent remote shell, including ssh or rsh @@ -347,8 +347,8 @@ verb( --log-format=FORMAT log file transfers using specified format --password-file=FILE get password from FILE --bwlimit=KBPS limit I/O bandwidth, KBytes per second - --write-batch=PREFIX write batch fileset starting with PREFIX - --read-batch=PREFIX read batch fileset starting with PREFIX + --write-batch=FILE write a batch to FILE + --read-batch=FILE read a batch from FILE --checksum-seed=NUM set block/file checksum seed -4 --ipv4 prefer IPv4 -6 --ipv6 prefer IPv6 @@ -897,13 +897,13 @@ transfer was too fast, it will wait before sending the next data block. 
The result is an average transfer rate equaling the specified limit. A value of zero specifies no limit. -dit(bf(--write-batch=PREFIX)) Generate a set of files that can be -transferred as a batch update. Each filename in the set starts with -PREFIX. See the "BATCH MODE" section for details. +dit(bf(--write-batch=FILE)) Record a file that can later be applied to +another identical destination with --read-batch. See the "BATCH MODE" +section for details. -dit(bf(--read-batch=PREFIX)) Apply a previously generated change batch, -using the fileset whose filenames start with PREFIX. See the "BATCH -MODE" section for details. +dit(bf(--read-batch=FILE)) Apply all of the changes stored in FILE, a +file previously generated by --write-batch. See the "BATCH MODE" +section for details. dit(bf(-4, --ipv4) or bf(-6, --ipv6)) Tells rsync to prefer IPv4/IPv6 when creating sockets. This only affects sockets that rsync has direct @@ -917,16 +917,12 @@ try specifying --ipv6 or --ipv4 when starting the daemon). dit(bf(--checksum-seed=NUM)) Set the MD4 checksum seed to the integer NUM. This 4 byte checksum seed is included in each block and file MD4 checksum calculation. By default the checksum seed is generated -by the server and defaults to the current time(), or 32761 if -bf(--write-batch) or bf(--read-batch) are specified. This option +by the server and defaults to the current time(). This option is used to set a specific checksum seed, which is useful for applications that want repeatable block and file checksums, or in the case where the user wants a more random checksum seed. Note that setting NUM to 0 causes rsync to use the default of time() -for checksum seed. Note also that bf(--write-batch) and bf(--read-batch) -set the checksum seed to 32761, so bf(--checksum-seed=NUM) needs to -follow these options if you want to specify a different checksum -seed in batch mode. +for checksum seed. 
enddit() @@ -1107,53 +1103,45 @@ source tree and those changes need to be propagated to the other hosts. In order to do this using batch mode, rsync is run with the write-batch option to apply the changes made to the source tree to one of the destination trees. The write-batch option causes the rsync -client to store the information needed to repeat this operation against -other destination trees in a batch update fileset (see below). The -filename of each file in the fileset starts with a prefix specified by -the user as an argument to the write-batch option. This fileset is -then copied to each remote host, where rsync is run with the read-batch -option, again specifying the same prefix, and the destination tree. -Rsync updates the destination tree using the information stored in the -batch update fileset. - -The fileset consists of 4 files: - -itemize( -it() bf(.rsync_argvs) command-line arguments -it() bf(.rsync_flist) rsync internal file metadata -it() bf(.rsync_csums) rsync checksums -it() bf(.rsync_delta) data blocks for file update & change -) - -The .rsync_argvs file contains a command-line suitable for updating a -destination tree using that batch update fileset. It can be executed -using a Bourne(-like) shell, optionally passing in an alternate -destination tree pathname which is then used instead of the original -path. This is useful when the destination tree path differs from the -original destination tree path. - -Generating the batch update fileset once saves having to perform the -file status, checksum and data block generation more than once when +client to store in a "batch file" all the information needed to repeat +this operation against other, identical destination trees. + +To apply the recorded changes to another destination tree, run rsync +with the read-batch option, specifying the name of the same batch +file, and the destination tree. Rsync updates the destination tree +using the information stored in the batch file. 
+ +For convenience, one additional file is created when the write-batch +option is used. This file's name is created by appending +".rsync_argvs" to the batch filename. The .rsync_argvs file contains +a command-line suitable for updating a destination tree using that +batch file. It can be executed using a Bourne(-like) shell, optionally +passing in an alternate destination tree pathname which is then used +instead of the original path. This is useful when the destination tree +path differs from the original destination tree path. + +Generating the batch file once saves having to perform the file +status, checksum, and data block generation more than once when updating multiple destination trees. Multicast transport protocols can -be used to transfer the batch update files in parallel to many hosts at -once, instead of sending the same data to every host individually. +be used to transfer the batch update files in parallel to many hosts +at once, instead of sending the same data to every host individually. Example: verb( - $ rsync --write-batch=pfx -a /source/dir/ /adest/dir/ - $ rcp pfx.rsync_* remote: - $ ssh remote rsync --read-batch=pfx -a /bdest/dir/ + $ rsync --write-batch=batch -a /source/dir/ /adest/dir/ + $ rcp batch* remote: + $ ssh remote rsync --read-batch=batch -a /bdest/dir/ # or alternatively - $ ssh remote ./pfx.rsync_argvs /bdest/dir/ + $ ssh remote ./batch.rsync_argvs /bdest/dir/ ) In this example, rsync is used to update /adest/dir/ with /source/dir/ -and the information to repeat this operation is stored in the files -pfx.rsync_*. These files are then copied to the machine named "remote". -Rsync is then invoked on "remote" to update /bdest/dir/ the same way as -/adest/dir/. The last line shows the rsync_argvs file being used to -invoke rsync. +and the information to repeat this operation is stored in "batch" and +"batch.rsync_argvs". These files are then copied to the machine named +"remote". 
Rsync is then invoked on "remote" to update /bdest/dir/ the +same way as /adest/dir/. The last line shows the rsync_argvs file +being used to invoke rsync. Caveats: @@ -1165,16 +1153,23 @@ destination tree in a partially updated state. In that case, rsync can be used in its regular (non-batch) mode of operation to fix up the destination tree. -The rsync version used on all destinations should be identical to the -one used on the original destination. - -The -z/--compress option does not work in batch mode and yields a usage -error. A separate compression tool can be used instead to reduce the -size of the batch update files for transport to the destination. +The rsync version used on all destinations must be at least as new as the +one used to generate the batch file. The -n/--dryrun option does not work in batch mode and yields a runtime error. +When reading a batch file, you should use a set of options equivalent to +the one you used when generating it, with a few exceptions. For instance +--write-batch changes to --read-batch, --files-from is dropped, and the +--include/--exclude options are not needed unless --delete is specified +without --delete-excluded. Other options that affect how the update +happens should generally remain the same, as it is possible to confuse rsync +into expecting a different data stream than the one that is contained in +the batch file. For example, it would not work to change the setting of +the -H or -c option, but it would work to add or remove the --delete +option. + See bf(http://www.ils.unc.edu/i2dsi/unc_rsync+.html) for papers and technical reports.
diff --git a/sender.c b/sender.c index 3c14ddf5..9fe5f623 100644 --- a/sender.c +++ b/sender.c @@ -118,13 +118,7 @@ void send_files(struct file_list *flist, int f_out, int f_in) int phase = 0; extern struct stats stats; struct stats initial_stats; - extern int write_batch; - extern int read_batch; - int checksums_match; - int buff_len; - char buff[CHUNK_SIZE]; int j; - int done; if (verbose > 2) rprintf(FINFO, "send_files starting\n"); @@ -186,54 +180,45 @@ void send_files(struct file_list *flist, int f_out, int f_in) return; } - if (write_batch) - write_batch_csum_info(&i, s); - - if (!read_batch) { - fd = do_open(fname, O_RDONLY, 0); - if (fd == -1) { - if (errno == ENOENT) { - enum logcode c = am_daemon - && protocol_version < 28 ? FERROR - : FINFO; - io_error |= IOERR_VANISHED; - rprintf(c, "file has vanished: %s\n", - full_fname(fname)); - } else { - io_error |= IOERR_GENERAL; - rsyserr(FERROR, errno, - "send_files failed to open %s", - full_fname(fname)); - } - free_sums(s); - continue; - } - - /* map the local file */ - if (do_fstat(fd, &st) != 0) { + fd = do_open(fname, O_RDONLY, 0); + if (fd == -1) { + if (errno == ENOENT) { + enum logcode c = am_daemon + && protocol_version < 28 ? FERROR + : FINFO; + io_error |= IOERR_VANISHED; + rprintf(c, "file has vanished: %s\n", + full_fname(fname)); + } else { io_error |= IOERR_GENERAL; - rsyserr(FERROR, errno, "fstat failed"); - free_sums(s); - close(fd); - return; - } - - mbuf = st.st_size ? 
map_file(fd, st.st_size) : NULL; - - if (verbose > 2) { - rprintf(FINFO, "send_files mapped %s of size %.0f\n", - fname, (double)st.st_size); + rsyserr(FERROR, errno, + "send_files failed to open %s", + full_fname(fname)); } + free_sums(s); + continue; + } - write_int(f_out, i); + /* map the local file */ + if (do_fstat(fd, &st) != 0) { + io_error |= IOERR_GENERAL; + rsyserr(FERROR, errno, "fstat failed"); + free_sums(s); + close(fd); + return; + } - if (write_batch) - write_batch_delta_file((char *)&i, sizeof i); + mbuf = st.st_size ? map_file(fd, st.st_size) : NULL; - write_sum_head(f_out, s); + if (verbose > 2) { + rprintf(FINFO, "send_files mapped %s of size %.0f\n", + fname, (double)st.st_size); } - if (verbose > 2 && !read_batch) + write_int(f_out, i); + write_sum_head(f_out, s); + + if (verbose > 2) rprintf(FINFO, "calling match_sums %s\n", fname); if (!am_server && verbose) { @@ -242,59 +227,19 @@ void send_files(struct file_list *flist, int f_out, int f_in) set_compression(fname); - if (read_batch) { - /* read checksums originally computed on sender side */ - read_batch_csum_info(i, s, &checksums_match); - if (checksums_match) { - read_batch_delta_file((char*)&j, sizeof (int)); - if (j != i) { /* if flist index entries don't match*/ - rprintf(FINFO, "index mismatch in send_files\n"); - rprintf(FINFO, "read index = %d flist ndx = %d\n", j, i); - close_batch_delta_file(); - close_batch_csums_file(); - exit_cleanup(1); - } else { - write_int(f_out, j); - write_sum_head(f_out, s); - done = 0; - while (!done) { - read_batch_delta_file((char*)&buff_len, sizeof (int)); - write_int(f_out, buff_len); - if (buff_len == 0) { - done = 1; - } else { - if (buff_len > 0) { - read_batch_delta_file(buff, buff_len); - write_buf(f_out, buff, buff_len); - } - } - } - read_batch_delta_file(buff, MD4_SUM_LENGTH); - write_buf(f_out, buff, MD4_SUM_LENGTH); - - } - } else { /* not checksum match */ - rprintf (FINFO, "readbatch & checksums don't match\n"); - rprintf (FINFO, 
"filename=%s is being skipped\n", fname); - continue; - } - } else { /* not read_batch */ - match_sums(f_out, s, mbuf, st.st_size); - log_send(file, &initial_stats); - } + match_sums(f_out, s, mbuf, st.st_size); + log_send(file, &initial_stats); - if (!read_batch) { - if (mbuf) { - j = unmap_file(mbuf); - if (j) { - io_error |= IOERR_GENERAL; - rsyserr(FERROR, j, - "read errors mapping %s", - full_fname(fname)); - } + if (mbuf) { + j = unmap_file(mbuf); + if (j) { + io_error |= IOERR_GENERAL; + rsyserr(FERROR, j, + "read errors mapping %s", + full_fname(fname)); } - close(fd); } + close(fd); free_sums(s); @@ -308,14 +253,4 @@ void send_files(struct file_list *flist, int f_out, int f_in) match_report(); write_int(f_out, -1); - if (write_batch || read_batch) { - close_batch_csums_file(); - close_batch_delta_file(); - } - } - - - - - diff --git a/token.c b/token.c index 707512ef..c7e898ce 100644 --- a/token.c +++ b/token.c @@ -22,7 +22,6 @@ extern int do_compression; extern int module_id; -extern int write_batch; static int compression_level = Z_DEFAULT_COMPRESSION; @@ -97,28 +96,18 @@ static int simple_recv_token(int f,char **data) static void simple_send_token(int f,int token, struct map_struct *buf,OFF_T offset,int n) { - int hold_int; - if (n > 0) { int l = 0; while (l < n) { int n1 = MIN(CHUNK_SIZE,n-l); write_int(f,n1); write_buf(f,map_ptr(buf,offset+l,n1),n1); - if (write_batch) { - write_batch_delta_file( (char *) &n1, sizeof(int) ); - write_batch_delta_file(map_ptr(buf,offset+l,n1),n1); - } l += n1; } } /* a -2 token means to send data only and no token */ if (token != -2) { write_int(f,-(token+1)); - if (write_batch) { - hold_int = -(token+1); - write_batch_delta_file( (char *) &hold_int, sizeof(int) ); - } } } @@ -165,7 +154,6 @@ send_deflated_token(int f, int token, { int n, r; static int init_done, flush_pending; - char temp_byte; if (last_token == -1) { /* initialization */ @@ -198,28 +186,13 @@ send_deflated_token(int f, int token, n = last_token - 
run_start; if (r >= 0 && r <= 63) { write_byte(f, (n==0? TOKEN_REL: TOKENRUN_REL) + r); - if (write_batch) { - temp_byte = (char)( (n==0? TOKEN_REL: TOKENRUN_REL) + r); - write_batch_delta_file(&temp_byte,sizeof(char)); - } } else { write_byte(f, (n==0? TOKEN_LONG: TOKENRUN_LONG)); write_int(f, run_start); - if (write_batch) { - temp_byte = (char)(n==0? TOKEN_LONG: TOKENRUN_LONG); - write_batch_delta_file(&temp_byte,sizeof(char)); - write_batch_delta_file((char *)&run_start,sizeof(run_start)); - } } if (n != 0) { write_byte(f, n); write_byte(f, n >> 8); - if (write_batch) { - temp_byte = (char)n; - write_batch_delta_file(&temp_byte,sizeof(char)); - temp_byte = (char)(n >> 8); - write_batch_delta_file(&temp_byte,sizeof(char)); - } } last_run_end = last_token; run_start = token; @@ -278,8 +251,6 @@ send_deflated_token(int f, int token, obuf[0] = DEFLATED_DATA + (n >> 8); obuf[1] = n; write_buf(f, obuf, n+2); - if (write_batch) - write_batch_delta_file(obuf,n+2); } } } while (nb != 0 || tx_strm.avail_out == 0); @@ -289,11 +260,6 @@ send_deflated_token(int f, int token, if (token == -1) { /* end of file - clean up */ write_byte(f, END_FLAG); - if (write_batch) { - temp_byte = END_FLAG; - write_batch_delta_file(&temp_byte,sizeof(char)); - } - } else if (token != -2) { /* add the data in the current block to the compressor's history and hash table */ -- 2.34.1