From 05c36015f79d0d2975f15b08e31ea72825700f11 Mon Sep 17 00:00:00 2001 From: Wayne Davison Date: Sat, 2 Jan 2010 10:51:09 -0800 Subject: [PATCH] More --timeout improvements, especially for the receiving side: - The receiver now sends keep-alive messages to the generator when it is actively doing work and hasn't sent anything recently. This ensures that the generator won't timeout if the receiver is working hard. - The perform_io() code has improved keep-alive participation. - Allow the sender to send some keep-alive messages, which ensures that if it is in a lull, it can probe the socket. --- generator.c | 6 +++--- io.c | 60 ++++++++++++++++++++++++++++++++++++----------------- main.c | 5 ----- receiver.c | 4 ++++ rsync.c | 2 +- rsync.h | 6 +++++- sender.c | 5 ++--- 7 files changed, 56 insertions(+), 32 deletions(-) diff --git a/generator.c b/generator.c index 33b7ce2a..e7c1ef7d 100644 --- a/generator.c +++ b/generator.c @@ -289,7 +289,7 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev) rprintf(FINFO, "delete_in_dir(%s)\n", fbuf); if (allowed_lull) - maybe_send_keepalive(time(NULL), True); + maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH); if (io_error && !ignore_errors) { if (already_warned) @@ -1928,7 +1928,7 @@ static void touch_up_dirs(struct file_list *flist, int ndx) } if (counter >= loopchk_limit) { if (allowed_lull) - maybe_send_keepalive(time(NULL), True); + maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH); else maybe_flush_socket(0); counter = 0; @@ -2128,7 +2128,7 @@ void generate_files(int f_out, const char *local_name) if (i + cur_flist->ndx_start >= next_loopchk) { if (allowed_lull) - maybe_send_keepalive(time(NULL), True); + maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH); else maybe_flush_socket(0); next_loopchk += loopchk_limit; diff --git a/io.c b/io.c index 077e5033..3388269c 100644 --- a/io.c +++ b/io.c @@ -69,7 +69,6 @@ int batch_fd = -1; int msgdone_cnt = 0; int forward_flist_data = 0; BOOL flist_receiving_enabled = False; -BOOL we_send_keepalive_messages = False; /* Ignore an EOF error if non-zero. See whine_about_eof(). */ int kluge_around_eof = 0; @@ -157,23 +156,33 @@ static void check_timeout(BOOL allow_keepalive) { time_t t, chk; - /* On the receiving side, the generator is now handling timeouts, so - * the receiver ignores them. Note that the am_receiver flag is not - * set until the receiver forks from the generator, so timeouts will be - * based on receiving data on the receiving side until that event. */ - if (!io_timeout || am_receiver) + /* On the receiving side, the generator is now the one that decides + * when a timeout has occurred. When it is sifting through a lot of + * files looking for work, it will be sending keep-alive messages to + * the sender, and even though the receiver won't be sending/receiving + * anything (not even keep-alive messages), the successful writes to + * the sender will keep things going. If the receiver is actively + * receiving data, it will ensure that the generator knows that it is + * not idle by sending the generator keep-alive messages (since the + * generator might be blocked trying to send checksums, it needs to + * know that the receiver is active). Thus, as long as one or the + * other is successfully doing work, the generator will not timeout. */ + if (!io_timeout) return; t = time(NULL); - if (allow_keepalive && we_send_keepalive_messages) { + if (allow_keepalive) { /* This may put data into iobuf.msg w/o flushing. */ - maybe_send_keepalive(t, False); + maybe_send_keepalive(t, 0); } if (!last_io_in) last_io_in = t; + if (am_receiver) + return; + chk = MAX(last_io_out, last_io_in); if (t - chk >= io_timeout) { if (am_server) @@ -261,8 +270,8 @@ static size_t safe_read(int fd, char *buf, size_t len) who_am_i()); exit_cleanup(RERR_FILEIO); } - if (we_send_keepalive_messages) - maybe_send_keepalive(time(NULL), True); + if (io_timeout) + maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH); continue; } @@ -346,8 +355,8 @@ static void safe_write(int fd, const char *buf, size_t len) what_fd_is(fd), who_am_i()); exit_cleanup(RERR_FILEIO); } - if (we_send_keepalive_messages) - maybe_send_keepalive(time(NULL), True); + if (io_timeout) + maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH); continue; } @@ -781,8 +790,11 @@ static char *perform_io(size_t needed, int flags) if (msgs2stderr && DEBUG_GTE(IO, 2)) rprintf(FINFO, "[%s] recv=%ld\n", who_am_i(), (long)n); - if (io_timeout) + if (io_timeout) { last_io_in = time(NULL); + if (flags & PIO_NEED_INPUT) + maybe_send_keepalive(last_io_in, 0); + } stats.total_read += n; iobuf.in.len += n; @@ -1053,13 +1065,15 @@ void io_set_sock_fds(int f_in, int f_out) void set_io_timeout(int secs) { io_timeout = secs; + allowed_lull = (io_timeout + 1) / 2; - if (!io_timeout || io_timeout > SELECT_TIMEOUT) + if (!io_timeout || allowed_lull > SELECT_TIMEOUT) select_timeout = SELECT_TIMEOUT; else - select_timeout = io_timeout; + select_timeout = allowed_lull; - allowed_lull = read_batch ? 0 : (io_timeout + 1) / 2; + if (read_batch) + allowed_lull = 0; } static void check_for_d_option_error(const char *msg) @@ -1333,12 +1347,20 @@ void maybe_flush_socket(int important) * rsync versions. This avoids any message forwarding, and leaves the raw-data * stream alone (since we can never be quite sure if that stream is in the * right state for a keep-alive message). */ -void maybe_send_keepalive(time_t now, BOOL allow_flush) +void maybe_send_keepalive(time_t now, int flags) { + if (flags & MSK_ACTIVE_RECEIVER) + last_io_in = now; /* Fudge things when we're working hard on the files. */ + if (now - last_io_out >= allowed_lull) { + /* The receiver is special: it only sends keep-alive messages if it is + * actively receiving data. Otherwise, it lets the generator timeout. */ + if (am_receiver && now - last_io_in >= io_timeout) + return; + if (!iobuf.msg.len && iobuf.out.len == iobuf.out_empty_len) send_msg(MSG_DATA, "", 0, 0); - if (!allow_flush) { + if (!(flags & MSK_ALLOW_FLUSH)) { /* Let the caller worry about writing out the data. */ } else if (iobuf.msg.len) perform_io(iobuf.msg.size - iobuf.msg.len + 1, PIO_NEED_MSGROOM); @@ -1430,7 +1452,7 @@ static void read_a_msg(void) goto invalid_msg; iobuf.in_multiplexed = 1; if (am_sender) - maybe_send_keepalive(time(NULL), True); + maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH); break; case MSG_DELETED: if (msg_bytes >= sizeof data) diff --git a/main.c b/main.c index 59550013..4f3729e8 100644 --- a/main.c +++ b/main.c @@ -73,7 +73,6 @@ extern int send_msgs_to_gen; extern pid_t cleanup_child_pid; extern size_t bwlimit_writemax; extern unsigned int module_dirlen; -extern BOOL we_send_keepalive_messages; extern BOOL flist_receiving_enabled; extern BOOL shutting_down; extern struct stats stats; @@ -765,8 +764,6 @@ static void do_server_sender(int f_in, int f_out, int argc, char *argv[]) struct file_list *flist; char *dir = argv[0]; - we_send_keepalive_messages = io_timeout != 0 && protocol_version < 31; - if (DEBUG_GTE(SEND, 1)) { rprintf(FINFO, "server_sender starting pid=%ld\n", (long)getpid()); @@ -907,7 +904,6 @@ static int do_recv(int f_in, int f_out, char *local_name) } am_generator = 1; - we_send_keepalive_messages = io_timeout != 0; flist_receiving_enabled = True; io_end_multiplex_in(MPLX_SWITCHING); @@ -1112,7 +1108,6 @@ int client_run(int f_in, int f_out, pid_t pid, int argc, char *argv[]) if (am_sender) { keep_dirlinks = 0; /* Must be disabled on the sender. */ - we_send_keepalive_messages = io_timeout != 0 && protocol_version < 31; if (always_checksum && (log_format_has(stdout_format, 'C') diff --git a/receiver.c b/receiver.c index e8fa3d16..6688dda7 100644 --- a/receiver.c +++ b/receiver.c @@ -48,6 +48,7 @@ extern int keep_partial; extern int checksum_len; extern int checksum_seed; extern int inplace; +extern int allowed_lull; extern int delay_updates; extern mode_t orig_umask; extern struct stats stats; @@ -254,6 +255,9 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, if (INFO_GTE(PROGRESS, 1)) show_progress(offset, total_size); + if (allowed_lull) + maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH | MSK_ACTIVE_RECEIVER); + if (i > 0) { if (DEBUG_GTE(DELTASUM, 3)) { rprintf(FINFO,"data recv %d at %s\n", diff --git a/rsync.c b/rsync.c index 938969a8..cefbe5f4 100644 --- a/rsync.c +++ b/rsync.c @@ -368,7 +368,7 @@ int read_ndx_and_attrs(int f_in, int f_out, int *iflag_ptr, uchar *type_ptr, /* Support the protocol-29 keep-alive style. */ if (protocol_version < 30 && ndx == cur_flist->used && iflags == ITEM_IS_NEW) { if (am_sender) - maybe_send_keepalive(time(NULL), True); + maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH); goto read_loop; } diff --git a/rsync.h b/rsync.h index 1cf7c6e0..731f4fea 100644 --- a/rsync.h +++ b/rsync.h @@ -235,7 +235,7 @@ enum msgcode { MSG_IO_ERROR=22,/* the sending side had an I/O error */ MSG_IO_TIMEOUT=33,/* tell client about a daemon's timeout value */ MSG_NOOP=42, /* a do-nothing message (legacy protocol-30 only) */ - MSG_ERROR_EXIT=86, /* used by siblings and by protocol-31 */ + MSG_ERROR_EXIT=86, /* synchronize an error exit (siblings and protocol >= 31) */ MSG_SUCCESS=100,/* successfully updated indicated flist index */ MSG_DELETED=101,/* successfully deleted a file on receiving side */ MSG_NO_SEND=102,/* sender failed to open a file we wanted */ @@ -267,6 +267,10 @@ enum delret { #define MKP_DROP_NAME (1<<0) /* drop trailing filename or trailing slash */ #define MKP_SKIP_SLASH (1<<1) /* skip one or more leading slashes */ +/* Defines for maybe_send_keepalive() */ +#define MSK_ALLOW_FLUSH (1<<0) +#define MSK_ACTIVE_RECEIVER (1<<1) + #include "errcode.h" #include "config.h" diff --git a/sender.c b/sender.c index 60820dd8..600ad847 100644 --- a/sender.c +++ b/sender.c @@ -43,7 +43,6 @@ extern int inplace; extern int batch_fd; extern int write_batch; extern int file_old_total; -extern BOOL we_send_keepalive_messages; extern struct stats stats; extern struct file_list *cur_flist, *first_flist, *dir_flist; @@ -64,7 +63,7 @@ static struct sum_struct *receive_sums(int f) { struct sum_struct *s; int32 i; - int lull_mod = allowed_lull * 5; + int lull_mod = protocol_version >= 31 ? 0 : allowed_lull * 5; OFF_T offset = 0; if (!(s = new(struct sum_struct))) @@ -105,7 +104,7 @@ static struct sum_struct *receive_sums(int f) s->sums[i].len = s->blength; offset += s->sums[i].len; - if (we_send_keepalive_messages && !(i % lull_mod)) + if (lull_mod && !(i % lull_mod)) maybe_send_keepalive(time(NULL), True); if (DEBUG_GTE(DELTASUM, 3)) { -- 2.34.1