Improve --timeout method to take into account all I/O that is going on.
author Wayne Davison <wayned@samba.org>
Sat, 19 Dec 2009 18:15:15 +0000 (10:15 -0800)
committer Wayne Davison <wayned@samba.org>
Sat, 19 Dec 2009 19:00:36 +0000 (11:00 -0800)
The receiving side also switches timeout handling from the receiver to
the generator, which obviates the need for the sender to send any
keep-alive messages at all (for protocol 31 and beyond).  Given this
setup, all keep-alive messages are now sent as empty MSG_DATA messages,
with MSG_NOOP messages only being understood and (when necessary) acted
upon to forward a keep-alive event to an older receiver.  This is both
safer and more compatible with older versions.

generator.c
io.c
main.c
rsync.c
rsync.h
sender.c

diff --git a/generator.c b/generator.c
index b5db075..33b7ce2 100644 (file)
--- a/generator.c
+++ b/generator.c
@@ -71,7 +71,6 @@ extern int io_error;
 extern int flist_eof;
 extern int allowed_lull;
 extern int sock_f_out;
-extern int ignore_timeout;
 extern int protocol_version;
 extern int file_total;
 extern int fuzzy_basis;
@@ -290,7 +289,7 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
                rprintf(FINFO, "delete_in_dir(%s)\n", fbuf);
 
        if (allowed_lull)
-               maybe_send_keepalive();
+               maybe_send_keepalive(time(NULL), True);
 
        if (io_error && !ignore_errors) {
                if (already_warned)
@@ -1929,7 +1928,7 @@ static void touch_up_dirs(struct file_list *flist, int ndx)
                }
                if (counter >= loopchk_limit) {
                        if (allowed_lull)
-                               maybe_send_keepalive();
+                               maybe_send_keepalive(time(NULL), True);
                        else
                                maybe_flush_socket(0);
                        counter = 0;
@@ -2076,12 +2075,6 @@ void generate_files(int f_out, const char *local_name)
                        : "enabled");
        }
 
-       /* Since we often fill up the outgoing socket and then just sit around
-        * waiting for the other 2 processes to do their thing, we don't want
-        * to exit on a timeout.  If the data stops flowing, the receiver will
-        * notice that and let us know via the redo pipe (or its closing). */
-       ignore_timeout = 1;
-
        dflt_perms = (ACCESSPERMS & ~orig_umask);
 
        do {
@@ -2135,7 +2128,7 @@ void generate_files(int f_out, const char *local_name)
 
                        if (i + cur_flist->ndx_start >= next_loopchk) {
                                if (allowed_lull)
-                                       maybe_send_keepalive();
+                                       maybe_send_keepalive(time(NULL), True);
                                else
                                        maybe_flush_socket(0);
                                next_loopchk += loopchk_limit;
diff --git a/io.c b/io.c
index 1ae25ce..6c2b220 100644 (file)
--- a/io.c
+++ b/io.c
@@ -39,6 +39,7 @@ extern size_t bwlimit_writemax;
 extern int io_timeout;
 extern int am_server;
 extern int am_sender;
+extern int am_receiver;
 extern int am_generator;
 extern int msgs2stderr;
 extern int inc_recurse;
@@ -64,11 +65,11 @@ extern iconv_t ic_send, ic_recv;
 
 int csum_length = SHORT_SUM_LENGTH; /* initial value */
 int allowed_lull = 0;
-int ignore_timeout = 0;
 int batch_fd = -1;
 int msgdone_cnt = 0;
 int forward_flist_data = 0;
 BOOL flist_receiving_enabled = False;
+BOOL we_send_keepalive_messages = False;
 
 /* Ignore an EOF error if non-zero. See whine_about_eof(). */
 int kluge_around_eof = 0;
@@ -152,25 +153,35 @@ static void read_a_msg(void);
 static void drain_multiplex_messages(void);
 static void sleep_for_bwlimit(int bytes_written);
 
-static void check_timeout(void)
+static void check_timeout(BOOL allow_keepalive)
 {
-       time_t t;
+       time_t t, chk;
 
-       if (!io_timeout || ignore_timeout)
+       /* On the receiving side, the generator is now handling timeouts, so
+        * the receiver ignores them.  Note that the am_receiver flag is not
+        * set until the receiver forks from the generator, so timeouts will be
+        * based on receiving data on the receiving side until that event. */
+       if (!io_timeout || am_receiver)
                return;
 
-       if (!last_io_in) {
-               last_io_in = time(NULL);
-               return;
+       t = time(NULL);
+
+       if (allow_keepalive && we_send_keepalive_messages) {
+               /* This may put data into iobuf.msg w/o flushing. */
+               maybe_send_keepalive(t, False);
        }
 
-       t = time(NULL);
+       if (!last_io_in)
+               last_io_in = t;
+       if (!last_io_out)
+               last_io_out = t;
 
-       if (t - last_io_in >= io_timeout) {
+       chk = MAX(last_io_out, last_io_in);
+       if (t - chk >= io_timeout) {
                if (am_server)
                        msgs2stderr = 1;
                rprintf(FERROR, "[%s] io timeout after %d seconds -- exiting\n",
-                       who_am_i(), (int)(t-last_io_in));
+                       who_am_i(), (int)(t-chk));
                exit_cleanup(RERR_TIMEOUT);
        }
 }
@@ -252,7 +263,8 @@ static size_t safe_read(int fd, char *buf, size_t len)
                                        who_am_i());
                                exit_cleanup(RERR_FILEIO);
                        }
-                       check_timeout();
+                       if (we_send_keepalive_messages)
+                               maybe_send_keepalive(time(NULL), True);
                        continue;
                }
 
@@ -336,7 +348,8 @@ static void safe_write(int fd, const char *buf, size_t len)
                                        what_fd_is(fd), who_am_i());
                                exit_cleanup(RERR_FILEIO);
                        }
-                       check_timeout();
+                       if (we_send_keepalive_messages)
+                               maybe_send_keepalive(time(NULL), True);
                        continue;
                }
 
@@ -733,7 +746,7 @@ static char *perform_io(size_t needed, int flags)
                                send_extra_file_list(sock_f_out, -1);
                                extra_flist_sending_enabled = !flist_eof;
                        } else
-                               check_timeout();
+                               check_timeout((flags & PIO_NEED_INPUT) != 0);
                        FD_ZERO(&r_fds); /* Just in case... */
                        FD_ZERO(&w_fds);
                }
@@ -1315,18 +1328,21 @@ void maybe_flush_socket(int important)
                io_flush(NORMAL_FLUSH);
 }
 
-/* This never adds new non-msg-buffer data, since we don't know the state
- * of the raw-data buffer. */
-void maybe_send_keepalive(void)
-{
-       if (time(NULL) - last_io_out >= allowed_lull) {
-               if (!iobuf.msg.len && iobuf.out.len == iobuf.out_empty_len) {
-                       if (protocol_version >= 30)
-                               send_msg(MSG_NOOP, "", 0, 0);
-                       else
-                               send_msg(MSG_DATA, "", 0, 0);
-               }
-               if (iobuf.msg.len)
+/* Older rsync versions used to send either a MSG_NOOP (protocol 30) or a
+ * raw-data-based keep-alive (protocol 29), both of which implied forwarding of
+ * the message through the sender.  Since the new timeout method does not need
+ * any forwarding, we just send an empty MSG_DATA message, which works with all
+ * rsync versions.  This avoids any message forwarding, and leaves the raw-data
+ * stream alone (since we can never be quite sure if that stream is in the
+ * right state for a keep-alive message). */
+void maybe_send_keepalive(time_t now, BOOL allow_flush)
+{
+       if (now - last_io_out >= allowed_lull) {
+               if (!iobuf.msg.len && iobuf.out.len == iobuf.out_empty_len)
+                       send_msg(MSG_DATA, "", 0, 0);
+               if (!allow_flush) {
+                       /* Let the caller worry about writing out the data. */
+               } else if (iobuf.msg.len)
                        perform_io(iobuf.msg.size - iobuf.msg.len + 1, PIO_NEED_MSGROOM);
                else if (iobuf.out.len > iobuf.out_empty_len)
                        io_flush(NORMAL_FLUSH);
@@ -1411,11 +1427,12 @@ static void read_a_msg(void)
                }
                break;
        case MSG_NOOP:
+               /* Support protocol-30 keep-alive method. */
                if (msg_bytes != 0)
                        goto invalid_msg;
                iobuf.in_multiplexed = 1;
                if (am_sender)
-                       maybe_send_keepalive();
+                       maybe_send_keepalive(time(NULL), True);
                break;
        case MSG_DELETED:
                if (msg_bytes >= sizeof data)
diff --git a/main.c b/main.c
index 4f3729e..5955001 100644 (file)
--- a/main.c
+++ b/main.c
@@ -73,6 +73,7 @@ extern int send_msgs_to_gen;
 extern pid_t cleanup_child_pid;
 extern size_t bwlimit_writemax;
 extern unsigned int module_dirlen;
+extern BOOL we_send_keepalive_messages;
 extern BOOL flist_receiving_enabled;
 extern BOOL shutting_down;
 extern struct stats stats;
@@ -764,6 +765,8 @@ static void do_server_sender(int f_in, int f_out, int argc, char *argv[])
        struct file_list *flist;
        char *dir = argv[0];
 
+       we_send_keepalive_messages = io_timeout != 0 && protocol_version < 31;
+
        if (DEBUG_GTE(SEND, 1)) {
                rprintf(FINFO, "server_sender starting pid=%ld\n",
                        (long)getpid());
@@ -904,6 +907,7 @@ static int do_recv(int f_in, int f_out, char *local_name)
        }
 
        am_generator = 1;
+       we_send_keepalive_messages = io_timeout != 0;
        flist_receiving_enabled = True;
 
        io_end_multiplex_in(MPLX_SWITCHING);
@@ -1108,6 +1112,7 @@ int client_run(int f_in, int f_out, pid_t pid, int argc, char *argv[])
 
        if (am_sender) {
                keep_dirlinks = 0; /* Must be disabled on the sender. */
+               we_send_keepalive_messages = io_timeout != 0 && protocol_version < 31;
 
                if (always_checksum
                 && (log_format_has(stdout_format, 'C')
diff --git a/rsync.c b/rsync.c
index 31f9696..938969a 100644 (file)
--- a/rsync.c
+++ b/rsync.c
@@ -365,11 +365,10 @@ int read_ndx_and_attrs(int f_in, int f_out, int *iflag_ptr, uchar *type_ptr,
        iflags = protocol_version >= 29 ? read_shortint(f_in)
                   : ITEM_TRANSFER | ITEM_MISSING_DATA;
 
-       /* Honor the old-style keep-alive indicator. */
-       if (protocol_version < 30
-        && ndx == cur_flist->used && iflags == ITEM_IS_NEW) {
+       /* Support the protocol-29 keep-alive style. */
+       if (protocol_version < 30 && ndx == cur_flist->used && iflags == ITEM_IS_NEW) {
                if (am_sender)
-                       maybe_send_keepalive();
+                       maybe_send_keepalive(time(NULL), True);
                goto read_loop;
        }
 
diff --git a/rsync.h b/rsync.h
index 68f236d..1cf7c6e 100644 (file)
--- a/rsync.h
+++ b/rsync.h
@@ -234,7 +234,7 @@ enum msgcode {
        MSG_STATS=10,   /* message has stats data for generator */
        MSG_IO_ERROR=22,/* the sending side had an I/O error */
        MSG_IO_TIMEOUT=33,/* tell client about a daemon's timeout value */
-       MSG_NOOP=42,    /* a do-nothing message */
+       MSG_NOOP=42,    /* a do-nothing message (legacy protocol-30 only) */
        MSG_ERROR_EXIT=86, /* used by siblings and by protocol-31 */
        MSG_SUCCESS=100,/* successfully updated indicated flist index */
        MSG_DELETED=101,/* successfully deleted a file on receiving side */
diff --git a/sender.c b/sender.c
index bf6e7b4..60820dd 100644 (file)
--- a/sender.c
+++ b/sender.c
@@ -43,6 +43,7 @@ extern int inplace;
 extern int batch_fd;
 extern int write_batch;
 extern int file_old_total;
+extern BOOL we_send_keepalive_messages;
 extern struct stats stats;
 extern struct file_list *cur_flist, *first_flist, *dir_flist;
 
@@ -104,8 +105,8 @@ static struct sum_struct *receive_sums(int f)
                        s->sums[i].len = s->blength;
                offset += s->sums[i].len;
 
-               if (allowed_lull && !(i % lull_mod))
-                       maybe_send_keepalive();
+               if (we_send_keepalive_messages && !(i % lull_mod))
+                       maybe_send_keepalive(time(NULL), True);
 
                if (DEBUG_GTE(DELTASUM, 3)) {
                        rprintf(FINFO,