More --timeout improvements, especially for the receiving side:
author: Wayne Davison <wayned@samba.org>
Sat, 2 Jan 2010 18:51:09 +0000 (10:51 -0800)
committer: Wayne Davison <wayned@samba.org>
Sat, 2 Jan 2010 18:58:39 +0000 (10:58 -0800)
- The receiver now sends keep-alive messages to the generator
  when it is actively doing work and hasn't sent anything
  recently.  This ensures that the generator won't time out
  if the receiver is working hard.
- The perform_io() code has improved keep-alive participation.
- Allow the sender to send some keep-alive messages, which
  ensures that if it is in a lull, it can probe the socket.

generator.c
io.c
main.c
receiver.c
rsync.c
rsync.h
sender.c

diff --git a/generator.c b/generator.c
index 33b7ce2..e7c1ef7 100644 (file)
--- a/generator.c
+++ b/generator.c
@@ -289,7 +289,7 @@ static void delete_in_dir(char *fbuf, struct file_struct *file, dev_t *fs_dev)
                rprintf(FINFO, "delete_in_dir(%s)\n", fbuf);
 
        if (allowed_lull)
-               maybe_send_keepalive(time(NULL), True);
+               maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
 
        if (io_error && !ignore_errors) {
                if (already_warned)
@@ -1928,7 +1928,7 @@ static void touch_up_dirs(struct file_list *flist, int ndx)
                }
                if (counter >= loopchk_limit) {
                        if (allowed_lull)
-                               maybe_send_keepalive(time(NULL), True);
+                               maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
                        else
                                maybe_flush_socket(0);
                        counter = 0;
@@ -2128,7 +2128,7 @@ void generate_files(int f_out, const char *local_name)
 
                        if (i + cur_flist->ndx_start >= next_loopchk) {
                                if (allowed_lull)
-                                       maybe_send_keepalive(time(NULL), True);
+                                       maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
                                else
                                        maybe_flush_socket(0);
                                next_loopchk += loopchk_limit;
diff --git a/io.c b/io.c
index 077e503..3388269 100644 (file)
--- a/io.c
+++ b/io.c
@@ -69,7 +69,6 @@ int batch_fd = -1;
 int msgdone_cnt = 0;
 int forward_flist_data = 0;
 BOOL flist_receiving_enabled = False;
-BOOL we_send_keepalive_messages = False;
 
 /* Ignore an EOF error if non-zero. See whine_about_eof(). */
 int kluge_around_eof = 0;
@@ -157,23 +156,33 @@ static void check_timeout(BOOL allow_keepalive)
 {
        time_t t, chk;
 
-       /* On the receiving side, the generator is now handling timeouts, so
-        * the receiver ignores them.  Note that the am_receiver flag is not
-        * set until the receiver forks from the generator, so timeouts will be
-        * based on receiving data on the receiving side until that event. */
-       if (!io_timeout || am_receiver)
+       /* On the receiving side, the generator is now the one that decides
+        * when a timeout has occurred.  When it is sifting through a lot of
+        * files looking for work, it will be sending keep-alive messages to
+        * the sender, and even though the receiver won't be sending/receiving
+        * anything (not even keep-alive messages), the successful writes to
+        * the sender will keep things going.  If the receiver is actively
+        * receiving data, it will ensure that the generator knows that it is
+        * not idle by sending the generator keep-alive messages (since the
+        * generator might be blocked trying to send checksums, it needs to
+        * know that the receiver is active).  Thus, as long as one or the
+        * other is successfully doing work, the generator will not timeout. */
+       if (!io_timeout)
                return;
 
        t = time(NULL);
 
-       if (allow_keepalive && we_send_keepalive_messages) {
+       if (allow_keepalive) {
                /* This may put data into iobuf.msg w/o flushing. */
-               maybe_send_keepalive(t, False);
+               maybe_send_keepalive(t, 0);
        }
 
        if (!last_io_in)
                last_io_in = t;
 
+       if (am_receiver)
+               return;
+
        chk = MAX(last_io_out, last_io_in);
        if (t - chk >= io_timeout) {
                if (am_server)
@@ -261,8 +270,8 @@ static size_t safe_read(int fd, char *buf, size_t len)
                                        who_am_i());
                                exit_cleanup(RERR_FILEIO);
                        }
-                       if (we_send_keepalive_messages)
-                               maybe_send_keepalive(time(NULL), True);
+                       if (io_timeout)
+                               maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
                        continue;
                }
 
@@ -346,8 +355,8 @@ static void safe_write(int fd, const char *buf, size_t len)
                                        what_fd_is(fd), who_am_i());
                                exit_cleanup(RERR_FILEIO);
                        }
-                       if (we_send_keepalive_messages)
-                               maybe_send_keepalive(time(NULL), True);
+                       if (io_timeout)
+                               maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
                        continue;
                }
 
@@ -781,8 +790,11 @@ static char *perform_io(size_t needed, int flags)
                        if (msgs2stderr && DEBUG_GTE(IO, 2))
                                rprintf(FINFO, "[%s] recv=%ld\n", who_am_i(), (long)n);
 
-                       if (io_timeout)
+                       if (io_timeout) {
                                last_io_in = time(NULL);
+                               if (flags & PIO_NEED_INPUT)
+                                       maybe_send_keepalive(last_io_in, 0);
+                       }
                        stats.total_read += n;
 
                        iobuf.in.len += n;
@@ -1053,13 +1065,15 @@ void io_set_sock_fds(int f_in, int f_out)
 void set_io_timeout(int secs)
 {
        io_timeout = secs;
+       allowed_lull = (io_timeout + 1) / 2;
 
-       if (!io_timeout || io_timeout > SELECT_TIMEOUT)
+       if (!io_timeout || allowed_lull > SELECT_TIMEOUT)
                select_timeout = SELECT_TIMEOUT;
        else
-               select_timeout = io_timeout;
+               select_timeout = allowed_lull;
 
-       allowed_lull = read_batch ? 0 : (io_timeout + 1) / 2;
+       if (read_batch)
+               allowed_lull = 0;
 }
 
 static void check_for_d_option_error(const char *msg)
@@ -1333,12 +1347,20 @@ void maybe_flush_socket(int important)
  * rsync versions.  This avoids any message forwarding, and leaves the raw-data
  * stream alone (since we can never be quite sure if that stream is in the
  * right state for a keep-alive message). */
-void maybe_send_keepalive(time_t now, BOOL allow_flush)
+void maybe_send_keepalive(time_t now, int flags)
 {
+       if (flags & MSK_ACTIVE_RECEIVER)
+               last_io_in = now; /* Fudge things when we're working hard on the files. */
+
        if (now - last_io_out >= allowed_lull) {
+               /* The receiver is special:  it only sends keep-alive messages if it is
+                * actively receiving data.  Otherwise, it lets the generator timeout. */
+               if (am_receiver && now - last_io_in >= io_timeout)
+                       return;
+
                if (!iobuf.msg.len && iobuf.out.len == iobuf.out_empty_len)
                        send_msg(MSG_DATA, "", 0, 0);
-               if (!allow_flush) {
+               if (!(flags & MSK_ALLOW_FLUSH)) {
                        /* Let the caller worry about writing out the data. */
                } else if (iobuf.msg.len)
                        perform_io(iobuf.msg.size - iobuf.msg.len + 1, PIO_NEED_MSGROOM);
@@ -1430,7 +1452,7 @@ static void read_a_msg(void)
                        goto invalid_msg;
                iobuf.in_multiplexed = 1;
                if (am_sender)
-                       maybe_send_keepalive(time(NULL), True);
+                       maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
                break;
        case MSG_DELETED:
                if (msg_bytes >= sizeof data)
diff --git a/main.c b/main.c
index 5955001..4f3729e 100644 (file)
--- a/main.c
+++ b/main.c
@@ -73,7 +73,6 @@ extern int send_msgs_to_gen;
 extern pid_t cleanup_child_pid;
 extern size_t bwlimit_writemax;
 extern unsigned int module_dirlen;
-extern BOOL we_send_keepalive_messages;
 extern BOOL flist_receiving_enabled;
 extern BOOL shutting_down;
 extern struct stats stats;
@@ -765,8 +764,6 @@ static void do_server_sender(int f_in, int f_out, int argc, char *argv[])
        struct file_list *flist;
        char *dir = argv[0];
 
-       we_send_keepalive_messages = io_timeout != 0 && protocol_version < 31;
-
        if (DEBUG_GTE(SEND, 1)) {
                rprintf(FINFO, "server_sender starting pid=%ld\n",
                        (long)getpid());
@@ -907,7 +904,6 @@ static int do_recv(int f_in, int f_out, char *local_name)
        }
 
        am_generator = 1;
-       we_send_keepalive_messages = io_timeout != 0;
        flist_receiving_enabled = True;
 
        io_end_multiplex_in(MPLX_SWITCHING);
@@ -1112,7 +1108,6 @@ int client_run(int f_in, int f_out, pid_t pid, int argc, char *argv[])
 
        if (am_sender) {
                keep_dirlinks = 0; /* Must be disabled on the sender. */
-               we_send_keepalive_messages = io_timeout != 0 && protocol_version < 31;
 
                if (always_checksum
                 && (log_format_has(stdout_format, 'C')
diff --git a/receiver.c b/receiver.c
index e8fa3d1..6688dda 100644 (file)
--- a/receiver.c
+++ b/receiver.c
@@ -48,6 +48,7 @@ extern int keep_partial;
 extern int checksum_len;
 extern int checksum_seed;
 extern int inplace;
+extern int allowed_lull;
 extern int delay_updates;
 extern mode_t orig_umask;
 extern struct stats stats;
@@ -254,6 +255,9 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r,
                if (INFO_GTE(PROGRESS, 1))
                        show_progress(offset, total_size);
 
+               if (allowed_lull)
+                       maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH | MSK_ACTIVE_RECEIVER);
+
                if (i > 0) {
                        if (DEBUG_GTE(DELTASUM, 3)) {
                                rprintf(FINFO,"data recv %d at %s\n",
diff --git a/rsync.c b/rsync.c
index 938969a..cefbe5f 100644 (file)
--- a/rsync.c
+++ b/rsync.c
@@ -368,7 +368,7 @@ int read_ndx_and_attrs(int f_in, int f_out, int *iflag_ptr, uchar *type_ptr,
        /* Support the protocol-29 keep-alive style. */
        if (protocol_version < 30 && ndx == cur_flist->used && iflags == ITEM_IS_NEW) {
                if (am_sender)
-                       maybe_send_keepalive(time(NULL), True);
+                       maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
                goto read_loop;
        }
 
diff --git a/rsync.h b/rsync.h
index 1cf7c6e..731f4fe 100644 (file)
--- a/rsync.h
+++ b/rsync.h
@@ -235,7 +235,7 @@ enum msgcode {
        MSG_IO_ERROR=22,/* the sending side had an I/O error */
        MSG_IO_TIMEOUT=33,/* tell client about a daemon's timeout value */
        MSG_NOOP=42,    /* a do-nothing message (legacy protocol-30 only) */
-       MSG_ERROR_EXIT=86, /* used by siblings and by protocol-31 */
+       MSG_ERROR_EXIT=86, /* synchronize an error exit (siblings and protocol >= 31) */
        MSG_SUCCESS=100,/* successfully updated indicated flist index */
        MSG_DELETED=101,/* successfully deleted a file on receiving side */
        MSG_NO_SEND=102,/* sender failed to open a file we wanted */
@@ -267,6 +267,10 @@ enum delret {
 #define MKP_DROP_NAME          (1<<0) /* drop trailing filename or trailing slash */
 #define MKP_SKIP_SLASH         (1<<1) /* skip one or more leading slashes */
 
+/* Defines for maybe_send_keepalive() */
+#define MSK_ALLOW_FLUSH        (1<<0)
+#define MSK_ACTIVE_RECEIVER    (1<<1)
+
 #include "errcode.h"
 
 #include "config.h"
diff --git a/sender.c b/sender.c
index 60820dd..600ad84 100644 (file)
--- a/sender.c
+++ b/sender.c
@@ -43,7 +43,6 @@ extern int inplace;
 extern int batch_fd;
 extern int write_batch;
 extern int file_old_total;
-extern BOOL we_send_keepalive_messages;
 extern struct stats stats;
 extern struct file_list *cur_flist, *first_flist, *dir_flist;
 
@@ -64,7 +63,7 @@ static struct sum_struct *receive_sums(int f)
 {
        struct sum_struct *s;
        int32 i;
-       int lull_mod = allowed_lull * 5;
+       int lull_mod = protocol_version >= 31 ? 0 : allowed_lull * 5;
        OFF_T offset = 0;
 
        if (!(s = new(struct sum_struct)))
@@ -105,7 +104,7 @@ static struct sum_struct *receive_sums(int f)
                        s->sums[i].len = s->blength;
                offset += s->sums[i].len;
 
-               if (we_send_keepalive_messages && !(i % lull_mod))
+               if (lull_mod && !(i % lull_mod))
                        maybe_send_keepalive(time(NULL), True);
 
                if (DEBUG_GTE(DELTASUM, 3)) {