Fix erroneous "--fake-user" in the rsyncd.conf(5) man page.
[rsync/rsync.git] / io.c
diff --git a/io.c b/io.c
index 08afdd9..bf39ff9 100644 (file)
--- a/io.c
+++ b/io.c
@@ -38,8 +38,8 @@ extern int bwlimit;
 extern size_t bwlimit_writemax;
 extern int io_timeout;
 extern int am_server;
-extern int am_daemon;
 extern int am_sender;
+extern int am_receiver;
 extern int am_generator;
 extern int msgs2stderr;
 extern int inc_recurse;
@@ -50,6 +50,7 @@ extern int file_total;
 extern int file_old_total;
 extern int list_only;
 extern int read_batch;
+extern int compat_flags;
 extern int protect_args;
 extern int checksum_seed;
 extern int protocol_version;
@@ -65,10 +66,10 @@ extern iconv_t ic_send, ic_recv;
 
 int csum_length = SHORT_SUM_LENGTH; /* initial value */
 int allowed_lull = 0;
-int ignore_timeout = 0;
 int batch_fd = -1;
 int msgdone_cnt = 0;
 int forward_flist_data = 0;
+BOOL flist_receiving_enabled = False;
 
 /* Ignore an EOF error if non-zero. See whine_about_eof(). */
 int kluge_around_eof = 0;
@@ -83,7 +84,7 @@ static struct {
        xbuf in, out, msg;
        int in_fd;
        int out_fd; /* Both "out" and "msg" go to this fd. */
-       BOOL in_multiplexed;
+       int in_multiplexed;
        unsigned out_empty_len;
        size_t raw_data_header_pos;      /* in the out xbuf */
        size_t raw_flushing_ends_before; /* in the out xbuf */
@@ -115,7 +116,19 @@ static char int_byte_extra[64] = {
        2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6, /* (C0 - FF)/4 */
 };
 
-#define IN_MULTIPLEXED (iobuf.in_multiplexed)
+/* Our I/O buffers are sized with no bits set in the lowest byte of the "size"
+ * (indeed, our rounding of sizes in 1024-byte units ensures more than this).
+ * This allows the code that is storing bytes near the physical end of a
+ * circular buffer to temporarily reduce the buffer's size (in order to make
+ * some storing idioms easier), while also making it simple to restore the
+ * buffer's actual size when the buffer's "pos" wraps around to the start (we
+ * just round the buffer's size up again). */
+
+#define IOBUF_WAS_REDUCED(siz) ((siz) & 0xFF)
+#define IOBUF_RESTORE_SIZE(siz) (((siz) | 0xFF) + 1)
+
+#define IN_MULTIPLEXED (iobuf.in_multiplexed != 0)
+#define IN_MULTIPLEXED_AND_READY (iobuf.in_multiplexed > 0)
 #define OUT_MULTIPLEXED (iobuf.out_empty_len != 0)
 
 #define PIO_NEED_INPUT (1<<0) /* The *_NEED_* flags are mutually exclusive. */
@@ -136,27 +149,47 @@ enum festatus { FES_SUCCESS, FES_REDO, FES_NO_SEND };
 
 static flist_ndx_list redo_list, hlink_list;
 
+static void read_a_msg(void);
+static void drain_multiplex_messages(void);
 static void sleep_for_bwlimit(int bytes_written);
 
-static void check_timeout(void)
+static void check_timeout(BOOL allow_keepalive)
 {
-       time_t t;
-
-       if (!io_timeout || ignore_timeout)
+       time_t t, chk;
+
+       /* On the receiving side, the generator is now the one that decides
+        * when a timeout has occurred.  When it is sifting through a lot of
+        * files looking for work, it will be sending keep-alive messages to
+        * the sender, and even though the receiver won't be sending/receiving
+        * anything (not even keep-alive messages), the successful writes to
+        * the sender will keep things going.  If the receiver is actively
+        * receiving data, it will ensure that the generator knows that it is
+        * not idle by sending the generator keep-alive messages (since the
+        * generator might be blocked trying to send checksums, it needs to
+        * know that the receiver is active).  Thus, as long as one or the
+        * other is successfully doing work, the generator will not time out. */
+       if (!io_timeout)
                return;
 
-       if (!last_io_in) {
-               last_io_in = time(NULL);
-               return;
+       t = time(NULL);
+
+       if (allow_keepalive) {
+               /* This may put data into iobuf.msg w/o flushing. */
+               maybe_send_keepalive(t, 0);
        }
 
-       t = time(NULL);
+       if (!last_io_in)
+               last_io_in = t;
 
-       if (t - last_io_in >= io_timeout) {
-               if (!am_server && !am_daemon) {
-                       rprintf(FERROR, "io timeout after %d seconds -- exiting\n",
-                               (int)(t-last_io_in));
-               }
+       if (am_receiver)
+               return;
+
+       chk = MAX(last_io_out, last_io_in);
+       if (t - chk >= io_timeout) {
+               if (am_server)
+                       msgs2stderr = 1;
+               rprintf(FERROR, "[%s] io timeout after %d seconds -- exiting\n",
+                       who_am_i(), (int)(t-chk));
                exit_cleanup(RERR_TIMEOUT);
        }
 }
@@ -173,9 +206,9 @@ static void check_timeout(void)
  * There is another case for older protocol versions (< 24) where the module
  * listing was not terminated, so we must ignore an EOF error in that case and
  * exit.  In this situation, kluge_around_eof will be > 0. */
-static NORETURN void whine_about_eof(int fd)
+static NORETURN void whine_about_eof(BOOL allow_kluge)
 {
-       if (kluge_around_eof && fd == sock_f_in) {
+       if (kluge_around_eof && allow_kluge) {
                int i;
                if (kluge_around_eof > 0)
                        exit_cleanup(0);
@@ -238,7 +271,8 @@ static size_t safe_read(int fd, char *buf, size_t len)
                                        who_am_i());
                                exit_cleanup(RERR_FILEIO);
                        }
-                       check_timeout();
+                       if (io_timeout)
+                               maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
                        continue;
                }
 
@@ -322,7 +356,8 @@ static void safe_write(int fd, const char *buf, size_t len)
                                        what_fd_is(fd), who_am_i());
                                exit_cleanup(RERR_FILEIO);
                        }
-                       check_timeout();
+                       if (io_timeout)
+                               maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
                        continue;
                }
 
@@ -442,23 +477,59 @@ static void forward_filesfrom_data(void)
        }
 }
 
-/* Perform buffered input and output until specified conditions are met.  When
- * given a "needed" read requirement, we'll return without doing any I/O if the
- * iobuf.in bytes are already available.  When reading, we'll read as many
- * bytes as we can into the buffer, and return as soon as we meet the minimum
- * read requirement.  When given a "needed" write requirement, we'll return
- * without doing any I/O if that many bytes will fit in the output buffer (we
- * check either iobuf.out or iobuf.msg, depending on the flags).  When writing,
- * we write out as much as we can, and return as soon as the given free-space
- * requirement is available.
+void reduce_iobuf_size(xbuf *out, size_t new_size)
+{
+       if (new_size < out->size) {
+               if (DEBUG_GTE(IO, 4)) {
+                       const char *name = out == &iobuf.out ? "iobuf.out"
+                                        : out == &iobuf.msg ? "iobuf.msg"
+                                        : NULL;
+                       if (name) {
+                               rprintf(FINFO, "[%s] reduced size of %s (-%d)\n",
+                                       who_am_i(), name, (int)(out->size - new_size));
+                       }
+               }
+               out->size = new_size;
+       }
+}
+
+void restore_iobuf_size(xbuf *out)
+{
+       if (IOBUF_WAS_REDUCED(out->size)) {
+               size_t new_size = IOBUF_RESTORE_SIZE(out->size);
+               if (DEBUG_GTE(IO, 4)) {
+                       const char *name = out == &iobuf.out ? "iobuf.out"
+                                        : out == &iobuf.msg ? "iobuf.msg"
+                                        : NULL;
+                       if (name) {
+                               rprintf(FINFO, "[%s] restored size of %s (+%d)\n",
+                                       who_am_i(), name, (int)(new_size - out->size));
+                       }
+               }
+               out->size = new_size;
+       }
+}
+
+/* Perform buffered input and/or output until specified conditions are met.
+ * When given a "needed" read or write request, this returns without doing any
+ * I/O if the needed input bytes or write space is already available.  Once I/O
+ * is needed, this will try to do whatever reading and/or writing is currently
+ * possible, up to the maximum buffer allowances, no matter if this is a read
+ * or write request.  However, the I/O stops as soon as the required input
+ * bytes or output space is available.  If this is not a read request, the
+ * routine may also do some advantageous reading of messages from a multiplexed
+ * input source (which ensures that we don't jam up with everyone in their
+ * "need to write" code and nobody reading the accumulated data that would make
+ * writing possible).
  *
- * The iobuf.out and iobuf.msg buffers are circular, so some writes into them
- * will need to be split when the data needs to wrap around to the start.  In
- * order to help make this easier for some operations (such as the use of
- * SIVAL() into the buffer) the buffers MUST have 4 bytes of overflow space at
- * the end that is not not counted in the "size".  The iobuf.in buffer is not
- * (currently) circular.  To facilitate the handling of MSG_DATA bytes as they
- * are read-from/written-into the buffers, see the three raw_* iobuf vars.
+ * The iobuf.in, .out and .msg buffers are all circular.  Callers need to be
+ * aware that some data copies will need to be split when the bytes wrap around
+ * from the end to the start.  In order to help make writing into the output
+ * buffers easier for some operations (such as the use of SIVAL() into the
+ * buffer) a buffer may be temporarily shortened by a small amount, but the
+ * original size will be automatically restored when the .pos wraps to the
+ * start.  See also the 3 raw_* iobuf vars that are used in the handling of
+ * MSG_DATA bytes as they are read-from/written-into the buffers.
  *
  * When writing, we flush data in the following priority order:
  *
@@ -475,8 +546,6 @@ static void forward_filesfrom_data(void)
  *
  *    - Make this routine able to read the generator-to-receiver batch flow?
  *
- *    - Make the input buffer circular?
- *
  * Unlike the old routines that this replaces, it is OK to read ahead as far as
  * we can because the read_a_msg() routine now reads its bytes out of the input
  * buffer.  In the old days, only raw data was in the input buffer, and any
@@ -498,36 +567,17 @@ static char *perform_io(size_t needed, int flags)
 
        switch (flags & PIO_NEED_FLAGS) {
        case PIO_NEED_INPUT:
+               /* We never resize the circular input buffer. */
+               if (iobuf.in.size < needed) {
+                       rprintf(FERROR, "need to read %ld bytes, iobuf.in.buf is only %ld bytes.\n",
+                               (long)needed, (long)iobuf.in.size);
+                       exit_cleanup(RERR_PROTOCOL);
+               }
+
                if (DEBUG_GTE(IO, 3)) {
                        rprintf(FINFO, "[%s] perform_io(%ld, %sinput)\n",
                                who_am_i(), (long)needed, flags & PIO_CONSUME_INPUT ? "consume&" : "");
                }
-
-               /* Make sure the input buffer is big enough to hold "needed" bytes.
-                * Also make sure it will fit in the free space at the end, or
-                * else we need to shift some bytes. */
-               if (needed && iobuf.in.size < needed) {
-                       if (!(iobuf.in.buf = realloc_array(iobuf.in.buf, char, needed)))
-                               out_of_memory("perform_io");
-                       if (DEBUG_GTE(IO, 4)) {
-                               rprintf(FINFO, "[%s] resized input buffer from %ld to %ld bytes.\n",
-                                       who_am_i(), (long)iobuf.in.size, (long)needed);
-                       }
-                       iobuf.in.size = needed;
-               }
-               if (iobuf.in.size - iobuf.in.pos < needed
-                || (iobuf.in.len < needed && iobuf.in.len < 1024
-                 && iobuf.in.size - (iobuf.in.pos + iobuf.in.len) < 1024)) {
-                       memmove(iobuf.in.buf, iobuf.in.buf + iobuf.in.pos, iobuf.in.len);
-                       if (DEBUG_GTE(IO, 4)) {
-                               rprintf(FINFO,
-                                       "[%s] moved %ld bytes from %ld to 0 in the input buffer (size=%ld, needed=%ld).\n",
-                                       who_am_i(), (long)iobuf.in.len, (long)iobuf.in.pos, (long)iobuf.in.size, (long)needed);
-                       }
-                       if (iobuf.raw_input_ends_before)
-                               iobuf.raw_input_ends_before -= iobuf.in.pos;
-                       iobuf.in.pos = 0;
-               }
                break;
 
        case PIO_NEED_OUTROOM:
@@ -593,7 +643,7 @@ static char *perform_io(size_t needed, int flags)
 
                FD_ZERO(&r_fds);
                FD_ZERO(&e_fds);
-               if (iobuf.in_fd >= 0 && iobuf.in.size - (iobuf.in.pos + iobuf.in.len)) {
+               if (iobuf.in_fd >= 0 && iobuf.in.size - iobuf.in.len) {
                        if (!read_batch || batch_fd >= 0) {
                                FD_SET(iobuf.in_fd, &r_fds);
                                FD_SET(iobuf.in_fd, &e_fds);
@@ -620,15 +670,6 @@ static char *perform_io(size_t needed, int flags)
 
                                        SIVAL(iobuf.out.buf + iobuf.raw_data_header_pos, 0,
                                              ((MPLEX_BASE + (int)MSG_DATA)<<24) + iobuf.out.len - 4);
-                                       if (iobuf.raw_data_header_pos + 4 > iobuf.out.size) {
-                                               int siz = (int)(iobuf.raw_data_header_pos + 4 - iobuf.out.size);
-                                               /* We used some of the overflow bytes, so move them. */
-                                               if (DEBUG_GTE(IO, 4)) {
-                                                       rprintf(FINFO, "[%s] wrap-bytes moved: %d (perform_io)\n",
-                                                               who_am_i(), siz);
-                                               }
-                                               memcpy(iobuf.out.buf, iobuf.out.buf + iobuf.out.size, siz);
-                                       }
 
                                        if (DEBUG_GTE(IO, 1)) {
                                                rprintf(FINFO, "[%s] send_msg(%d, %ld)\n",
@@ -639,6 +680,13 @@ static char *perform_io(size_t needed, int flags)
                                        iobuf.raw_data_header_pos = iobuf.raw_flushing_ends_before;
                                        if (iobuf.raw_data_header_pos >= iobuf.out.size)
                                                iobuf.raw_data_header_pos -= iobuf.out.size;
+                                       else if (iobuf.raw_data_header_pos + 4 > iobuf.out.size) {
+                                               /* The 4-byte header won't fit at the end of the buffer,
+                                                * so we'll temporarily reduce the output buffer's size
+                                                * and put the header at the start of the buffer. */
+                                               reduce_iobuf_size(&iobuf.out, iobuf.raw_data_header_pos);
+                                               iobuf.raw_data_header_pos = 0;
+                                       }
                                        /* Yes, it is possible for this to make len > size for a while. */
                                        iobuf.out.len += 4;
                                }
@@ -661,11 +709,19 @@ static char *perform_io(size_t needed, int flags)
                if (max_fd < 0) {
                        switch (flags & PIO_NEED_FLAGS) {
                        case PIO_NEED_INPUT:
+                               iobuf.in.len = 0;
+                               if (kluge_around_eof == 2)
+                                       exit_cleanup(0);
+                               if (iobuf.in_fd == -2)
+                                       whine_about_eof(True);
                                rprintf(FERROR, "error in perform_io: no fd for input.\n");
                                exit_cleanup(RERR_PROTOCOL);
                        case PIO_NEED_OUTROOM:
                        case PIO_NEED_MSGROOM:
                                msgs2stderr = 1;
+                               drain_multiplex_messages();
+                               if (iobuf.out_fd == -2)
+                                       whine_about_eof(True);
                                rprintf(FERROR, "error in perform_io: no fd for output.\n");
                                exit_cleanup(RERR_PROTOCOL);
                        default:
@@ -698,19 +754,24 @@ static char *perform_io(size_t needed, int flags)
                                send_extra_file_list(sock_f_out, -1);
                                extra_flist_sending_enabled = !flist_eof;
                        } else
-                               check_timeout();
+                               check_timeout((flags & PIO_NEED_INPUT) != 0);
                        FD_ZERO(&r_fds); /* Just in case... */
                        FD_ZERO(&w_fds);
                }
 
                if (iobuf.in_fd >= 0 && FD_ISSET(iobuf.in_fd, &r_fds)) {
-                       size_t pos = iobuf.in.pos + iobuf.in.len;
-                       size_t len = iobuf.in.size - pos;
+                       size_t len, pos = iobuf.in.pos + iobuf.in.len;
                        int n;
+                       if (pos >= iobuf.in.size) {
+                               pos -= iobuf.in.size;
+                               len = iobuf.in.size - iobuf.in.len;
+                       } else
+                               len = iobuf.in.size - pos;
                        if ((n = read(iobuf.in_fd, iobuf.in.buf + pos, len)) <= 0) {
                                if (n == 0) {
+                                       /* Signal that input has become invalid. */
                                        if (!read_batch || batch_fd < 0 || am_generator)
-                                               whine_about_eof(iobuf.in_fd); /* Doesn't return. */
+                                               iobuf.in_fd = -2;
                                        batch_fd = -1;
                                        continue;
                                }
@@ -724,20 +785,23 @@ static char *perform_io(size_t needed, int flags)
                                                rsyserr(FERROR_SOCKET, errno, "read error");
                                        } else
                                                rsyserr(FERROR, errno, "read error");
-                                       exit_cleanup(RERR_STREAMIO);
+                                       exit_cleanup(RERR_SOCKETIO);
                                }
                        }
                        if (msgs2stderr && DEBUG_GTE(IO, 2))
                                rprintf(FINFO, "[%s] recv=%ld\n", who_am_i(), (long)n);
 
-                       if (io_timeout)
+                       if (io_timeout) {
                                last_io_in = time(NULL);
+                               if (flags & PIO_NEED_INPUT)
+                                       maybe_send_keepalive(last_io_in, 0);
+                       }
                        stats.total_read += n;
 
                        iobuf.in.len += n;
                }
 
-               if (iobuf.out_fd >= 0 && FD_ISSET(iobuf.out_fd, &w_fds)) {
+               if (out && FD_ISSET(iobuf.out_fd, &w_fds)) {
                        size_t len = iobuf.raw_flushing_ends_before ? iobuf.raw_flushing_ends_before - out->pos : out->len;
                        int n;
 
@@ -752,9 +816,11 @@ static char *perform_io(size_t needed, int flags)
                                else {
                                        /* Don't write errors on a dead socket. */
                                        msgs2stderr = 1;
-                                       out->len = iobuf.raw_flushing_ends_before = out->pos = 0;
+                                       iobuf.out_fd = -2;
+                                       iobuf.out.len = iobuf.msg.len = iobuf.raw_flushing_ends_before = 0;
                                        rsyserr(FERROR_SOCKET, errno, "[%s] write error", who_am_i());
-                                       exit_cleanup(RERR_STREAMIO);
+                                       drain_multiplex_messages();
+                                       exit_cleanup(RERR_SOCKETIO);
                                }
                        }
                        if (msgs2stderr && DEBUG_GTE(IO, 2)) {
@@ -773,15 +839,26 @@ static char *perform_io(size_t needed, int flags)
                                if (iobuf.raw_flushing_ends_before)
                                        iobuf.raw_flushing_ends_before -= out->size;
                                out->pos = 0;
+                               restore_iobuf_size(out);
                        } else if (out->pos == iobuf.raw_flushing_ends_before)
                                iobuf.raw_flushing_ends_before = 0;
                        if ((out->len -= n) == empty_buf_len) {
                                out->pos = 0;
+                               restore_iobuf_size(out);
                                if (empty_buf_len)
                                        iobuf.raw_data_header_pos = 0;
                        }
                }
 
+               /* We need to help prevent deadlock by doing what reading
+                * we can whenever we are here trying to write. */
+               if (IN_MULTIPLEXED_AND_READY && !(flags & PIO_NEED_INPUT)) {
+                       while (!iobuf.raw_input_ends_before && iobuf.in.len > 512)
+                               read_a_msg();
+                       if (flist_receiving_enabled && iobuf.in.len > 512)
+                               wait_for_receiver(); /* generator only */
+               }
+
                if (ff_forward_fd >= 0 && FD_ISSET(ff_forward_fd, &r_fds)) {
                        /* This can potentially flush all output and enable
                         * multiplexed output, so keep this last in the loop
@@ -797,32 +874,59 @@ static char *perform_io(size_t needed, int flags)
        if (flags & PIO_CONSUME_INPUT) {
                iobuf.in.len -= needed;
                iobuf.in.pos += needed;
+               if (iobuf.in.pos == iobuf.raw_input_ends_before)
+                       iobuf.raw_input_ends_before = 0;
+               if (iobuf.in.pos >= iobuf.in.size) {
+                       iobuf.in.pos -= iobuf.in.size;
+                       if (iobuf.raw_input_ends_before)
+                               iobuf.raw_input_ends_before -= iobuf.in.size;
+               }
        }
 
        return data;
 }
 
+static void raw_read_buf(char *buf, size_t len)
+{
+       size_t pos = iobuf.in.pos;
+       char *data = perform_io(len, PIO_INPUT_AND_CONSUME);
+       if (iobuf.in.pos <= pos && len) {
+               size_t siz = len - iobuf.in.pos;
+               memcpy(buf, data, siz);
+               memcpy(buf + siz, iobuf.in.buf, iobuf.in.pos);
+       } else
+               memcpy(buf, data, len);
+}
+
+static int32 raw_read_int(void)
+{
+       char *data, buf[4];
+       if (iobuf.in.size - iobuf.in.pos >= 4)
+               data = perform_io(4, PIO_INPUT_AND_CONSUME);
+       else
+               raw_read_buf(data = buf, 4);
+       return IVAL(data, 0);
+}
+
 void noop_io_until_death(void)
 {
        char buf[1024];
 
-       kluge_around_eof = 1;
-       /* For protocol 31: setting an I/O timeout ensures that if something
-        * inexplicably weird happens, we won't hang around forever.  For older
-        * protocols: we can't tell the other side to die, so we linger a brief
-        * time (to try to give our error messages time to arrive) and then let
-        * the "unexpectedly" closed socket tell them to die. */
-       set_io_timeout(protocol_version >= 31 ? 30 : 1);
+       kluge_around_eof = 2;
+       /* Setting an I/O timeout ensures that if something inexplicably weird
+        * happens, we won't hang around forever. */
+       if (!io_timeout)
+               set_io_timeout(60);
 
        while (1)
                read_buf(iobuf.in_fd, buf, sizeof buf);
 }
 
-/* Buffer a message for the multiplexed output stream.  Is never used for MSG_DATA. */
+/* Buffer a message for the multiplexed output stream.  Is not used for (normal) MSG_DATA. */
 int send_msg(enum msgcode code, const char *buf, size_t len, int convert)
 {
        char *hdr;
-       size_t pos;
+       size_t needed, pos;
        BOOL want_debug = DEBUG_GTE(IO, 1) && convert >= 0 && (msgs2stderr || code != MSG_INFO);
 
        if (!OUT_MULTIPLEXED)
@@ -831,21 +935,32 @@ int send_msg(enum msgcode code, const char *buf, size_t len, int convert)
        if (want_debug)
                rprintf(FINFO, "[%s] send_msg(%d, %ld)\n", who_am_i(), (int)code, (long)len);
 
+       /* When checking for enough free space for this message, we need to
+        * make sure that there is space for the 4-byte header, plus we'll
+        * assume that we may waste up to 3 bytes (if the header doesn't fit
+        * at the physical end of the buffer). */
 #ifdef ICONV_OPTION
        if (convert > 0 && ic_send == (iconv_t)-1)
                convert = 0;
        if (convert > 0) {
                /* Ensuring double-size room leaves space for maximal conversion expansion. */
-               if (iobuf.msg.len + len*2 + 4 > iobuf.msg.size)
-                       perform_io(len*2 + 4, PIO_NEED_MSGROOM);
+               needed = len*2 + 4 + 3;
        } else
 #endif
-       if (iobuf.msg.len + len + 4 > iobuf.msg.size)
-               perform_io(len + 4, PIO_NEED_MSGROOM);
+               needed = len + 4 + 3;
+       if (iobuf.msg.len + needed > iobuf.msg.size)
+               perform_io(needed, PIO_NEED_MSGROOM);
 
        pos = iobuf.msg.pos + iobuf.msg.len; /* Must be set after any flushing. */
        if (pos >= iobuf.msg.size)
                pos -= iobuf.msg.size;
+       else if (pos + 4 > iobuf.msg.size) {
+               /* The 4-byte header won't fit at the end of the buffer,
+                * so we'll temporarily reduce the message buffer's size
+                * and put the header at the start of the buffer. */
+               reduce_iobuf_size(&iobuf.msg, pos);
+               pos = 0;
+       }
        hdr = iobuf.msg.buf + pos;
 
        iobuf.msg.len += 4; /* Allocate room for the coming header bytes. */
@@ -869,8 +984,8 @@ int send_msg(enum msgcode code, const char *buf, size_t len, int convert)
        {
                size_t siz;
 
-               if ((pos += 4) >= iobuf.msg.size)
-                       pos -= iobuf.msg.size;
+               if ((pos += 4) == iobuf.msg.size)
+                       pos = 0;
 
                /* Handle a split copy if we wrap around the end of the circular buffer. */
                if (pos >= iobuf.msg.pos && (siz = iobuf.msg.size - pos) < len) {
@@ -883,13 +998,6 @@ int send_msg(enum msgcode code, const char *buf, size_t len, int convert)
        }
 
        SIVAL(hdr, 0, ((MPLEX_BASE + (int)code)<<24) + len);
-       /* If the header used any overflow bytes, move them to the start. */
-       if ((pos = hdr+4 - iobuf.msg.buf) > iobuf.msg.size) {
-               int siz = (int)(pos - iobuf.msg.size);
-               if (DEBUG_GTE(IO, 4))
-                       rprintf(FINFO, "[%s] wrap-bytes moved: %d (send_msg)\n", who_am_i(), siz);
-               memcpy(iobuf.msg.buf, iobuf.msg.buf + iobuf.msg.size, siz);
-       }
 
        if (want_debug && convert > 0)
                rprintf(FINFO, "[%s] converted msg len=%ld\n", who_am_i(), (long)len);
@@ -958,13 +1066,15 @@ void io_set_sock_fds(int f_in, int f_out)
 void set_io_timeout(int secs)
 {
        io_timeout = secs;
+       allowed_lull = (io_timeout + 1) / 2;
 
-       if (!io_timeout || io_timeout > SELECT_TIMEOUT)
+       if (!io_timeout || allowed_lull > SELECT_TIMEOUT)
                select_timeout = SELECT_TIMEOUT;
        else
-               select_timeout = io_timeout;
+               select_timeout = allowed_lull;
 
-       allowed_lull = read_batch ? 0 : (io_timeout + 1) / 2;
+       if (read_batch)
+               allowed_lull = 0;
 }
 
 static void check_for_d_option_error(const char *msg)
@@ -1159,13 +1269,6 @@ BOOL io_start_buffering_out(int f_out)
        if (msgs2stderr && DEBUG_GTE(IO, 2))
                rprintf(FINFO, "[%s] io_start_buffering_out(%d)\n", who_am_i(), f_out);
 
-       if (OUT_MULTIPLEXED && !iobuf.msg.buf) {
-               iobuf.msg.size = IO_BUFFER_SIZE - 4;
-               if (!(iobuf.msg.buf = new_array(char, iobuf.msg.size + 4)))
-                       out_of_memory("io_start_buffering_out");
-               iobuf.msg.pos = iobuf.msg.len = 0;
-       }
-
        if (iobuf.out.buf) {
                if (iobuf.out_fd == -1)
                        iobuf.out_fd = f_out;
@@ -1174,11 +1277,7 @@ BOOL io_start_buffering_out(int f_out)
                return False;
        }
 
-       iobuf.out.size = IO_BUFFER_SIZE * 2 - 4;
-       /* The 4 overflow bytes makes some circular-buffer wrapping operations easier. */
-       if (!(iobuf.out.buf = new_array(char, iobuf.out.size + 4)))
-               out_of_memory("io_start_buffering_out");
-       iobuf.out.pos = iobuf.out.len = 0;
+       alloc_xbuf(&iobuf.out, ROUND_UP_1024(IO_BUFFER_SIZE * 2));
        iobuf.out_fd = f_out;
 
        return True;
@@ -1197,12 +1296,7 @@ BOOL io_start_buffering_in(int f_in)
                return False;
        }
 
-       iobuf.in.size = IO_BUFFER_SIZE;
-       if (!(iobuf.in.buf = new_array(char, iobuf.in.size)))
-               out_of_memory("io_start_buffering_in");
-
-       iobuf.in.pos = iobuf.in.len = 0;
-
+       alloc_xbuf(&iobuf.in, ROUND_UP_1024(IO_BUFFER_SIZE));
        iobuf.in_fd = f_in;
 
        return True;
@@ -1247,20 +1341,29 @@ void maybe_flush_socket(int important)
                io_flush(NORMAL_FLUSH);
 }
 
-void maybe_send_keepalive(void)
+/* Older rsync versions used to send either a MSG_NOOP (protocol 30) or a
+ * raw-data-based keep-alive (protocol 29), both of which implied forwarding of
+ * the message through the sender.  Since the new timeout method does not need
+ * any forwarding, we just send an empty MSG_DATA message, which works with all
+ * rsync versions.  This avoids any message forwarding, and leaves the raw-data
+ * stream alone (since we can never be quite sure if that stream is in the
+ * right state for a keep-alive message). */
+void maybe_send_keepalive(time_t now, int flags)
 {
-       if (time(NULL) - last_io_out >= allowed_lull) {
-               if (!iobuf.msg.len && iobuf.out.len == iobuf.out_empty_len) {
-                       if (protocol_version < 29)
-                               return; /* there's nothing we can do */
-                       if (protocol_version >= 30)
-                               send_msg(MSG_NOOP, "", 0, 0);
-                       else {
-                               write_int(iobuf.out_fd, cur_flist->used);
-                               write_shortint(iobuf.out_fd, ITEM_IS_NEW);
-                       }
-               }
-               if (iobuf.msg.len)
+       if (flags & MSK_ACTIVE_RECEIVER)
+               last_io_in = now; /* Fudge things when we're working hard on the files. */
+
+       if (now - last_io_out >= allowed_lull) {
+               /* The receiver is special:  it only sends keep-alive messages if it is
+                * actively receiving data.  Otherwise, it lets the generator time out. */
+               if (am_receiver && now - last_io_in >= io_timeout)
+                       return;
+
+               if (!iobuf.msg.len && iobuf.out.len == iobuf.out_empty_len)
+                       send_msg(MSG_DATA, "", 0, 0);
+               if (!(flags & MSK_ALLOW_FLUSH)) {
+                       /* Let the caller worry about writing out the data. */
+               } else if (iobuf.msg.len)
                        perform_io(iobuf.msg.size - iobuf.msg.len + 1, PIO_NEED_MSGROOM);
                else if (iobuf.out.len > iobuf.out_empty_len)
                        io_flush(NORMAL_FLUSH);
@@ -1281,17 +1384,22 @@ void stop_flist_forward(void)
 /* Read a message from a multiplexed source. */
 static void read_a_msg(void)
 {
-       char *data, line[BIGPATHBUFLEN];
+       char data[BIGPATHBUFLEN];
        int tag, val;
        size_t msg_bytes;
 
-       data = perform_io(4, PIO_INPUT_AND_CONSUME);
-       tag = IVAL(data, 0);
+       /* This ensures that perform_io() does not try to do any message reading
+        * until we've read all of the data for this message.  We should also
+        * try to avoid calling things that will cause data to be written via
+        * perform_io() prior to this being reset to 1. */
+       iobuf.in_multiplexed = -1;
+
+       tag = raw_read_int();
 
        msg_bytes = tag & 0xFFFFFF;
        tag = (tag >> 24) - MPLEX_BASE;
 
-       if (DEBUG_GTE(IO, 1) && (msgs2stderr || tag != MSG_INFO))
+       if (DEBUG_GTE(IO, 1) && msgs2stderr)
                rprintf(FINFO, "[%s] got msg=%d, len=%ld\n", who_am_i(), (int)tag, (long)msg_bytes);
 
        switch (tag) {
@@ -1301,35 +1409,38 @@ static void read_a_msg(void)
                 * the buffer the msg data will end once it is read.  It is
                 * possible that this points off the end of the buffer, in
                 * which case the gradual reading of the input stream will
-                * cause this value to decrease and eventually become real. */
-               iobuf.raw_input_ends_before = iobuf.in.pos + msg_bytes;
+                * cause this value to wrap around and eventually become real. */
+               if (msg_bytes)
+                       iobuf.raw_input_ends_before = iobuf.in.pos + msg_bytes;
+               iobuf.in_multiplexed = 1;
                break;
        case MSG_STATS:
                if (msg_bytes != sizeof stats.total_read || !am_generator)
                        goto invalid_msg;
-               data = perform_io(sizeof stats.total_read, PIO_INPUT_AND_CONSUME);
-               memcpy((char*)&stats.total_read, data, sizeof stats.total_read);
+               raw_read_buf((char*)&stats.total_read, sizeof stats.total_read);
+               iobuf.in_multiplexed = 1;
                break;
        case MSG_REDO:
                if (msg_bytes != 4 || !am_generator)
                        goto invalid_msg;
-               data = perform_io(4, PIO_INPUT_AND_CONSUME);
-               got_flist_entry_status(FES_REDO, IVAL(data, 0));
+               val = raw_read_int();
+               iobuf.in_multiplexed = 1;
+               got_flist_entry_status(FES_REDO, val);
                break;
        case MSG_IO_ERROR:
-               if (msg_bytes != 4 || am_sender)
+               if (msg_bytes != 4)
                        goto invalid_msg;
-               data = perform_io(4, PIO_INPUT_AND_CONSUME);
-               val = IVAL(data, 0);
+               val = raw_read_int();
+               iobuf.in_multiplexed = 1;
                io_error |= val;
-               if (!am_generator)
+               if (am_receiver)
                        send_msg_int(MSG_IO_ERROR, val);
                break;
        case MSG_IO_TIMEOUT:
                if (msg_bytes != 4 || am_server || am_generator)
                        goto invalid_msg;
-               data = perform_io(4, PIO_INPUT_AND_CONSUME);
-               val = IVAL(data, 0);
+               val = raw_read_int();
+               iobuf.in_multiplexed = 1;
                if (!io_timeout || io_timeout > val) {
                        if (INFO_GTE(MISC, 2))
                                rprintf(FINFO, "Setting --timeout=%d to match server\n", val);
@@ -1337,15 +1448,20 @@ static void read_a_msg(void)
                }
                break;
        case MSG_NOOP:
+               /* Support protocol-30 keep-alive method. */
+               if (msg_bytes != 0)
+                       goto invalid_msg;
+               iobuf.in_multiplexed = 1;
                if (am_sender)
-                       maybe_send_keepalive();
+                       maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH);
                break;
        case MSG_DELETED:
-               if (msg_bytes >= sizeof line)
+               if (msg_bytes >= sizeof data)
                        goto overflow;
                if (am_generator) {
-                       memcpy(line, perform_io(msg_bytes, PIO_INPUT_AND_CONSUME), msg_bytes);
-                       send_msg(MSG_DELETED, line, msg_bytes, 1);
+                       raw_read_buf(data, msg_bytes);
+                       iobuf.in_multiplexed = 1;
+                       send_msg(MSG_DELETED, data, msg_bytes, 1);
                        break;
                }
 #ifdef ICONV_OPTION
@@ -1355,12 +1471,12 @@ static void read_a_msg(void)
                        int add_null = 0;
                        int flags = ICB_INCLUDE_BAD | ICB_INIT;
 
-                       INIT_CONST_XBUF(outbuf, line);
+                       INIT_CONST_XBUF(outbuf, data);
                        INIT_XBUF(inbuf, ibuf, 0, (size_t)-1);
 
                        while (msg_bytes) {
                                size_t len = msg_bytes > sizeof ibuf - inbuf.len ? sizeof ibuf - inbuf.len : msg_bytes;
-                               memcpy(ibuf + inbuf.len, perform_io(len, PIO_INPUT_AND_CONSUME), len);
+                               raw_read_buf(ibuf + inbuf.len, len);
                                inbuf.pos = 0;
                                inbuf.len += len;
                                if (!(msg_bytes -= len) && !ibuf[inbuf.len-1])
@@ -1382,13 +1498,14 @@ static void read_a_msg(void)
                        msg_bytes = outbuf.len;
                } else
 #endif
-                       memcpy(line, perform_io(msg_bytes, PIO_INPUT_AND_CONSUME), msg_bytes);
+                       raw_read_buf(data, msg_bytes);
+               iobuf.in_multiplexed = 1;
                /* A directory name was sent with the trailing null */
-               if (msg_bytes > 0 && !line[msg_bytes-1])
-                       log_delete(line, S_IFDIR);
+               if (msg_bytes > 0 && !data[msg_bytes-1])
+                       log_delete(data, S_IFDIR);
                else {
-                       line[msg_bytes] = '\0';
-                       log_delete(line, S_IFREG);
+                       data[msg_bytes] = '\0';
+                       log_delete(data, S_IFREG);
                }
                break;
        case MSG_SUCCESS:
@@ -1399,8 +1516,8 @@ static void read_a_msg(void)
                                inc_recurse ? "/inc" : "");
                        exit_cleanup(RERR_STREAMIO);
                }
-               data = perform_io(4, PIO_INPUT_AND_CONSUME);
-               val = IVAL(data, 0);
+               val = raw_read_int();
+               iobuf.in_multiplexed = 1;
                if (am_generator)
                        got_flist_entry_status(FES_SUCCESS, val);
                else
@@ -1409,8 +1526,8 @@ static void read_a_msg(void)
        case MSG_NO_SEND:
                if (msg_bytes != 4)
                        goto invalid_msg;
-               data = perform_io(4, PIO_INPUT_AND_CONSUME);
-               val = IVAL(data, 0);
+               val = raw_read_int();
+               iobuf.in_multiplexed = 1;
                if (am_generator)
                        got_flist_entry_status(FES_NO_SEND, val);
                else
@@ -1429,7 +1546,7 @@ static void read_a_msg(void)
        case MSG_ERROR:
        case MSG_ERROR_XFER:
        case MSG_WARNING:
-               if (msg_bytes >= sizeof line) {
+               if (msg_bytes >= sizeof data) {
                    overflow:
                        rprintf(FERROR,
                                "multiplexing overflow %d:%lu [%s%s]\n",
@@ -1437,34 +1554,51 @@ static void read_a_msg(void)
                                inc_recurse ? "/inc" : "");
                        exit_cleanup(RERR_STREAMIO);
                }
-               memcpy(line, perform_io(msg_bytes, PIO_INPUT_AND_CONSUME), msg_bytes);
-               rwrite((enum logcode)tag, line, msg_bytes, !am_generator);
+               raw_read_buf(data, msg_bytes);
+               iobuf.in_multiplexed = 1;
+               rwrite((enum logcode)tag, data, msg_bytes, !am_generator);
                if (first_message) {
-                       if (list_only && !am_sender && tag == 1 && msg_bytes < sizeof line) {
-                               line[msg_bytes] = '\0';
-                               check_for_d_option_error(line);
+                       if (list_only && !am_sender && tag == 1 && msg_bytes < sizeof data) {
+                               data[msg_bytes] = '\0';
+                               check_for_d_option_error(data);
                        }
                        first_message = 0;
                }
                break;
        case MSG_ERROR_EXIT:
+               if (msg_bytes == 4)
+                       val = raw_read_int();
+               else if (msg_bytes == 0)
+                       val = 0;
+               else
+                       goto invalid_msg;
+               iobuf.in_multiplexed = 1;
+               if (DEBUG_GTE(EXIT, 3))
+                       rprintf(FINFO, "[%s] got MSG_ERROR_EXIT with %ld bytes\n", who_am_i(), (long)msg_bytes);
                if (msg_bytes == 0) {
                        if (!am_sender && !am_generator) {
+                               if (DEBUG_GTE(EXIT, 3)) {
+                                       rprintf(FINFO, "[%s] sending MSG_ERROR_EXIT (len 0)\n",
+                                               who_am_i());
+                               }
                                send_msg(MSG_ERROR_EXIT, "", 0, 0);
                                io_flush(FULL_FLUSH);
                        }
-                       val = 0;
-               } else if (msg_bytes == 4) {
-                       data = perform_io(4, PIO_INPUT_AND_CONSUME);
-                       val = IVAL(data, 0);
-                       if (protocol_version >= 31) {
-                               if (am_generator)
-                                       send_msg_int(MSG_ERROR_EXIT, val);
-                               else
-                                       send_msg(MSG_ERROR_EXIT, "", 0, 0);
+               } else if (protocol_version >= 31) {
+                       if (am_generator) {
+                               if (DEBUG_GTE(EXIT, 3)) {
+                                       rprintf(FINFO, "[%s] sending MSG_ERROR_EXIT with exit_code %d\n",
+                                               who_am_i(), val);
+                               }
+                               send_msg_int(MSG_ERROR_EXIT, val);
+                       } else {
+                               if (DEBUG_GTE(EXIT, 3)) {
+                                       rprintf(FINFO, "[%s] sending MSG_ERROR_EXIT (len 0)\n",
+                                               who_am_i());
+                               }
+                               send_msg(MSG_ERROR_EXIT, "", 0, 0);
                        }
-               } else
-                       goto invalid_msg;
+               }
                /* Send a negative linenum so that we don't end up
                 * with a duplicate exit message. */
                _exit_cleanup(val, __FILE__, 0 - __LINE__);
@@ -1473,6 +1607,26 @@ static void read_a_msg(void)
                        tag, who_am_i(), inc_recurse ? "/inc" : "");
                exit_cleanup(RERR_STREAMIO);
        }
+
+       assert(iobuf.in_multiplexed > 0);
+}
+
+static void drain_multiplex_messages(void)
+{
+       while (IN_MULTIPLEXED_AND_READY && iobuf.in.len) {
+               if (iobuf.raw_input_ends_before) {
+                       size_t raw_len = iobuf.raw_input_ends_before - iobuf.in.pos;
+                       iobuf.raw_input_ends_before = 0;
+                       if (raw_len >= iobuf.in.len) {
+                               iobuf.in.len = 0;
+                               break;
+                       }
+                       iobuf.in.len -= raw_len;
+                       if ((iobuf.in.pos += raw_len) >= iobuf.in.size)
+                               iobuf.in.pos -= iobuf.in.size;
+               }
+               read_a_msg();
+       }
 }
 
 void wait_for_receiver(void)
@@ -1497,6 +1651,7 @@ void wait_for_receiver(void)
                        }
                } else {
                        struct file_list *flist;
+                       flist_receiving_enabled = False;
                        if (DEBUG_GTE(FLIST, 2)) {
                                rprintf(FINFO, "[%s] receiving flist for dir %d\n",
                                        who_am_i(), ndx);
@@ -1507,6 +1662,7 @@ void wait_for_receiver(void)
                        if (preserve_hard_links)
                                match_hard_links(flist);
 #endif
+                       flist_receiving_enabled = True;
                }
        }
 }
@@ -1628,12 +1784,12 @@ void read_buf(int f, char *buf, size_t len)
 {
        if (f != iobuf.in_fd) {
                if (safe_read(f, buf, len) != len)
-                       whine_about_eof(f); /* Doesn't return. */
+                       whine_about_eof(False); /* Doesn't return. */
                goto batch_copy;
        }
 
        if (!IN_MULTIPLEXED) {
-               memcpy(buf, perform_io(len, PIO_INPUT_AND_CONSUME), len);
+               raw_read_buf(buf, len);
                total_data_read += len;
                if (forward_flist_data)
                        write_buf(iobuf.out_fd, buf, len);
@@ -1644,20 +1800,15 @@ void read_buf(int f, char *buf, size_t len)
        }
 
        while (1) {
-               char *data;
                size_t siz;
 
                while (!iobuf.raw_input_ends_before)
                        read_a_msg();
 
                siz = MIN(len, iobuf.raw_input_ends_before - iobuf.in.pos);
-               data = perform_io(siz, PIO_INPUT_AND_CONSUME);
-               if (iobuf.in.pos == iobuf.raw_input_ends_before)
-                       iobuf.raw_input_ends_before = 0;
-
-               /* The bytes at the "data" pointer will survive long
-                * enough to make a copy, but not past future I/O. */
-               memcpy(buf, data, siz);
+               if (siz >= iobuf.in.size)
+                       siz = iobuf.in.size;
+               raw_read_buf(buf, siz);
                total_data_read += siz;
 
                if (forward_flist_data)
@@ -2085,11 +2236,11 @@ void io_printf(int fd, const char *format, ...)
        va_end(ap);
 
        if (len < 0)
-               exit_cleanup(RERR_STREAMIO);
+               exit_cleanup(RERR_PROTOCOL);
 
        if (len > (int)sizeof buf) {
                rprintf(FERROR, "io_printf() was too long for the buffer.\n");
-               exit_cleanup(RERR_STREAMIO);
+               exit_cleanup(RERR_PROTOCOL);
        }
 
        write_sbuf(fd, buf);
@@ -2103,6 +2254,9 @@ void io_start_multiplex_out(int fd)
        if (msgs2stderr && DEBUG_GTE(IO, 2))
                rprintf(FINFO, "[%s] io_start_multiplex_out(%d)\n", who_am_i(), fd);
 
+       if (!iobuf.msg.buf)
+               alloc_xbuf(&iobuf.msg, ROUND_UP_1024(IO_BUFFER_SIZE));
+
        iobuf.out_empty_len = 4; /* See also OUT_MULTIPLEXED */
        io_start_buffering_out(fd);
 
@@ -2116,7 +2270,7 @@ void io_start_multiplex_in(int fd)
        if (msgs2stderr && DEBUG_GTE(IO, 2))
                rprintf(FINFO, "[%s] io_start_multiplex_in(%d)\n", who_am_i(), fd);
 
-       iobuf.in_multiplexed = True; /* See also IN_MULTIPLEXED */
+       iobuf.in_multiplexed = 1; /* See also IN_MULTIPLEXED */
        io_start_buffering_in(fd);
 }
 
@@ -2127,7 +2281,7 @@ int io_end_multiplex_in(int mode)
        if (msgs2stderr && DEBUG_GTE(IO, 2))
                rprintf(FINFO, "[%s] io_end_multiplex_in(mode=%d)\n", who_am_i(), mode);
 
-       iobuf.in_multiplexed = False;
+       iobuf.in_multiplexed = 0;
        if (mode == MPLX_SWITCHING)
                iobuf.raw_input_ends_before = 0;
        else
@@ -2165,7 +2319,7 @@ void start_write_batch(int fd)
         * is involved. */
        write_int(batch_fd, protocol_version);
        if (protocol_version >= 30)
-               write_byte(batch_fd, inc_recurse);
+               write_byte(batch_fd, compat_flags);
        write_int(batch_fd, checksum_seed);
 
        if (am_sender)