From: Wayne Davison Date: Mon, 23 Nov 2009 16:16:18 +0000 (-0800) Subject: Change the handling of circular buffers to not waste 4 bytes X-Git-Url: https://mattmccutchen.net/rsync/rsync.git/commitdiff_plain/907e6a32a00ace7cf1e96f9c9132f83f4a208430 Change the handling of circular buffers to not waste 4 bytes all the time (we only waste from 1-3 bytes some of the time). --- diff --git a/io.c b/io.c index 4a6b95fb..9b1cdbcb 100644 --- a/io.c +++ b/io.c @@ -115,6 +115,17 @@ static char int_byte_extra[64] = { 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6, /* (C0 - FF)/4 */ }; +/* Our I/O buffers are sized with no bits on in the lowest byte of the "size" + * (indeed, our rounding of sizes in 1024-byte units assures more than this). + * This allows the code that is storing bytes near the physical end of a + * circular buffer to temporarily reduce the buffer's size (in order to make + * some storing idioms easier), while also making it simple to restore the + * buffer's actual size when the buffer's "pos" wraps around to the start (we + * just round the buffer's size up again). */ + +#define IOBUF_WAS_REDUCED(siz) ((siz) & 0xFF) +#define IOBUF_RESTORE_SIZE(siz) (((siz) | 0xFF) + 1) + #define IN_MULTIPLEXED (iobuf.in_multiplexed) #define OUT_MULTIPLEXED (iobuf.out_empty_len != 0) @@ -443,6 +454,39 @@ static void forward_filesfrom_data(void) } } +void reduce_iobuf_size(xbuf *out, size_t new_size) +{ + if (new_size < out->size) { + if (DEBUG_GTE(IO, 4)) { + const char *name = out == &iobuf.out ? "iobuf.out" + : out == &iobuf.msg ? "iobuf.msg" + : NULL; + if (name) { + rprintf(FINFO, "[%s] reduced size of %s (-%d)\n", + who_am_i(), name, (int)(out->size - new_size)); + } + } + out->size = new_size; + } +} + +void restore_iobuf_size(xbuf *out) +{ + if (IOBUF_WAS_REDUCED(out->size)) { + size_t new_size = IOBUF_RESTORE_SIZE(out->size); + if (DEBUG_GTE(IO, 4)) { + const char *name = out == &iobuf.out ? "iobuf.out" + : out == &iobuf.msg ? "iobuf.msg" + : NULL; + if (name) { + rprintf(FINFO, "[%s] restored size of %s (+%d)\n", + who_am_i(), name, (int)(new_size - out->size)); + } + } + out->size = new_size; + } +} + /* Perform buffered input and output until specified conditions are met. When * given a "needed" read requirement, we'll return without doing any I/O if the * iobuf.in bytes are already available. When reading, we'll read as many @@ -508,13 +552,12 @@ static char *perform_io(size_t needed, int flags) * Also make sure it will fit in the free space at the end, or * else we need to shift some bytes. */ if (needed && iobuf.in.size < needed) { - if (!(iobuf.in.buf = realloc_array(iobuf.in.buf, char, needed))) - out_of_memory("perform_io"); + size_t new_size = ROUND_UP_1024(needed); if (DEBUG_GTE(IO, 4)) { - rprintf(FINFO, "[%s] resized input buffer from %ld to %ld bytes.\n", - who_am_i(), (long)iobuf.in.size, (long)needed); + rprintf(FINFO, "[%s] resizing input buffer from %ld to %ld bytes.\n", + who_am_i(), (long)iobuf.in.size, (long)new_size); } - iobuf.in.size = needed; + realloc_xbuf(&iobuf.in, new_size); } if (iobuf.in.size - iobuf.in.pos < needed || (iobuf.in.len < needed && iobuf.in.len < 1024 @@ -621,15 +664,6 @@ static char *perform_io(size_t needed, int flags) SIVAL(iobuf.out.buf + iobuf.raw_data_header_pos, 0, ((MPLEX_BASE + (int)MSG_DATA)<<24) + iobuf.out.len - 4); - if (iobuf.raw_data_header_pos + 4 > iobuf.out.size) { - int siz = (int)(iobuf.raw_data_header_pos + 4 - iobuf.out.size); - /* We used some of the overflow bytes, so move them. */ - if (DEBUG_GTE(IO, 4)) { - rprintf(FINFO, "[%s] wrap-bytes moved: %d (perform_io)\n", - who_am_i(), siz); - } - memcpy(iobuf.out.buf, iobuf.out.buf + iobuf.out.size, siz); - } if (DEBUG_GTE(IO, 1)) { rprintf(FINFO, "[%s] send_msg(%d, %ld)\n", @@ -640,6 +674,13 @@ static char *perform_io(size_t needed, int flags) iobuf.raw_data_header_pos = iobuf.raw_flushing_ends_before; if (iobuf.raw_data_header_pos >= iobuf.out.size) iobuf.raw_data_header_pos -= iobuf.out.size; + else if (iobuf.raw_data_header_pos + 4 > iobuf.out.size) { + /* The 4-byte header won't fit at the end of the buffer, + * so we'll temporarily reduce the output buffer's size + * and put the header at the start of the buffer. */ + reduce_iobuf_size(&iobuf.out, iobuf.raw_data_header_pos); + iobuf.raw_data_header_pos = 0; + } /* Yes, it is possible for this to make len > size for a while. */ iobuf.out.len += 4; } @@ -785,10 +826,12 @@ static char *perform_io(size_t needed, int flags) if (iobuf.raw_flushing_ends_before) iobuf.raw_flushing_ends_before -= out->size; out->pos = 0; + restore_iobuf_size(out); } else if (out->pos == iobuf.raw_flushing_ends_before) iobuf.raw_flushing_ends_before = 0; if ((out->len -= n) == empty_buf_len) { out->pos = 0; + restore_iobuf_size(out); if (empty_buf_len) iobuf.raw_data_header_pos = 0; } @@ -832,7 +875,7 @@ void noop_io_until_death(void) int send_msg(enum msgcode code, const char *buf, size_t len, int convert) { char *hdr; - size_t pos; + size_t needed, pos; BOOL want_debug = DEBUG_GTE(IO, 1) && convert >= 0 && (msgs2stderr || code != MSG_INFO); if (!OUT_MULTIPLEXED) @@ -841,21 +884,32 @@ int send_msg(enum msgcode code, const char *buf, size_t len, int convert) if (want_debug) rprintf(FINFO, "[%s] send_msg(%d, %ld)\n", who_am_i(), (int)code, (long)len); + /* When checking for enough free space for this message, we need to + * make sure that there is space for the 4-byte header, plus we'll + * assume that we may waste up to 3 bytes (if the header doesn't fit + * at the physical end of the buffer). */ #ifdef ICONV_OPTION if (convert > 0 && ic_send == (iconv_t)-1) convert = 0; if (convert > 0) { /* Ensuring double-size room leaves space for maximal conversion expansion. */ - if (iobuf.msg.len + len*2 + 4 > iobuf.msg.size) - perform_io(len*2 + 4, PIO_NEED_MSGROOM); + needed = len*2 + 4 + 3; } else #endif - if (iobuf.msg.len + len + 4 > iobuf.msg.size) - perform_io(len + 4, PIO_NEED_MSGROOM); + needed = len + 4 + 3; + if (iobuf.msg.len + needed > iobuf.msg.size) + perform_io(needed, PIO_NEED_MSGROOM); pos = iobuf.msg.pos + iobuf.msg.len; /* Must be set after any flushing. */ if (pos >= iobuf.msg.size) pos -= iobuf.msg.size; + else if (pos + 4 > iobuf.msg.size) { + /* The 4-byte header won't fit at the end of the buffer, + * so we'll temporarily reduce the message buffer's size + * and put the header at the start of the buffer. */ + reduce_iobuf_size(&iobuf.msg, pos); + pos = 0; + } hdr = iobuf.msg.buf + pos; iobuf.msg.len += 4; /* Allocate room for the coming header bytes. */ @@ -893,13 +947,6 @@ int send_msg(enum msgcode code, const char *buf, size_t len, int convert) } SIVAL(hdr, 0, ((MPLEX_BASE + (int)code)<<24) + len); - /* If the header used any overflow bytes, move them to the start. */ - if ((pos = hdr+4 - iobuf.msg.buf) > iobuf.msg.size) { - int siz = (int)(pos - iobuf.msg.size); - if (DEBUG_GTE(IO, 4)) - rprintf(FINFO, "[%s] wrap-bytes moved: %d (send_msg)\n", who_am_i(), siz); - memcpy(iobuf.msg.buf, iobuf.msg.buf + iobuf.msg.size, siz); - } if (want_debug && convert > 0) rprintf(FINFO, "[%s] converted msg len=%ld\n", who_am_i(), (long)len); @@ -1169,13 +1216,6 @@ BOOL io_start_buffering_out(int f_out) if (msgs2stderr && DEBUG_GTE(IO, 2)) rprintf(FINFO, "[%s] io_start_buffering_out(%d)\n", who_am_i(), f_out); - if (OUT_MULTIPLEXED && !iobuf.msg.buf) { - iobuf.msg.size = IO_BUFFER_SIZE - 4; - if (!(iobuf.msg.buf = new_array(char, iobuf.msg.size + 4))) - out_of_memory("io_start_buffering_out"); - iobuf.msg.pos = iobuf.msg.len = 0; - } - if (iobuf.out.buf) { if (iobuf.out_fd == -1) iobuf.out_fd = f_out; @@ -1184,11 +1224,7 @@ BOOL io_start_buffering_out(int f_out) return False; } - iobuf.out.size = IO_BUFFER_SIZE * 2 - 4; - /* The 4 overflow bytes makes some circular-buffer wrapping operations easier. */ - if (!(iobuf.out.buf = new_array(char, iobuf.out.size + 4))) - out_of_memory("io_start_buffering_out"); - iobuf.out.pos = iobuf.out.len = 0; + alloc_xbuf(&iobuf.out, ROUND_UP_1024(IO_BUFFER_SIZE * 2)); iobuf.out_fd = f_out; return True; @@ -1207,12 +1243,7 @@ BOOL io_start_buffering_in(int f_in) return False; } - iobuf.in.size = IO_BUFFER_SIZE; - if (!(iobuf.in.buf = new_array(char, iobuf.in.size))) - out_of_memory("io_start_buffering_in"); - - iobuf.in.pos = iobuf.in.len = 0; - + alloc_xbuf(&iobuf.in, ROUND_UP_1024(IO_BUFFER_SIZE)); iobuf.in_fd = f_in; return True; @@ -2130,6 +2161,9 @@ void io_start_multiplex_out(int fd) if (msgs2stderr && DEBUG_GTE(IO, 2)) rprintf(FINFO, "[%s] io_start_multiplex_out(%d)\n", who_am_i(), fd); + if (!iobuf.msg.buf) + alloc_xbuf(&iobuf.msg, ROUND_UP_1024(IO_BUFFER_SIZE)); + iobuf.out_empty_len = 4; /* See also OUT_MULTIPLEXED */ io_start_buffering_out(fd); diff --git a/rsync.c b/rsync.c index a3f9b950..3d45d01b 100644 --- a/rsync.c +++ b/rsync.c @@ -133,42 +133,53 @@ void setup_iconv(void) # endif } -/* Move any bytes in the overflow space to the start. This avoids any issue - * with a multibyte sequence that needs to span the end of the buffer. */ -static void wrap_overflow(xbuf *out, int siz) -{ - if (DEBUG_GTE(IO, 4)) - rprintf(FINFO, "[%s] wrap-bytes moved: %d (iconvbufs)\n", who_am_i(), siz); - memcpy(out->buf, out->buf + out->size, siz); -} - -/* This function converts the characters in the "in" xbuf into characters - * in the "out" xbuf. The "len" of the "in" xbuf is used starting from its - * "pos". The "size" of the "out" xbuf restricts how many characters can be - * stored, starting at its "pos+len" position. Note that the last byte of - * the buffer is never used, which reserves space for a terminating '\0'. - * If ICB_CIRCULAR_OUT is set, the output data can wrap around to the start, - * and the buf IS ASSUMED TO HAVE AN EXTRA 4 BYTES OF OVERFLOW SPACE at the - * end (the buffer will also not be expanded if it is already allocated). +/* This function converts the chars in the "in" xbuf into characters in the + * "out" xbuf. The ".len" chars of the "in" xbuf is used starting from its + * ".pos". The ".size" of the "out" xbuf restricts how many characters can + * be stored, starting at its ".pos+.len" position. Note that the last byte + * of the "out" xbuf is not used, which reserves space for a trailing '\0' + * (though it is up to the caller to store a trailing '\0', as needed). + * * We return a 0 on success or a -1 on error. An error also sets errno to * E2BIG, EILSEQ, or EINVAL (see below); otherwise errno will be set to 0. - * The "in" xbuf is altered to update "pos" and "len". The "out" xbuf has - * data appended, and its "len" incremented. If ICB_EXPAND_OUT is set in - * "flags", the "out" xbuf will also be allocated if empty, and expanded if - * too small (so E2BIG will not be returned). If ICB_INCLUDE_BAD is set in - * "flags", any badly-encoded chars are included verbatim in the "out" xbuf, - * so EILSEQ will not be returned. Likewise for ICB_INCLUDE_INCOMPLETE with - * respect to an incomplete multi-byte char at the end, which ensures that - * EINVAL is not returned. If ICB_INIT is set, the iconv() conversion state - * is initialized prior to processing the characters. */ + * The "in" xbuf is altered to update ".pos" and ".len". The "out" xbuf has + * data appended, and its ".len" incremented (see below for a ".size" note). + * + * If ICB_CIRCULAR_OUT is set in "flags", the chars going into the "out" xbuf + * can wrap around to the start, and the xbuf may have its ".size" reduced + * (presumably by 1 byte) if the iconv code doesn't have space to store a + * multi-byte character at the physical end of the ".buf" (though no reducing + * happens if ".pos" is < 1, since there is no room to wrap around). + * + * If ICB_EXPAND_OUT is set in "flags", the "out" xbuf will be allocated if + * empty, and (as long as ICB_CIRCULAR_OUT is not set) expanded if too small. + * This prevents the return of E2BIG (except for a circular xbuf). + * + * If ICB_INCLUDE_BAD is set in "flags", any badly-encoded chars are included + * verbatim in the "out" xbuf, so EILSEQ will not be returned. + * + * If ICB_INCLUDE_INCOMPLETE is set in "flags", any incomplete multi-byte + * chars are included, which ensures that EINVAL is not returned. + * + * If ICB_INIT is set, the iconv() conversion state is initialized prior to + * processing the characters. */ int iconvbufs(iconv_t ic, xbuf *in, xbuf *out, int flags) { ICONV_CONST char *ibuf; size_t icnt, ocnt, opos; char *obuf; - if (!out->size && flags & ICB_EXPAND_OUT) - alloc_xbuf(out, 1024); + if (!out->size && flags & ICB_EXPAND_OUT) { + size_t siz = ROUND_UP_1024(in->len * 2); + alloc_xbuf(out, siz); + } else if (out->len+1 >= out->size) { + /* There is no room to even start storing data. */ + if (!(flags & ICB_EXPAND_OUT) || flags & ICB_CIRCULAR_OUT) { + errno = E2BIG; + return -1; + } + realloc_xbuf(out, out->size + ROUND_UP_1024(in->len * 2)); + } if (flags & ICB_INIT) iconv(ic, NULL, 0, NULL, 0); @@ -180,12 +191,13 @@ int iconvbufs(iconv_t ic, xbuf *in, xbuf *out, int flags) if (flags & ICB_CIRCULAR_OUT) { if (opos >= out->size) { opos -= out->size; + /* We know that out->pos is not 0 due to the "no room" check + * above, so this can't go "negative". */ ocnt = out->pos - opos - 1; } else { - /* We only make use of the 4 bytes of overflow buffer - * if there is room to move the bytes to the start of - * the circular buffer. */ - ocnt = out->size - opos + MIN((ssize_t)out->pos - 1, 4); + /* Allow the use of all bytes to the physical end of the buffer + * unless pos is 0, in which case we reserve our trailing '\0'. */ + ocnt = out->size - opos - (out->pos ? 0 : 1); } } else ocnt = out->size - opos - 1; @@ -201,22 +213,32 @@ int iconvbufs(iconv_t ic, xbuf *in, xbuf *out, int flags) } else if (errno == EILSEQ) { if (!(flags & ICB_INCLUDE_BAD)) goto finish; - } else { + } else if (errno == E2BIG) { + size_t siz; opos = obuf - out->buf; - if (flags & ICB_CIRCULAR_OUT && opos > out->size) { - wrap_overflow(out, opos -= out->size); - obuf = out->buf + opos; - if ((ocnt = out->pos - opos - 1) > 0) - continue; + if (flags & ICB_CIRCULAR_OUT && out->pos > 1 && opos > out->pos) { + /* We are in a divided circular buffer at the physical + * end with room to wrap to the start. If iconv() refused + * to use one or more trailing bytes in the buffer, we + * set the size to ignore the unused bytes. */ + if (opos < out->size) + reduce_iobuf_size(out, opos); + obuf = out->buf; + ocnt = out->pos - 1; + continue; } if (!(flags & ICB_EXPAND_OUT) || flags & ICB_CIRCULAR_OUT) { errno = E2BIG; goto finish; } - realloc_xbuf(out, out->size + 1024); + siz = ROUND_UP_1024(in->len * 2); + realloc_xbuf(out, out->size + siz); obuf = out->buf + opos; - ocnt += 1024; + ocnt += siz; continue; + } else { + rsyserr(FERROR, errno, "unexpected error from iconv()"); + exit_cleanup(RERR_UNSUPPORTED); } *obuf++ = *ibuf++; ocnt--, icnt--; @@ -227,12 +249,8 @@ int iconvbufs(iconv_t ic, xbuf *in, xbuf *out, int flags) finish: opos = obuf - out->buf; - if (flags & ICB_CIRCULAR_OUT) { - if (opos > out->size) - wrap_overflow(out, opos - out->size); - else if (opos < out->pos) - opos += out->size; - } + if (flags & ICB_CIRCULAR_OUT && opos < out->pos) + opos += out->size; out->len = opos - out->pos; in->len = icnt; diff --git a/rsync.h b/rsync.h index abdb6e02..68f236dc 100644 --- a/rsync.h +++ b/rsync.h @@ -136,6 +136,8 @@ /* For compatibility with older rsyncs */ #define OLD_MAX_BLOCK_SIZE ((int32)1 << 29) +#define ROUND_UP_1024(siz) ((siz) & (1024-1) ? ((siz) | (1024-1)) + 1 : (siz)) + #define IOERR_GENERAL (1<<0) /* For backward compatibility, this must == 1 */ #define IOERR_VANISHED (1<<1) #define IOERR_DEL_LIMIT (1<<2)