From 4c17cdcb64e3f333f141b43b3193a19eece4c8a1 Mon Sep 17 00:00:00 2001 From: Wayne Davison Date: Fri, 26 Oct 2007 22:11:19 +0000 Subject: [PATCH] Chunk a really large file to avoid sender-side hash-table overload. --- match.c | 30 ++++++++++++++++++++---------- rsync.h | 2 +- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/match.c b/match.c index 8367d158..01e91173 100644 --- a/match.c +++ b/match.c @@ -46,9 +46,9 @@ static int32 *hash_table; #define SUM2HASH2(s1,s2) (((s1) + (s2)) & 0xFFFF) #define SUM2HASH(sum) SUM2HASH2((sum)&0xFFFF,(sum)>>16) -static void build_hash_table(struct sum_struct *s) +static int32 build_hash_table(struct sum_struct *s, int32 start) { - int32 i; + int32 i, end = s->count; if (!hash_table) { hash_table = new_array(int32, TABLESIZE); @@ -58,11 +58,21 @@ static void build_hash_table(struct sum_struct *s) memset(hash_table, 0xFF, TABLESIZE * sizeof hash_table[0]); - for (i = 0; i < s->count; i++) { + if (end - start > TABLESIZE*8/10) + end = start + TABLESIZE*8/10; + + for (i = start; i < end; i++) { uint32 t = SUM2HASH(s->sums[i].sum1); s->sums[i].chain = hash_table[t]; hash_table[t] = i; } + + if (verbose > 2) { + rprintf(FINFO, "built hash table for entries %ld - %ld\n", + (long)start, (long)end - 1); + } + + return end; } @@ -120,8 +130,8 @@ static void matched(int f, struct sum_struct *s, struct map_struct *buf, static void hash_search(int f,struct sum_struct *s, struct map_struct *buf, OFF_T len) { - OFF_T offset, end; - int32 k, want_i, backup; + OFF_T offset, end, reset = 0; + int32 k, want_i, backup, sum_pos = 0; char sum2[SUM_LENGTH]; uint32 s1, s2, sum; int more; @@ -159,6 +169,11 @@ static void hash_search(int f,struct sum_struct *s, int done_csum2 = 0; int32 i; + if (offset >= reset) { + sum_pos = build_hash_table(s, sum_pos); + reset = sum_pos * s->blength; + } + if (verbose > 4) { rprintf(FINFO, "offset=%.0f sum=%04x%04x\n", (double)offset, s2 & 0xFFFF, s1 & 0xFFFF); @@ -336,11 +351,6 @@ void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len) } if (len > 0 && s->count > 0) { - build_hash_table(s); - - if (verbose > 2) - rprintf(FINFO,"built hash table\n"); - hash_search(f, s, buf, len); if (verbose > 2) diff --git a/rsync.h b/rsync.h index d5111b3c..78d51c1d 100644 --- a/rsync.h +++ b/rsync.h @@ -122,7 +122,7 @@ #define CHUNK_SIZE (32*1024) #define MAX_MAP_SIZE (256*1024) #define IO_BUFFER_SIZE (4092) -#define MAX_BLOCK_SIZE ((int32)1 << 29) +#define MAX_BLOCK_SIZE ((int32)1 << 17) #define IOERR_GENERAL (1<<0) /* For backward compatibility, this must == 1 */ #define IOERR_VANISHED (1<<1) -- 2.34.1