Commit | Line | Data |
---|---|---|
f264662f WD |
1 | This patch causes the sender to segment its use of the block-finding |
2 | hashtable for really large files. This avoids overloading the | |
3 | hashtable, and the slowdown that ensues from trying to find | |
4 | blocks in an overloaded hashtable. This does mean that the sender | |
5 | won't notice some migrations of data past segment boundaries, but since | |
6 | this only affects files with ~1.6GB or more of data, and the blocksize is | |
7 | already so large that we only find really large sequences of matching | |
8 | data anyway, I don't consider this that big of a loss. | |
9 | ||
10 | I also decreased the MAX_BLOCK_SIZE value to something more reasonable. | |
11 | ||
12 | To use this patch, run these commands for a successful build: | |
13 | ||
14 | patch -p1 <patches/segment_large_hash.diff | |
15 | ./configure (optional if already run) | |
16 | make | |
17 | ||
18 | --- old/match.c | |
19 | +++ new/match.c | |
20 | @@ -46,9 +46,9 @@ static int32 *hash_table; | |
21 | #define SUM2HASH2(s1,s2) (((s1) + (s2)) & 0xFFFF) | |
22 | #define SUM2HASH(sum) SUM2HASH2((sum)&0xFFFF,(sum)>>16) | |
23 | ||
24 | -static void build_hash_table(struct sum_struct *s) | |
25 | +static int32 build_hash_table(struct sum_struct *s, int32 start) | |
26 | { | |
27 | - int32 i; | |
28 | + int32 i, end = s->count; | |
29 | ||
30 | if (!hash_table) { | |
31 | hash_table = new_array(int32, TABLESIZE); | |
32 | @@ -58,11 +58,21 @@ static void build_hash_table(struct sum_ | |
33 | ||
34 | memset(hash_table, 0xFF, TABLESIZE * sizeof hash_table[0]); | |
35 | ||
36 | - for (i = 0; i < s->count; i++) { | |
37 | + if (end - start > TABLESIZE*8/10) | |
38 | + end = start + TABLESIZE*8/10; | |
39 | + | |
40 | + for (i = start; i < end; i++) { | |
41 | uint32 t = SUM2HASH(s->sums[i].sum1); | |
42 | s->sums[i].chain = hash_table[t]; | |
43 | hash_table[t] = i; | |
44 | } | |
45 | + | |
46 | + if (verbose > 2) { | |
47 | + rprintf(FINFO, "built hash table for entries %ld - %ld\n", | |
48 | + (long)start, (long)end - 1); | |
49 | + } | |
50 | + | |
51 | + return end; | |
52 | } | |
53 | ||
54 | ||
55 | @@ -120,8 +130,8 @@ static void matched(int f, struct sum_st | |
56 | static void hash_search(int f,struct sum_struct *s, | |
57 | struct map_struct *buf, OFF_T len) | |
58 | { | |
59 | - OFF_T offset, end; | |
60 | - int32 k, want_i, backup; | |
61 | + OFF_T offset, end, reset = 0; | |
62 | + int32 k, want_i, backup, sum_pos = 0; | |
63 | char sum2[SUM_LENGTH]; | |
64 | uint32 s1, s2, sum; | |
65 | int more; | |
66 | @@ -159,6 +169,11 @@ static void hash_search(int f,struct sum | |
67 | int done_csum2 = 0; | |
68 | int32 i; | |
69 | ||
70 | + if (offset >= reset) { | |
71 | + sum_pos = build_hash_table(s, sum_pos); | |
72 | + reset = sum_pos * s->blength; | |
73 | + } | |
74 | + | |
75 | if (verbose > 4) { | |
76 | rprintf(FINFO, "offset=%.0f sum=%04x%04x\n", | |
77 | (double)offset, s2 & 0xFFFF, s1 & 0xFFFF); | |
78 | @@ -336,11 +351,6 @@ void match_sums(int f, struct sum_struct | |
79 | } | |
80 | ||
81 | if (len > 0 && s->count > 0) { | |
82 | - build_hash_table(s); | |
83 | - | |
84 | - if (verbose > 2) | |
85 | - rprintf(FINFO,"built hash table\n"); | |
86 | - | |
87 | hash_search(f, s, buf, len); | |
88 | ||
89 | if (verbose > 2) | |
90 | --- old/rsync.h | |
91 | +++ new/rsync.h | |
92 | @@ -121,7 +121,7 @@ | |
93 | #define CHUNK_SIZE (32*1024) | |
94 | #define MAX_MAP_SIZE (256*1024) | |
95 | #define IO_BUFFER_SIZE (4092) | |
96 | -#define MAX_BLOCK_SIZE ((int32)1 << 29) | |
97 | +#define MAX_BLOCK_SIZE ((int32)1 << 17) | |
98 | ||
99 | #define IOERR_GENERAL (1<<0) /* For backward compatibility, this must == 1 */ | |
100 | #define IOERR_VANISHED (1<<1) |