Fixed failing hunks.
[rsync/rsync-patches.git] / segment_large_hash.diff
CommitLineData
f264662f
WD
1This patch causes the sender to segment its use of the block-finding
2hashtable for really large files. This avoids overloading the
3hashtable, and the slowdown that ensues from trying to find
4blocks in an overloaded hashtable. This does mean that the sender
5won't notice some migrations of data past segment boundaries, but since
6this only affects files with ~1.6GB or more data, and the blocksize is
7already so large that we only find really large sequences of matching
8data anyway, I don't consider this that big of a loss.
9
10I also decreased the MAX_BLOCK_SIZE value to something more reasonable.
11
12To use this patch, run these commands for a successful build:
13
14 patch -p1 <patches/segment_large_hash.diff
15 ./configure (optional if already run)
16 make
17
18--- old/match.c
19+++ new/match.c
20@@ -46,9 +46,9 @@ static int32 *hash_table;
21 #define SUM2HASH2(s1,s2) (((s1) + (s2)) & 0xFFFF)
22 #define SUM2HASH(sum) SUM2HASH2((sum)&0xFFFF,(sum)>>16)
23
24-static void build_hash_table(struct sum_struct *s)
25+static int32 build_hash_table(struct sum_struct *s, int32 start)
26 {
27- int32 i;
28+ int32 i, end = s->count;
29
30 if (!hash_table) {
31 hash_table = new_array(int32, TABLESIZE);
32@@ -58,11 +58,21 @@ static void build_hash_table(struct sum_
33
34 memset(hash_table, 0xFF, TABLESIZE * sizeof hash_table[0]);
35
36- for (i = 0; i < s->count; i++) {
37+ if (end - start > TABLESIZE*8/10)
38+ end = start + TABLESIZE*8/10;
39+
40+ for (i = start; i < end; i++) {
41 uint32 t = SUM2HASH(s->sums[i].sum1);
42 s->sums[i].chain = hash_table[t];
43 hash_table[t] = i;
44 }
45+
46+ if (verbose > 2) {
47+ rprintf(FINFO, "built hash table for entries %ld - %ld\n",
48+ (long)start, (long)end - 1);
49+ }
50+
51+ return end;
52 }
53
54
55@@ -120,8 +130,8 @@ static void matched(int f, struct sum_st
56 static void hash_search(int f,struct sum_struct *s,
57 struct map_struct *buf, OFF_T len)
58 {
59- OFF_T offset, end;
60- int32 k, want_i, backup;
61+ OFF_T offset, end, reset = 0;
62+ int32 k, want_i, backup, sum_pos = 0;
63 char sum2[SUM_LENGTH];
64 uint32 s1, s2, sum;
65 int more;
66@@ -159,6 +169,11 @@ static void hash_search(int f,struct sum
67 int done_csum2 = 0;
68 int32 i;
69
70+ if (offset >= reset) {
71+ sum_pos = build_hash_table(s, sum_pos);
72+ reset = sum_pos * s->blength;
73+ }
74+
75 if (verbose > 4) {
76 rprintf(FINFO, "offset=%.0f sum=%04x%04x\n",
77 (double)offset, s2 & 0xFFFF, s1 & 0xFFFF);
78@@ -336,11 +351,6 @@ void match_sums(int f, struct sum_struct
79 }
80
81 if (len > 0 && s->count > 0) {
82- build_hash_table(s);
83-
84- if (verbose > 2)
85- rprintf(FINFO,"built hash table\n");
86-
87 hash_search(f, s, buf, len);
88
89 if (verbose > 2)
90--- old/rsync.h
91+++ new/rsync.h
92@@ -121,7 +121,7 @@
93 #define CHUNK_SIZE (32*1024)
94 #define MAX_MAP_SIZE (256*1024)
95 #define IO_BUFFER_SIZE (4092)
96-#define MAX_BLOCK_SIZE ((int32)1 << 29)
97+#define MAX_BLOCK_SIZE ((int32)1 << 17)
98
99 #define IOERR_GENERAL (1<<0) /* For backward compatibility, this must == 1 */
100 #define IOERR_VANISHED (1<<1)