Commit | Line | Data |
---|---|---|
702a8903 WD |
1 | This patch makes the processing of really large files more efficient |
2 | by making sure that the sender's hash table is large enough to hold all the | |
84495354 WD |
3 | checksum entries without being overloaded. |
4 | ||
5 | Updated to use the current hashtable method when possible, and the new | |
6 | hashtable method (which requires a modulus calculation for up to every byte | |
7 | of the source file) only on large files that need a larger hashtable size. | |
8 | This avoids slowing down files that don't need the extra-large hashtable. | |
702a8903 | 9 | |
c62af8d8 | 10 | This was updated for the latest codebase from a patch written by Shachar |
e1a06002 | 11 | Shemesh. |
702a8903 | 12 | |
03019e41 WD |
13 | To use this patch, run these commands for a successful build: |
14 | ||
15 | patch -p1 <patches/dynamic_hash.diff | |
16 | ./configure (optional if already run) | |
17 | make | |
18 | ||
702a8903 WD |
19 | --- old/match.c |
20 | +++ new/match.c | |
84495354 | 21 | @@ -39,29 +39,50 @@ static int total_matches; |
702a8903 WD |
22 | |
23 | extern struct stats stats; | |
24 | ||
e1a06002 | 25 | -#define TABLESIZE (1<<16) |
84495354 WD |
26 | +#define TRADITIONAL_TABLESIZE (1<<16) |
27 | ||
c541912f | 28 | +static uint32 tablesize; |
e1a06002 | 29 | static int32 *hash_table; |
702a8903 | 30 | |
84495354 WD |
31 | #define SUM2HASH2(s1,s2) (((s1) + (s2)) & 0xFFFF) |
32 | #define SUM2HASH(sum) SUM2HASH2((sum)&0xFFFF,(sum)>>16) | |
702a8903 | 33 | |
84495354 WD |
34 | +#define BIG_SUM2HASH(sum) ((sum)%tablesize) |
35 | + | |
702a8903 WD |
36 | static void build_hash_table(struct sum_struct *s) |
37 | { | |
84495354 | 38 | + static uint32 alloc_size; |
c541912f | 39 | int32 i; |
702a8903 | 40 | |
e1a06002 WD |
41 | - if (!hash_table) { |
42 | - hash_table = new_array(int32, TABLESIZE); | |
702a8903 | 43 | + /* Dynamically calculate the hash table size so that the hash load |
2b1e5f60 WD |
44 | + * for big files is about 80%. This number must be odd or s2 will |
45 | + * not be able to span the entire set. */ | |
c541912f | 46 | + tablesize = (uint32)(s->count/8) * 10 + 11; |
84495354 WD |
47 | + if (tablesize < TRADITIONAL_TABLESIZE) |
48 | + tablesize = TRADITIONAL_TABLESIZE; | |
49 | + if (tablesize > alloc_size || tablesize < alloc_size - 16*1024) { | |
e1a06002 WD |
50 | + if (hash_table) |
51 | + free(hash_table); | |
52 | + hash_table = new_array(int32, tablesize); | |
53 | if (!hash_table) | |
54 | out_of_memory("build_hash_table"); | |
84495354 | 55 | + alloc_size = tablesize; |
702a8903 WD |
56 | } |
57 | ||
e1a06002 WD |
58 | - memset(hash_table, 0xFF, TABLESIZE * sizeof hash_table[0]); |
59 | + memset(hash_table, 0xFF, tablesize * sizeof hash_table[0]); | |
702a8903 | 60 | |
84495354 WD |
61 | - for (i = 0; i < s->count; i++) { |
62 | - uint32 t = SUM2HASH(s->sums[i].sum1); | |
63 | - s->sums[i].chain = hash_table[t]; | |
64 | - hash_table[t] = i; | |
65 | + if (tablesize == TRADITIONAL_TABLESIZE) { | |
66 | + for (i = 0; i < s->count; i++) { | |
67 | + uint32 t = SUM2HASH(s->sums[i].sum1); | |
68 | + s->sums[i].chain = hash_table[t]; | |
69 | + hash_table[t] = i; | |
70 | + } | |
71 | + } else { | |
72 | + for (i = 0; i < s->count; i++) { | |
73 | + uint32 t = BIG_SUM2HASH(s->sums[i].sum1); | |
74 | + s->sums[i].chain = hash_table[t]; | |
75 | + hash_table[t] = i; | |
76 | + } | |
77 | } | |
78 | } | |
79 | ||
80 | @@ -164,11 +185,16 @@ static void hash_search(int f,struct sum | |
e1a06002 WD |
81 | (double)offset, s2 & 0xFFFF, s1 & 0xFFFF); |
82 | } | |
702a8903 | 83 | |
e1a06002 | 84 | - i = hash_table[SUM2HASH2(s1,s2)]; |
84495354 WD |
85 | - if (i < 0) |
86 | - goto null_hash; | |
87 | + if (tablesize == TRADITIONAL_TABLESIZE) { | |
88 | + if ((i = hash_table[SUM2HASH2(s1,s2)]) < 0) | |
89 | + goto null_hash; | |
90 | + sum = (s1 & 0xffff) | (s2 << 16); | |
91 | + } else { | |
92 | + sum = (s1 & 0xffff) | (s2 << 16); | |
93 | + if ((i = hash_table[BIG_SUM2HASH(sum)]) < 0) | |
94 | + goto null_hash; | |
95 | + } | |
1db27b7c WD |
96 | |
97 | - sum = (s1 & 0xffff) | (s2 << 16); | |
e1a06002 | 98 | hash_hits++; |
1db27b7c | 99 | do { |
e1a06002 | 100 | int32 l; |