Commit | Line | Data |
---|---|---|
702a8903 WD |
1 | This patch makes the processing of really large files more efficient |
2 | by making sure that the sender's hash table is large enough to hold all the | |
e1a06002 WD |
3 | checksum entries without being overloaded. Unfortunately, the code adds a |
4 | modulus calculation for (up to) every byte of the source file, which slows | |
5 | down the code for normal file sizes (e.g. 4 CPU seconds slower on a Pentium | |
6 | III when copying a 65 MB file without very much matching data). | |
702a8903 | 7 | |
c62af8d8 | 8 | This was updated for the latest codebase from a patch written by Shachar |
e1a06002 | 9 | Shemesh. |
702a8903 WD |
10 | |
11 | --- old/match.c | |
12 | +++ new/match.c | |
e1a06002 | 13 | @@ -37,24 +37,31 @@ static int total_matches; |
702a8903 WD |
14 | |
15 | extern struct stats stats; | |
16 | ||
e1a06002 | 17 | -#define TABLESIZE (1<<16) |
702a8903 | 18 | - |
c541912f | 19 | +static uint32 tablesize; |
e1a06002 | 20 | static int32 *hash_table; |
702a8903 | 21 | |
e1a06002 WD |
22 | -#define SUM2HASH2(s1,s2) (((s1) + (s2)) & 0xFFFF) |
23 | -#define SUM2HASH(sum) SUM2HASH2((sum)&0xFFFF,(sum)>>16) | |
24 | +#define SUM2HASH(sum) ((sum)%tablesize) | |
702a8903 WD |
25 | |
26 | static void build_hash_table(struct sum_struct *s) | |
27 | { | |
c541912f WD |
28 | int32 i; |
29 | + uint32 prior_size = tablesize; | |
702a8903 | 30 | |
e1a06002 WD |
31 | - if (!hash_table) { |
32 | - hash_table = new_array(int32, TABLESIZE); | |
702a8903 | 33 | + /* Dynamically calculate the hash table size so that the hash load |
2b1e5f60 WD |
34 | + * for big files is about 80%. This number must be odd or s2 will |
35 | + * not be able to span the entire set. */ | |
c541912f | 36 | + tablesize = (uint32)(s->count/8) * 10 + 11; |
702a8903 WD |
37 | + if (tablesize < 65537) |
38 | + tablesize = 65537; /* a prime number */ | |
2b1e5f60 | 39 | + if (tablesize != prior_size) { |
e1a06002 WD |
40 | + if (hash_table) |
41 | + free(hash_table); | |
42 | + hash_table = new_array(int32, tablesize); | |
43 | if (!hash_table) | |
44 | out_of_memory("build_hash_table"); | |
702a8903 WD |
45 | } |
46 | ||
e1a06002 WD |
47 | - memset(hash_table, 0xFF, TABLESIZE * sizeof hash_table[0]); |
48 | + memset(hash_table, 0xFF, tablesize * sizeof hash_table[0]); | |
702a8903 | 49 | |
e1a06002 WD |
50 | for (i = 0; i < s->count; i++) { |
51 | uint32 t = SUM2HASH(s->sums[i].sum1); | |
52 | @@ -162,11 +169,11 @@ static void hash_search(int f,struct sum | |
53 | (double)offset, s2 & 0xFFFF, s1 & 0xFFFF); | |
54 | } | |
702a8903 | 55 | |
e1a06002 WD |
56 | - i = hash_table[SUM2HASH2(s1,s2)]; |
57 | + sum = (s1 & 0xffff) | (s2 << 16); | |
58 | + i = hash_table[SUM2HASH(sum)]; | |
59 | if (i < 0) | |
60 | goto null_hash; | |
1db27b7c WD |
61 | |
62 | - sum = (s1 & 0xffff) | (s2 << 16); | |
e1a06002 | 63 | hash_hits++; |
1db27b7c | 64 | do { |
e1a06002 | 65 | int32 l; |