Commit | Line | Data |
---|---|---|
702a8903 WD |
1 | This patch makes the processing of really large files more efficient |
2 | by making sure that the sender's hash table is large enough to hold all the | |
e1a06002 WD |
3 | checksum entries without being overloaded. Unfortunately, the code adds a |
4 | modulus calculation for (up to) every byte of the source file, which slows | |
5 | down the code for normal file sizes (e.g. 4 CPU seconds slower on a Pentium | |
6 | III when copying a 65 MB file without very much matching data). | |
702a8903 | 7 | |
c62af8d8 | 8 | This was updated for the latest codebase from a patch written by Shachar |
e1a06002 | 9 | Shemesh. |
702a8903 | 10 | |
03019e41 WD |
11 | To use this patch, run these commands for a successful build: |
12 | ||
13 | patch -p1 <patches/dynamic_hash.diff | |
14 | ./configure (optional if already run) | |
15 | make | |
16 | ||
702a8903 WD |
17 | --- old/match.c |
18 | +++ new/match.c | |
03019e41 | 19 | @@ -40,24 +40,31 @@ static int total_matches; |
702a8903 WD |
20 | |
21 | extern struct stats stats; | |
22 | ||
e1a06002 | 23 | -#define TABLESIZE (1<<16) |
702a8903 | 24 | - |
c541912f | 25 | +static uint32 tablesize; |
e1a06002 | 26 | static int32 *hash_table; |
702a8903 | 27 | |
e1a06002 WD |
28 | -#define SUM2HASH2(s1,s2) (((s1) + (s2)) & 0xFFFF) |
29 | -#define SUM2HASH(sum) SUM2HASH2((sum)&0xFFFF,(sum)>>16) | |
30 | +#define SUM2HASH(sum) ((sum)%tablesize) | |
702a8903 WD |
31 | |
32 | static void build_hash_table(struct sum_struct *s) | |
33 | { | |
c541912f WD |
34 | int32 i; |
35 | + uint32 prior_size = tablesize; | |
702a8903 | 36 | |
e1a06002 WD |
37 | - if (!hash_table) { |
38 | - hash_table = new_array(int32, TABLESIZE); | |
702a8903 | 39 | + /* Dynamically calculate the hash table size so that the hash load |
2b1e5f60 WD |
40 | + * for big files is about 80%. This number must be odd or s2 will |
41 | + * not be able to span the entire set. */ | |
c541912f | 42 | + tablesize = (uint32)(s->count/8) * 10 + 11; |
702a8903 WD |
43 | + if (tablesize < 65537) |
44 | + tablesize = 65537; /* a prime number */ | |
2b1e5f60 | 45 | + if (tablesize != prior_size) { |
e1a06002 WD |
46 | + if (hash_table) |
47 | + free(hash_table); | |
48 | + hash_table = new_array(int32, tablesize); | |
49 | if (!hash_table) | |
50 | out_of_memory("build_hash_table"); | |
702a8903 WD |
51 | } |
52 | ||
e1a06002 WD |
53 | - memset(hash_table, 0xFF, TABLESIZE * sizeof hash_table[0]); |
54 | + memset(hash_table, 0xFF, tablesize * sizeof hash_table[0]); | |
702a8903 | 55 | |
e1a06002 WD |
56 | for (i = 0; i < s->count; i++) { |
57 | uint32 t = SUM2HASH(s->sums[i].sum1); | |
03019e41 | 58 | @@ -165,11 +172,11 @@ static void hash_search(int f,struct sum |
e1a06002 WD |
59 | (double)offset, s2 & 0xFFFF, s1 & 0xFFFF); |
60 | } | |
702a8903 | 61 | |
e1a06002 WD |
62 | - i = hash_table[SUM2HASH2(s1,s2)]; |
63 | + sum = (s1 & 0xffff) | (s2 << 16); | |
64 | + i = hash_table[SUM2HASH(sum)]; | |
65 | if (i < 0) | |
66 | goto null_hash; | |
1db27b7c WD |
67 | |
68 | - sum = (s1 & 0xffff) | (s2 << 16); | |
e1a06002 | 69 | hash_hits++; |
1db27b7c | 70 | do { |
e1a06002 | 71 | int32 l; |