Fixed a failing hunk.
[rsync/rsync-patches.git] / dynamic_hash.diff
... / ...
CommitLineData
1This patch makes the processing of really large files more efficient
2by making sure that the sender's hash table is large enough to hold all the
3checksum entries without being overloaded. Unfortunately, the code adds a
4modulus calculation for (up to) every byte of the source file, which slows
5down the code for normal file sizes (e.g. 4 CPU seconds slower on a Pentium
6III when copying a 65 MB file without very much matching data).
7
8This was updated for the latest codebase from a patch written by Shachar
9Shemesh.
10
11To use this patch, run these commands for a successful build:
12
13 patch -p1 <patches/dynamic_hash.diff
14 ./configure (optional if already run)
15 make
16
17--- old/match.c
18+++ new/match.c
19@@ -40,24 +40,31 @@ static int total_matches;
20
21 extern struct stats stats;
22
23-#define TABLESIZE (1<<16)
24-
25+static uint32 tablesize;
26 static int32 *hash_table;
27
28-#define SUM2HASH2(s1,s2) (((s1) + (s2)) & 0xFFFF)
29-#define SUM2HASH(sum) SUM2HASH2((sum)&0xFFFF,(sum)>>16)
30+#define SUM2HASH(sum) ((sum)%tablesize)
31
32 static void build_hash_table(struct sum_struct *s)
33 {
34 int32 i;
35+ uint32 prior_size = tablesize;
36
37- if (!hash_table) {
38- hash_table = new_array(int32, TABLESIZE);
39+ /* Dynamically calculate the hash table size so that the hash load
40+ * for big files is about 80%. This number must be odd or s2 will
41+ * not be able to span the entire set. */
42+ tablesize = (uint32)(s->count/8) * 10 + 11;
43+ if (tablesize < 65537)
44+ tablesize = 65537; /* a prime number */
45+ if (tablesize != prior_size) {
46+ if (hash_table)
47+ free(hash_table);
48+ hash_table = new_array(int32, tablesize);
49 if (!hash_table)
50 out_of_memory("build_hash_table");
51 }
52
53- memset(hash_table, 0xFF, TABLESIZE * sizeof hash_table[0]);
54+ memset(hash_table, 0xFF, tablesize * sizeof hash_table[0]);
55
56 for (i = 0; i < s->count; i++) {
57 uint32 t = SUM2HASH(s->sums[i].sum1);
58@@ -165,11 +172,11 @@ static void hash_search(int f,struct sum
59 (double)offset, s2 & 0xFFFF, s1 & 0xFFFF);
60 }
61
62- i = hash_table[SUM2HASH2(s1,s2)];
63+ sum = (s1 & 0xffff) | (s2 << 16);
64+ i = hash_table[SUM2HASH(sum)];
65 if (i < 0)
66 goto null_hash;
67
68- sum = (s1 & 0xffff) | (s2 << 16);
69 hash_hits++;
70 do {
71 int32 l;