Fixed a failing hunk.
[rsync/rsync-patches.git] / dynamic_hash.diff
CommitLineData
702a8903
WD
1This patch makes the processing of really large files more efficient
2by making sure that the sender's hash table is large enough to hold all the
e1a06002
WD
3checksum entries without being overloaded. Unfortunately, the code adds a
4modulus calculation for (up to) every byte of the source file, which slows
5down the code for normal file sizes (e.g. 4 CPU seconds slower on a Pentium
6III when copying a 65 MB file without very much matching data).
702a8903 7
c62af8d8 8This was updated for the latest codebase from a patch written by Shachar
e1a06002 9Shemesh.
702a8903 10
03019e41
WD
11To use this patch, run these commands for a successful build:
12
13 patch -p1 <patches/dynamic_hash.diff
14 ./configure (optional if already run)
15 make
16
702a8903
WD
17--- old/match.c
18+++ new/match.c
03019e41 19@@ -40,24 +40,31 @@ static int total_matches;
702a8903
WD
20
21 extern struct stats stats;
22
e1a06002 23-#define TABLESIZE (1<<16)
702a8903 24-
c541912f 25+static uint32 tablesize;
e1a06002 26 static int32 *hash_table;
702a8903 27
e1a06002
WD
28-#define SUM2HASH2(s1,s2) (((s1) + (s2)) & 0xFFFF)
29-#define SUM2HASH(sum) SUM2HASH2((sum)&0xFFFF,(sum)>>16)
30+#define SUM2HASH(sum) ((sum)%tablesize)
702a8903
WD
31
32 static void build_hash_table(struct sum_struct *s)
33 {
c541912f
WD
34 int32 i;
35+ uint32 prior_size = tablesize;
702a8903 36
e1a06002
WD
37- if (!hash_table) {
38- hash_table = new_array(int32, TABLESIZE);
702a8903 39+ /* Dynamically calculate the hash table size so that the hash load
2b1e5f60
WD
40+ * for big files is about 80%. This number must be odd or s2 will
41+ * not be able to span the entire set. */
c541912f 42+ tablesize = (uint32)(s->count/8) * 10 + 11;
702a8903
WD
43+ if (tablesize < 65537)
44+ tablesize = 65537; /* a prime number */
2b1e5f60 45+ if (tablesize != prior_size) {
e1a06002
WD
46+ if (hash_table)
47+ free(hash_table);
48+ hash_table = new_array(int32, tablesize);
49 if (!hash_table)
50 out_of_memory("build_hash_table");
702a8903
WD
51 }
52
e1a06002
WD
53- memset(hash_table, 0xFF, TABLESIZE * sizeof hash_table[0]);
54+ memset(hash_table, 0xFF, tablesize * sizeof hash_table[0]);
702a8903 55
e1a06002
WD
56 for (i = 0; i < s->count; i++) {
57 uint32 t = SUM2HASH(s->sums[i].sum1);
03019e41 58@@ -165,11 +172,11 @@ static void hash_search(int f,struct sum
e1a06002
WD
59 (double)offset, s2 & 0xFFFF, s1 & 0xFFFF);
60 }
702a8903 61
e1a06002
WD
62- i = hash_table[SUM2HASH2(s1,s2)];
63+ sum = (s1 & 0xffff) | (s2 << 16);
64+ i = hash_table[SUM2HASH(sum)];
65 if (i < 0)
66 goto null_hash;
1db27b7c
WD
67
68- sum = (s1 & 0xffff) | (s2 << 16);
e1a06002 69 hash_hits++;
1db27b7c 70 do {
e1a06002 71 int32 l;