Changed "%0x" in an rprintf() to "%02x".
[rsync/rsync-patches.git] / dynamic_hash.diff
CommitLineData
702a8903
WD
1This patch makes the processing of really large files more efficient
2by making sure that the sender's hash table is large enough to hold all the
e1a06002
WD
3checksum entries without being overloaded. Unfortunately, the code adds a
4modulus calculation for (up to) every byte of the source file, which slows
5down the code for normal file sizes (e.g. 4 CPU seconds slower on a Pentium
6III when copying a 65 MB file without very much matching data).
702a8903 7
e1a06002
WD
8This was updated for the latest codebase from a patch written by Shachar
9Shemesh.
702a8903
WD
10
11--- old/match.c
12+++ new/match.c
e1a06002 13@@ -37,24 +37,31 @@ static int total_matches;
702a8903
WD
14
15 extern struct stats stats;
16
e1a06002 17-#define TABLESIZE (1<<16)
702a8903 18-
c541912f 19+static uint32 tablesize;
e1a06002 20 static int32 *hash_table;
702a8903 21
e1a06002
WD
22-#define SUM2HASH2(s1,s2) (((s1) + (s2)) & 0xFFFF)
23-#define SUM2HASH(sum) SUM2HASH2((sum)&0xFFFF,(sum)>>16)
24+#define SUM2HASH(sum) ((sum)%tablesize)
702a8903
WD
25
26 static void build_hash_table(struct sum_struct *s)
27 {
c541912f
WD
28 int32 i;
29+ uint32 prior_size = tablesize;
702a8903 30
e1a06002
WD
31- if (!hash_table) {
32- hash_table = new_array(int32, TABLESIZE);
702a8903 33+ /* Dynamically calculate the hash table size so that the hash load
2b1e5f60
WD
34+ * for big files is about 80%. This number must be odd or s2 will
35+ * not be able to span the entire set. */
c541912f 36+ tablesize = (uint32)(s->count/8) * 10 + 11;
702a8903
WD
37+ if (tablesize < 65537)
38+ tablesize = 65537; /* a prime number */
2b1e5f60 39+ if (tablesize != prior_size) {
e1a06002
WD
40+ if (hash_table)
41+ free(hash_table);
42+ hash_table = new_array(int32, tablesize);
43 if (!hash_table)
44 out_of_memory("build_hash_table");
702a8903
WD
45 }
46
e1a06002
WD
47- memset(hash_table, 0xFF, TABLESIZE * sizeof hash_table[0]);
48+ memset(hash_table, 0xFF, tablesize * sizeof hash_table[0]);
702a8903 49
e1a06002
WD
50 for (i = 0; i < s->count; i++) {
51 uint32 t = SUM2HASH(s->sums[i].sum1);
52@@ -162,11 +169,11 @@ static void hash_search(int f,struct sum
53 (double)offset, s2 & 0xFFFF, s1 & 0xFFFF);
54 }
702a8903 55
e1a06002
WD
56- i = hash_table[SUM2HASH2(s1,s2)];
57+ sum = (s1 & 0xffff) | (s2 << 16);
58+ i = hash_table[SUM2HASH(sum)];
59 if (i < 0)
60 goto null_hash;
1db27b7c
WD
61
62- sum = (s1 & 0xffff) | (s2 << 16);
e1a06002 63 hash_hits++;
1db27b7c 64 do {
e1a06002 65 int32 l;