Fixed failing hunks.
[rsync/rsync-patches.git] / dynamic_hash.diff
CommitLineData
702a8903
WD
1This patch makes the processing of really large files more efficient
2by making sure that the sender's hash table is large enough to hold all the
84495354
WD
3checksum entries without being overloaded.
4
5Updated to use the current hashtable method when possible, and the new
6hashtable method (which requires a modulus calculation for up to every byte
7of the source file) only on large files that need a larger hashtable size.
8This avoids slowing down files that don't need the extra-large hashtable.
702a8903 9
c62af8d8 10This was updated for the latest codebase from a patch written by Shachar
e1a06002 11Shemesh.
702a8903 12
03019e41
WD
13To use this patch, run these commands for a successful build:
14
15 patch -p1 <patches/dynamic_hash.diff
16 ./configure (optional if already run)
17 make
18
702a8903
WD
19--- old/match.c
20+++ new/match.c
84495354 21@@ -39,29 +39,50 @@ static int total_matches;
702a8903
WD
22
23 extern struct stats stats;
24
e1a06002 25-#define TABLESIZE (1<<16)
84495354
WD
26+#define TRADITIONAL_TABLESIZE (1<<16)
27
c541912f 28+static uint32 tablesize;
e1a06002 29 static int32 *hash_table;
702a8903 30
84495354
WD
31 #define SUM2HASH2(s1,s2) (((s1) + (s2)) & 0xFFFF)
32 #define SUM2HASH(sum) SUM2HASH2((sum)&0xFFFF,(sum)>>16)
702a8903 33
84495354
WD
34+#define BIG_SUM2HASH(sum) ((sum)%tablesize)
35+
702a8903
WD
36 static void build_hash_table(struct sum_struct *s)
37 {
84495354 38+ static uint32 alloc_size;
c541912f 39 int32 i;
702a8903 40
e1a06002
WD
41- if (!hash_table) {
42- hash_table = new_array(int32, TABLESIZE);
702a8903 43+ /* Dynamically calculate the hash table size so that the hash load
2b1e5f60
WD
44+ * for big files is about 80%. This number must be odd or s2 will
45+ * not be able to span the entire set. */
c541912f 46+ tablesize = (uint32)(s->count/8) * 10 + 11;
84495354
WD
47+ if (tablesize < TRADITIONAL_TABLESIZE)
48+ tablesize = TRADITIONAL_TABLESIZE;
49+ if (tablesize > alloc_size || tablesize < alloc_size - 16*1024) {
e1a06002
WD
50+ if (hash_table)
51+ free(hash_table);
52+ hash_table = new_array(int32, tablesize);
53 if (!hash_table)
54 out_of_memory("build_hash_table");
84495354 55+ alloc_size = tablesize;
702a8903
WD
56 }
57
e1a06002
WD
58- memset(hash_table, 0xFF, TABLESIZE * sizeof hash_table[0]);
59+ memset(hash_table, 0xFF, tablesize * sizeof hash_table[0]);
702a8903 60
84495354
WD
61- for (i = 0; i < s->count; i++) {
62- uint32 t = SUM2HASH(s->sums[i].sum1);
63- s->sums[i].chain = hash_table[t];
64- hash_table[t] = i;
65+ if (tablesize == TRADITIONAL_TABLESIZE) {
66+ for (i = 0; i < s->count; i++) {
67+ uint32 t = SUM2HASH(s->sums[i].sum1);
68+ s->sums[i].chain = hash_table[t];
69+ hash_table[t] = i;
70+ }
71+ } else {
72+ for (i = 0; i < s->count; i++) {
73+ uint32 t = BIG_SUM2HASH(s->sums[i].sum1);
74+ s->sums[i].chain = hash_table[t];
75+ hash_table[t] = i;
76+ }
77 }
78 }
79
80@@ -164,11 +185,16 @@ static void hash_search(int f,struct sum
e1a06002
WD
81 (double)offset, s2 & 0xFFFF, s1 & 0xFFFF);
82 }
702a8903 83
e1a06002 84- i = hash_table[SUM2HASH2(s1,s2)];
84495354
WD
85- if (i < 0)
86- goto null_hash;
87+ if (tablesize == TRADITIONAL_TABLESIZE) {
88+ if ((i = hash_table[SUM2HASH2(s1,s2)]) < 0)
89+ goto null_hash;
90+ sum = (s1 & 0xffff) | (s2 << 16);
91+ } else {
92+ sum = (s1 & 0xffff) | (s2 << 16);
93+ if ((i = hash_table[BIG_SUM2HASH(sum)]) < 0)
94+ goto null_hash;
95+ }
1db27b7c
WD
96
97- sum = (s1 & 0xffff) | (s2 << 16);
e1a06002 98 hash_hits++;
1db27b7c 99 do {
e1a06002 100 int32 l;