checksum entries without being overloaded. It also makes the hashing of
normal sized files use slightly less memory than before.
-http://lists.samba.org/archive/rsync/2005-March/011875.html
+An extended version of a patch by Shachar Shemesh.
--- old/match.c
+++ new/match.c
-{
- return (int)t1->t - (int)t2->t;
-}
-+static int32 tablesize, tablealloc;
++static int32 tablesize;
+static int32 *sum_table;
+#define gettag2(s1,s2) gettag((s1) + ((s2)<<16))
static void build_hash_table(struct sum_struct *s)
{
- int32 i;
-+ int32 i, t;
++ int32 i, prior_size = tablesize;
- if (!tag_table)
- tag_table = new_array(int32, TABLESIZE);
+ /* Dynamically calculate the hash table size so that the hash load
-+ * is always about 80%. This number must be odd or s2 will not be
-+ * able to span the entire set. */
++ * for big files is about 80%. This number must be odd or s2 will
++ * not be able to span the entire set. */
+ tablesize = (s->count/8) * 10 + 11;
+ if (tablesize < 65537)
+ tablesize = 65537; /* a prime number */
-+ if (tablesize > tablealloc) {
-+ tablealloc = tablesize;
-+ sum_table = realloc_array(sum_table, int32, tablealloc);
++ if (tablesize != prior_size) {
++ free(sum_table);
++ sum_table = new_array(int32, tablesize);
+ if (!sum_table)
+ out_of_memory("build_hash_table");
+ }
- targets = new_array(struct target, s->count);
- if (!tag_table || !targets)
- out_of_memory("build_hash_table");
-+ memset(sum_table, 0xFF, tablesize * sizeof (int32));
++ memset(sum_table, 0xFF, tablesize * sizeof (sum_table[0]));
for (i = 0; i < s->count; i++) {
- targets[i].i = i;
- targets[i].t = gettag(s->sums[i].sum1);
-+ t = gettag(s->sums[i].sum1);
++ int32 t = gettag(s->sums[i].sum1);
+ s->sums[i].chain = sum_table[t];
+ sum_table[t] = i;
}
OFF_T offset; /**< offset in file of this chunk */
int32 len; /**< length of chunk of file */
uint32 sum1; /**< simple checksum */
-+ int32 chain; /**< hash-table chaining */
++ int32 chain; /**< next hash-table collision */
short flags; /**< flag bits */
char sum2[SUM_LENGTH]; /**< checksum */
};
s->sums[i].offset = offset;
s->sums[i].flags = 0;
-+ s->sums[i].chain = 0;
++ s->sums[i].chain = -1;
if (i == s->count-1 && s->remainder != 0)
s->sums[i].len = s->remainder;