From: Wayne Davison Date: Sun, 26 Feb 2006 15:48:01 +0000 (+0000) Subject: Incorporated some feedback from Shachar Shemesh. X-Git-Url: https://mattmccutchen.net/rsync/rsync-patches.git/commitdiff_plain/2b1e5f60979bcd92433420d9c5ef7491fd860e12 Incorporated some feedback from Shachar Shemesh. --- diff --git a/dynamic_hash.diff b/dynamic_hash.diff index 261918b..2a6026f 100644 --- a/dynamic_hash.diff +++ b/dynamic_hash.diff @@ -3,7 +3,7 @@ by making sure that the sender's hash table is large enough to hold all the checksum entries without being overloaded. It also makes the hashing of normal sized files use slightly less memory than before. -http://lists.samba.org/archive/rsync/2005-March/011875.html +An extended version of a patch by Shachar Shemesh. --- old/match.c +++ new/match.c @@ -39,7 +39,7 @@ http://lists.samba.org/archive/rsync/2005-March/011875.html -{ - return (int)t1->t - (int)t2->t; -} -+static int32 tablesize, tablealloc; ++static int32 tablesize; +static int32 *sum_table; +#define gettag2(s1,s2) gettag((s1) + ((s2)<<16)) @@ -48,19 +48,19 @@ http://lists.samba.org/archive/rsync/2005-March/011875.html static void build_hash_table(struct sum_struct *s) { - int32 i; -+ int32 i, t; ++ int32 i, prior_size = tablesize; - if (!tag_table) - tag_table = new_array(int32, TABLESIZE); + /* Dynamically calculate the hash table size so that the hash load -+ * is always about 80%. This number must be odd or s2 will not be -+ * able to span the entire set. */ ++ * for big files is about 80%. This number must be odd or s2 will ++ * not be able to span the entire set. */ + tablesize = (s->count/8) * 10 + 11; + if (tablesize < 65537) + tablesize = 65537; /* a prime number */ -+ if (tablesize > tablealloc) { -+ tablealloc = tablesize; -+ sum_table = realloc_array(sum_table, int32, tablealloc); ++ if (tablesize != prior_size) { ++ free(sum_table); ++ sum_table = new_array(int32, tablesize); + if (!sum_table) + out_of_memory("build_hash_table"); + } @@ -68,12 +68,12 @@ http://lists.samba.org/archive/rsync/2005-March/011875.html - targets = new_array(struct target, s->count); - if (!tag_table || !targets) - out_of_memory("build_hash_table"); -+ memset(sum_table, 0xFF, tablesize * sizeof (int32)); ++ memset(sum_table, 0xFF, tablesize * sizeof (sum_table[0])); for (i = 0; i < s->count; i++) { - targets[i].i = i; - targets[i].t = gettag(s->sums[i].sum1); -+ t = gettag(s->sums[i].sum1); ++ int32 t = gettag(s->sums[i].sum1); + s->sums[i].chain = sum_table[t]; + sum_table[t] = i; } @@ -184,7 +184,7 @@ http://lists.samba.org/archive/rsync/2005-March/011875.html OFF_T offset; /**< offset in file of this chunk */ int32 len; /**< length of chunk of file */ uint32 sum1; /**< simple checksum */ -+ int32 chain; /**< hash-table chaining */ ++ int32 chain; /**< next hash-table collision */ short flags; /**< flag bits */ char sum2[SUM_LENGTH]; /**< checksum */ }; @@ -194,7 +194,7 @@ http://lists.samba.org/archive/rsync/2005-March/011875.html s->sums[i].offset = offset; s->sums[i].flags = 0; -+ s->sums[i].chain = 0; ++ s->sums[i].chain = -1; if (i == s->count-1 && s->remainder != 0) s->sums[i].len = s->remainder;