Commit | Line | Data |
---|---|---|
36923392 WD |
1 | /* |
2 | * Routines to provide a memory-efficient hashtable. | |
3 | * | |
b3bf9b9d | 4 | * Copyright (C) 2007-2009 Wayne Davison |
36923392 WD |
5 | * |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 3 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License along | |
17 | * with this program; if not, visit the http://fsf.org website. | |
18 | */ | |
19 | ||
20 | #include "rsync.h" | |
21 | ||
22 | #define HASH_LOAD_LIMIT(size) ((size)*3/4) | |
23 | ||
24 | struct hashtable *hashtable_create(int size, int key64) | |
25 | { | |
b3347e2a | 26 | int req = size; |
36923392 | 27 | struct hashtable *tbl; |
d6e6333a | 28 | int node_size = key64 ? sizeof (struct ht_int64_node) |
36923392 WD |
29 | : sizeof (struct ht_int32_node); |
30 | ||
31 | /* Pick a power of 2 that can hold the requested size. */ | |
32 | if (size & (size-1) || size < 16) { | |
36923392 WD |
33 | size = 16; |
34 | while (size < req) | |
35 | size *= 2; | |
36 | } | |
37 | ||
38 | if (!(tbl = new(struct hashtable)) | |
39 | || !(tbl->nodes = new_array0(char, size * node_size))) | |
40 | out_of_memory("hashtable_create"); | |
41 | tbl->size = size; | |
42 | tbl->entries = 0; | |
43 | tbl->node_size = node_size; | |
60c25caa | 44 | tbl->key64 = key64 ? 1 : 0; |
36923392 | 45 | |
b3347e2a WD |
46 | if (DEBUG_GTE(HASH, 1)) { |
47 | char buf[32]; | |
48 | if (req != size) | |
49 | snprintf(buf, sizeof buf, "req: %d, ", req); | |
50 | else | |
51 | *buf = '\0'; | |
52 | rprintf(FINFO, "[%s] created hashtable %lx (%ssize: %d, keys: %d-bit)\n", | |
53 | who_am_i(), (long)tbl, buf, size, key64 ? 64 : 32); | |
54 | } | |
55 | ||
36923392 WD |
56 | return tbl; |
57 | } | |
58 | ||
59 | void hashtable_destroy(struct hashtable *tbl) | |
60 | { | |
b3347e2a WD |
61 | if (DEBUG_GTE(HASH, 1)) { |
62 | rprintf(FINFO, "[%s] destroyed hashtable %lx (size: %d, keys: %d-bit)\n", | |
63 | who_am_i(), (long)tbl, tbl->size, tbl->key64 ? 64 : 32); | |
64 | } | |
36923392 WD |
65 | free(tbl->nodes); |
66 | free(tbl); | |
67 | } | |
68 | ||
69 | /* This returns the node for the indicated key, either newly created or | |
70 | * already existing. Returns NULL if not allocating and not found. */ | |
71 | void *hashtable_find(struct hashtable *tbl, int64 key, int allocate_if_missing) | |
72 | { | |
d6e6333a | 73 | int key64 = tbl->key64; |
36923392 WD |
74 | struct ht_int32_node *node; |
75 | uint32 ndx; | |
76 | ||
60c25caa WD |
77 | if (key64 ? key == 0 : (int32)key == 0) { |
78 | rprintf(FERROR, "Internal hashtable error: illegal key supplied!\n"); | |
79 | exit_cleanup(RERR_MESSAGEIO); | |
80 | } | |
81 | ||
36923392 WD |
82 | if (allocate_if_missing && tbl->entries > HASH_LOAD_LIMIT(tbl->size)) { |
83 | void *old_nodes = tbl->nodes; | |
84 | int size = tbl->size * 2; | |
85 | int i; | |
86 | ||
87 | if (!(tbl->nodes = new_array0(char, size * tbl->node_size))) | |
88 | out_of_memory("hashtable_node"); | |
89 | tbl->size = size; | |
90 | tbl->entries = 0; | |
91 | ||
b3347e2a WD |
92 | if (DEBUG_GTE(HASH, 1)) { |
93 | rprintf(FINFO, "[%s] growing hashtable %lx (size: %d, keys: %d-bit)\n", | |
94 | who_am_i(), (long)tbl, size, key64 ? 64 : 32); | |
95 | } | |
96 | ||
36923392 WD |
97 | for (i = size / 2; i-- > 0; ) { |
98 | struct ht_int32_node *move_node = HT_NODE(tbl, old_nodes, i); | |
99 | int64 move_key = HT_KEY(move_node, key64); | |
100 | if (move_key == 0) | |
101 | continue; | |
102 | node = hashtable_find(tbl, move_key, 1); | |
103 | node->data = move_node->data; | |
104 | } | |
105 | ||
106 | free(old_nodes); | |
107 | } | |
108 | ||
109 | if (!key64) { | |
110 | /* Based on Jenkins One-at-a-time hash. */ | |
111 | uchar buf[4], *keyp = buf; | |
112 | int i; | |
113 | ||
df6350a8 | 114 | SIVALu(buf, 0, key); |
36923392 WD |
115 | for (ndx = 0, i = 0; i < 4; i++) { |
116 | ndx += keyp[i]; | |
117 | ndx += (ndx << 10); | |
118 | ndx ^= (ndx >> 6); | |
119 | } | |
120 | ndx += (ndx << 3); | |
121 | ndx ^= (ndx >> 11); | |
122 | ndx += (ndx << 15); | |
123 | } else { | |
124 | /* Based on Jenkins hashword() from lookup3.c. */ | |
125 | uint32 a, b, c; | |
126 | ||
127 | /* Set up the internal state */ | |
128 | a = b = c = 0xdeadbeef + (8 << 2); | |
129 | ||
130 | #define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k)))) | |
4ecf3e06 | 131 | #if SIZEOF_INT64 >= 8 |
36923392 | 132 | b += (uint32)(key >> 32); |
4ecf3e06 | 133 | #endif |
36923392 WD |
134 | a += (uint32)key; |
135 | c ^= b; c -= rot(b, 14); | |
136 | a ^= c; a -= rot(c, 11); | |
137 | b ^= a; b -= rot(a, 25); | |
138 | c ^= b; c -= rot(b, 16); | |
139 | a ^= c; a -= rot(c, 4); | |
140 | b ^= a; b -= rot(a, 14); | |
141 | c ^= b; c -= rot(b, 24); | |
142 | #undef rot | |
143 | ndx = c; | |
144 | } | |
145 | ||
146 | /* If it already exists, return the node. If we're not | |
147 | * allocating, return NULL if the key is not found. */ | |
148 | while (1) { | |
149 | int64 nkey; | |
150 | ||
151 | ndx &= tbl->size - 1; | |
152 | node = HT_NODE(tbl, tbl->nodes, ndx); | |
153 | nkey = HT_KEY(node, key64); | |
154 | ||
155 | if (nkey == key) | |
156 | return node; | |
157 | if (nkey == 0) { | |
158 | if (!allocate_if_missing) | |
159 | return NULL; | |
160 | break; | |
161 | } | |
162 | ndx++; | |
163 | } | |
164 | ||
165 | /* Take over this empty spot and then return the node. */ | |
166 | if (key64) | |
167 | ((struct ht_int64_node*)node)->key = key; | |
168 | else | |
aad635f7 | 169 | node->key = (int32)key; |
36923392 WD |
170 | tbl->entries++; |
171 | return node; | |
172 | } |