| 1 | /* |
| 2 | * Routines to provide a memory-efficient hashtable. |
| 3 | * |
| 4 | * Copyright (C) 2007-2008 Wayne Davison |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or modify |
| 7 | * it under the terms of the GNU General Public License as published by |
| 8 | * the Free Software Foundation; either version 3 of the License, or |
| 9 | * (at your option) any later version. |
| 10 | * |
| 11 | * This program is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | * GNU General Public License for more details. |
| 15 | * |
| 16 | * You should have received a copy of the GNU General Public License along |
| 17 | * with this program; if not, visit the http://fsf.org website. |
| 18 | */ |
| 19 | |
#include "rsync.h"

#include <limits.h>
| 21 | |
/* Entry count above which a table with "size" slots is grown: three
 * quarters of the slot count (integer arithmetic, rounded down). */
#define HASH_LOAD_LIMIT(size) (((size) * 3) / 4)
| 23 | |
| 24 | struct hashtable *hashtable_create(int size, int key64) |
| 25 | { |
| 26 | struct hashtable *tbl; |
| 27 | int node_size = key64 ? sizeof (struct ht_int64_node) |
| 28 | : sizeof (struct ht_int32_node); |
| 29 | |
| 30 | /* Pick a power of 2 that can hold the requested size. */ |
| 31 | if (size & (size-1) || size < 16) { |
| 32 | int req = size; |
| 33 | size = 16; |
| 34 | while (size < req) |
| 35 | size *= 2; |
| 36 | } |
| 37 | |
| 38 | if (!(tbl = new(struct hashtable)) |
| 39 | || !(tbl->nodes = new_array0(char, size * node_size))) |
| 40 | out_of_memory("hashtable_create"); |
| 41 | tbl->size = size; |
| 42 | tbl->entries = 0; |
| 43 | tbl->node_size = node_size; |
| 44 | tbl->key64 = key64; |
| 45 | |
| 46 | return tbl; |
| 47 | } |
| 48 | |
| 49 | void hashtable_destroy(struct hashtable *tbl) |
| 50 | { |
| 51 | free(tbl->nodes); |
| 52 | free(tbl); |
| 53 | } |
| 54 | |
| 55 | /* This returns the node for the indicated key, either newly created or |
| 56 | * already existing. Returns NULL if not allocating and not found. */ |
| 57 | void *hashtable_find(struct hashtable *tbl, int64 key, int allocate_if_missing) |
| 58 | { |
| 59 | int key64 = tbl->key64; |
| 60 | struct ht_int32_node *node; |
| 61 | uint32 ndx; |
| 62 | |
| 63 | if (allocate_if_missing && tbl->entries > HASH_LOAD_LIMIT(tbl->size)) { |
| 64 | void *old_nodes = tbl->nodes; |
| 65 | int size = tbl->size * 2; |
| 66 | int i; |
| 67 | |
| 68 | if (!(tbl->nodes = new_array0(char, size * tbl->node_size))) |
| 69 | out_of_memory("hashtable_node"); |
| 70 | tbl->size = size; |
| 71 | tbl->entries = 0; |
| 72 | |
| 73 | for (i = size / 2; i-- > 0; ) { |
| 74 | struct ht_int32_node *move_node = HT_NODE(tbl, old_nodes, i); |
| 75 | int64 move_key = HT_KEY(move_node, key64); |
| 76 | if (move_key == 0) |
| 77 | continue; |
| 78 | node = hashtable_find(tbl, move_key, 1); |
| 79 | node->data = move_node->data; |
| 80 | } |
| 81 | |
| 82 | free(old_nodes); |
| 83 | } |
| 84 | |
| 85 | if (!key64) { |
| 86 | /* Based on Jenkins One-at-a-time hash. */ |
| 87 | uchar buf[4], *keyp = buf; |
| 88 | int i; |
| 89 | |
| 90 | SIVAL(buf, 0, key); |
| 91 | for (ndx = 0, i = 0; i < 4; i++) { |
| 92 | ndx += keyp[i]; |
| 93 | ndx += (ndx << 10); |
| 94 | ndx ^= (ndx >> 6); |
| 95 | } |
| 96 | ndx += (ndx << 3); |
| 97 | ndx ^= (ndx >> 11); |
| 98 | ndx += (ndx << 15); |
| 99 | } else { |
| 100 | /* Based on Jenkins hashword() from lookup3.c. */ |
| 101 | uint32 a, b, c; |
| 102 | |
| 103 | /* Set up the internal state */ |
| 104 | a = b = c = 0xdeadbeef + (8 << 2); |
| 105 | |
| 106 | #define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k)))) |
| 107 | #if SIZEOF_INT64 >= 8 |
| 108 | b += (uint32)(key >> 32); |
| 109 | #endif |
| 110 | a += (uint32)key; |
| 111 | c ^= b; c -= rot(b, 14); |
| 112 | a ^= c; a -= rot(c, 11); |
| 113 | b ^= a; b -= rot(a, 25); |
| 114 | c ^= b; c -= rot(b, 16); |
| 115 | a ^= c; a -= rot(c, 4); |
| 116 | b ^= a; b -= rot(a, 14); |
| 117 | c ^= b; c -= rot(b, 24); |
| 118 | #undef rot |
| 119 | ndx = c; |
| 120 | } |
| 121 | |
| 122 | /* If it already exists, return the node. If we're not |
| 123 | * allocating, return NULL if the key is not found. */ |
| 124 | while (1) { |
| 125 | int64 nkey; |
| 126 | |
| 127 | ndx &= tbl->size - 1; |
| 128 | node = HT_NODE(tbl, tbl->nodes, ndx); |
| 129 | nkey = HT_KEY(node, key64); |
| 130 | |
| 131 | if (nkey == key) |
| 132 | return node; |
| 133 | if (nkey == 0) { |
| 134 | if (!allocate_if_missing) |
| 135 | return NULL; |
| 136 | break; |
| 137 | } |
| 138 | ndx++; |
| 139 | } |
| 140 | |
| 141 | /* Take over this empty spot and then return the node. */ |
| 142 | if (key64) |
| 143 | ((struct ht_int64_node*)node)->key = key; |
| 144 | else |
| 145 | node->key = key; |
| 146 | tbl->entries++; |
| 147 | return node; |
| 148 | } |