Line data Source code
1 : /*
2 : * Based on libkmod-hash.c from libkmod - interface to kernel module operations
3 : * Copyright (C) 2011-2012 ProFUSION embedded systems
4 : * Copyright (C) 2013 L.Pereira <l@tia.mat.br>
5 : *
6 : * This library is free software; you can redistribute it and/or
7 : * modify it under the terms of the GNU Lesser General Public
8 : * License as published by the Free Software Foundation; either
9 : * version 2.1 of the License, or (at your option) any later version.
10 : *
11 : * This library is distributed in the hope that it will be useful,
12 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 : * Lesser General Public License for more details.
15 : *
16 : * You should have received a copy of the GNU Lesser General Public
17 : * License along with this library; if not, write to the Free Software
18 : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 : */
20 :
21 : #define _DEFAULT_SOURCE
22 : #define _GNU_SOURCE
23 : #include <assert.h>
24 : #include <errno.h>
25 : #include <fcntl.h>
26 : #include <stdbool.h>
27 : #include <stdint.h>
28 : #include <stdlib.h>
29 : #include <string.h>
30 : #include <sys/stat.h>
31 : #include <sys/syscall.h>
32 : #include <sys/types.h>
33 : #include <unistd.h>
34 :
35 : #include "lwan-private.h"
36 : #include "hash.h"
37 : #include "murmur3.h"
38 :
/* One hash bucket: three parallel arrays indexed together. Keeping the
 * hash values in their own array lets lookups scan hashes cache-friendly
 * without touching the key pointers. */
struct hash_bucket {
    void **keys;
    void **values;
    unsigned int *hashvals;

    unsigned int used;  /* number of occupied entries */
    unsigned int total; /* allocated capacity (a multiple of STEPS) */
};
47 :
struct hash {
    unsigned int count;          /* total key/value pairs stored */
    unsigned int n_buckets_mask; /* n_buckets - 1; n_buckets is a power of 2 */

    /* Callbacks selected by the hash_{int,int64,str}_new() constructors. */
    unsigned (*hash_value)(const void *key);
    int (*key_equal)(const void *k1, const void *k2);
    void (*free_value)(void *value);
    void (*free_key)(void *value);

    struct hash_bucket *buckets;
};
59 :
/* Pointers into one slot of a bucket's parallel arrays, as returned by the
 * internal add/find helpers. A NULL .key signals failure (or not found). */
struct hash_entry {
    void **key;
    void **value;
    unsigned int *hashval;

    /* Only set when adding a new entry if it was already in the
     * hash table -- always 0/false otherwise. */
    bool existing;
};
69 :
/* Initial (and minimum) number of buckets; must be a power of 2 so that
 * n_buckets_mask works as a modulus. */
#define MIN_BUCKETS 64

/* Due to rehashing heuristics, most hash tables won't have more than 4
 * entries in each bucket. Use a conservative allocation threshold to
 * curb wasteful allocations */
#define STEPS 4

/* Fallback hash seed used if the entropy source fails; forced odd below. */
#define DEFAULT_ODD_CONSTANT 0x27d4eb2d

static_assert((MIN_BUCKETS & (MIN_BUCKETS - 1)) == 0,
              "Bucket size is power of 2");

static inline unsigned int hash_int_shift_mult(const void *keyptr);
static inline unsigned int hash_int64_shift_mult(const void *keyptr);

/* odd_constant is randomized at startup (see initialize_odd_constant) to
 * mitigate hash-flooding DoS; the function pointers below are swapped for
 * CRC32-based versions when SSE4.2 is detected at run time. */
static unsigned int odd_constant = DEFAULT_ODD_CONSTANT;
static unsigned (*hash_str)(const void *key) = murmur3_simple;
static unsigned (*hash_int)(const void *key) = hash_int_shift_mult;
static unsigned (*hash_int64)(const void *key) = hash_int64_shift_mult;
89 :
/* Resize a bucket's three parallel arrays to hold new_size * STEPS entries.
 *
 * NOTE(review): new_size is in units of STEPS entries (bucket->total is set
 * to new_size * STEPS) -- callers must pass a STEPS-based count; confirm
 * every call site agrees on these units.
 *
 * On partial allocation failure, any array that WAS successfully resized is
 * kept (this is safe: those arrays only grew/shrank in capacity, and
 * bucket->total is left unchanged), and false is returned. Returns true
 * only when all three arrays were resized. */
static bool resize_bucket(struct hash_bucket *bucket, unsigned int new_size)
{
    void **new_keys;
    void **new_values;
    unsigned int *new_hashvals;

    /* reallocarray() checks new_size * (STEPS * sizeof(...)) for overflow. */
    new_keys = reallocarray(bucket->keys, new_size, STEPS * sizeof(void *));
    new_values = reallocarray(bucket->values, new_size, STEPS * sizeof(void *));
    new_hashvals =
        reallocarray(bucket->hashvals, new_size, STEPS * sizeof(unsigned int));

    if (new_keys)
        bucket->keys = new_keys;
    if (new_values)
        bucket->values = new_values;
    if (new_hashvals)
        bucket->hashvals = new_hashvals;

    if (new_keys && new_values && new_hashvals) {
        bucket->total = new_size * STEPS;
        return true;
    }

    return false;
}
115 :
116 0 : static inline unsigned int hash_int_shift_mult(const void *keyptr)
117 : {
118 : /* http://www.concentric.net/~Ttwang/tech/inthash.htm */
119 0 : unsigned int key = (unsigned int)(long)keyptr;
120 0 : unsigned int c2 = odd_constant;
121 :
122 0 : key = (key ^ 61) ^ (key >> 16);
123 0 : key += key << 3;
124 0 : key ^= key >> 4;
125 0 : key *= c2;
126 0 : key ^= key >> 15;
127 0 : return key;
128 : }
129 :
/* Hash a 64-bit key by hashing its two 32-bit halves independently and
 * XOR-combining the results. */
static inline unsigned int hash_int64_shift_mult(const void *keyptr)
{
    const uint64_t key = (uint64_t)(uintptr_t)keyptr;
    const uint32_t lo = (uint32_t)key;
    const uint32_t hi = (uint32_t)(key >> 32);

    return hash_int_shift_mult((void *)(uintptr_t)lo) ^
           hash_int_shift_mult((void *)(uintptr_t)hi);
}
139 :
140 : #if defined(LWAN_HAVE_BUILTIN_CPU_INIT) && defined(LWAN_HAVE_BUILTIN_IA32_CRC32)
/* Hash a NUL-terminated string with the SSE4.2 CRC32 instructions, seeded
 * with the randomized odd_constant. The string is consumed in 8-byte
 * (x86-64 only), 4-byte, 2-byte, then 1-byte chunks. */
static inline unsigned int hash_str_crc32(const void *keyptr)
{
    unsigned int hash = odd_constant;
    const char *key = keyptr;
    size_t len = strlen(key);

#if __x86_64__
    /* The 64-bit CRC32 form only exists on x86-64. */
    while (len >= sizeof(uint64_t)) {
        uint64_t data;
        memcpy(&data, key, sizeof(data));
        hash = (unsigned int)__builtin_ia32_crc32di(hash, data);
        key += sizeof(uint64_t);
        len -= sizeof(uint64_t);
    }
#endif /* __x86_64__ */
    while (len >= sizeof(uint32_t)) {
        uint32_t data;
        memcpy(&data, key, sizeof(data));
        hash = __builtin_ia32_crc32si(hash, data);
        key += sizeof(uint32_t);
        len -= sizeof(uint32_t);
    }
    /* At most 3 bytes (plus the NUL) remain here. Take a 16-bit chunk only
     * when at least two characters are left; probing the bytes themselves
     * (rather than len) includes the terminator check for free. */
    if (*key && *(key + 1)) {
        uint16_t data;
        memcpy(&data, key, sizeof(data));
        hash = __builtin_ia32_crc32hi(hash, data);
        key += sizeof(uint16_t);
    }
    /* Last byte might be the terminating NUL or the last character.
     * For a hash, this doesn't matter, and shaves off a branch.
     */
    hash = __builtin_ia32_crc32qi(hash, (unsigned char)*key);

    return hash;
}
176 :
177 : static inline unsigned int hash_int_crc32(const void *keyptr)
178 : {
179 : return __builtin_ia32_crc32si(odd_constant,
180 : (unsigned int)(uintptr_t)keyptr);
181 : }
182 :
/* Hash a 64-bit key (stored in the pointer) with hardware CRC32. */
static inline unsigned int hash_int64_crc32(const void *keyptr)
{
#ifdef __x86_64__
    /* crc32di consumes all 64 bits in one instruction. */
    return (unsigned int)__builtin_ia32_crc32di(odd_constant,
                                                (uint64_t)(uintptr_t)keyptr);
#else
    /* 32-bit target: fold the two halves through the 32-bit form. */
    const uint64_t key = (uint64_t)(uintptr_t)keyptr;
    uint32_t crc;

    crc = __builtin_ia32_crc32si(odd_constant, (uint32_t)(key & 0xffffffff));
    crc = __builtin_ia32_crc32si(crc, (uint32_t)(key >> 32));

    return crc;
#endif
}
198 :
199 : #endif
200 :
/* Constructor priority 65535 is the lowest, so this runs after
 * higher-priority constructors have initialized their subsystems. */
__attribute__((constructor(65535))) static void initialize_odd_constant(void)
{
    /* This constant is randomized in order to mitigate the DDoS attack
     * described by Crosby and Wallach in UsenixSec2003. */
    if (lwan_getentropy(&odd_constant, sizeof(odd_constant), 0) < 0)
        odd_constant = DEFAULT_ODD_CONSTANT;
    /* Multiplicative hash constants must be odd (this also guarantees the
     * seed is nonzero). */
    odd_constant |= 1;
    murmur3_set_seed(odd_constant);

#if defined(LWAN_HAVE_BUILTIN_CPU_INIT) && defined(LWAN_HAVE_BUILTIN_IA32_CRC32)
    /* Switch to hardware CRC32 hashing when SSE4.2 is available. */
    __builtin_cpu_init();
    if (__builtin_cpu_supports("sse4.2")) {
        lwan_status_debug("Using CRC32 instructions to calculate hashes");
        hash_str = hash_str_crc32;
        hash_int = hash_int_crc32;
        hash_int64 = hash_int64_crc32;
    }
#endif
}
220 :
/* Identity comparison for integer keys stored directly in the pointer:
 * two keys are equal iff the pointer values match. */
static inline int hash_int_key_equal(const void *k1, const void *k2)
{
    return (k1 == k2) ? 1 : 0;
}
225 :
/* Placeholder destructor used when a constructor caller passes NULL for a
 * free callback: does nothing. */
static void no_op(void *arg __attribute__((unused)))
{
}
227 :
228 : static struct hash *
229 2517 : hash_internal_new(unsigned int (*hash_value)(const void *key),
230 : int (*key_equal)(const void *k1, const void *k2),
231 : void (*free_key)(void *value),
232 : void (*free_value)(void *value))
233 : {
234 2517 : struct hash *hash = malloc(sizeof(*hash));
235 :
236 2517 : if (hash == NULL)
237 0 : return NULL;
238 :
239 2517 : hash->buckets = calloc(MIN_BUCKETS, sizeof(struct hash_bucket));
240 2517 : if (hash->buckets == NULL) {
241 0 : free(hash);
242 0 : return NULL;
243 : }
244 :
245 2517 : hash->hash_value = hash_value;
246 2517 : hash->key_equal = key_equal;
247 :
248 2517 : hash->free_value = free_value;
249 2517 : hash->free_key = free_key;
250 :
251 2517 : hash->n_buckets_mask = MIN_BUCKETS - 1;
252 2517 : hash->count = 0;
253 :
254 2517 : return hash;
255 : }
256 :
257 5 : struct hash *hash_int_new(void (*free_key)(void *value),
258 : void (*free_value)(void *value))
259 : {
260 5 : return hash_internal_new(hash_int, hash_int_key_equal,
261 : free_key ? free_key : no_op,
262 : free_value ? free_value : no_op);
263 : }
264 :
265 0 : struct hash *hash_int64_new(void (*free_key)(void *value),
266 : void (*free_value)(void *value))
267 : {
268 0 : return hash_internal_new(hash_int64, hash_int_key_equal,
269 : free_key ? free_key : no_op,
270 : free_value ? free_value : no_op);
271 : }
272 :
273 2512 : struct hash *hash_str_new(void (*free_key)(void *value),
274 : void (*free_value)(void *value))
275 : {
276 2512 : return hash_internal_new(
277 : hash_str, (int (*)(const void *, const void *))streq,
278 : free_key ? free_key : no_op, free_value ? free_value : no_op);
279 : }
280 :
281 : static __attribute__((pure)) inline unsigned int
282 4459 : hash_n_buckets(const struct hash *hash)
283 : {
284 4459 : return hash->n_buckets_mask + 1;
285 : }
286 :
287 2322 : void hash_free(struct hash *hash)
288 : {
289 : struct hash_bucket *bucket, *bucket_end;
290 :
291 2322 : if (hash == NULL)
292 1084 : return;
293 :
294 1238 : bucket = hash->buckets;
295 1238 : bucket_end = hash->buckets + hash_n_buckets(hash);
296 80470 : for (; bucket < bucket_end; bucket++) {
297 82404 : for (unsigned int entry = 0; entry < bucket->used; entry++) {
298 3172 : hash->free_value(bucket->values[entry]);
299 3172 : hash->free_key(bucket->keys[entry]);
300 : }
301 79232 : free(bucket->keys);
302 79232 : free(bucket->values);
303 79232 : free(bucket->hashvals);
304 : }
305 1238 : free(hash->buckets);
306 1238 : free(hash);
307 : }
308 :
/* Find or create the slot for `key` (whose hash is `hashval`). On success,
 * returns pointers into the bucket's arrays so the caller can store the
 * key/value; `existing` is true when an equal key was already present (the
 * slot then still holds the OLD key/value -- the caller decides what to
 * free). Returns a zeroed entry (NULL .key) on allocation failure. */
static struct hash_entry hash_add_entry_hashed(struct hash *hash,
                                               const void *key,
                                               unsigned int hashval)
{
    unsigned int pos = hashval & hash->n_buckets_mask;
    struct hash_bucket *bucket = hash->buckets + pos;
    unsigned int entry;
    bool existing = false;

    /* Grow before scanning so the append below always has room. */
    if (bucket->used + 1 >= bucket->total) {
        unsigned int new_bucket_total;

        /* NOTE(review): resize_bucket() interprets its argument in units of
         * STEPS entries (hash_del() passes a STEPS-based count), but here
         * bucket->total + 1 -- an entry count -- is passed, which makes the
         * bucket grow roughly STEPS-fold per resize. Confirm this
         * over-allocation is intended. */
        if (__builtin_add_overflow(bucket->total, 1, &new_bucket_total))
            return (struct hash_entry) {};

        if (!resize_bucket(bucket, new_bucket_total))
            return (struct hash_entry) {};
    }

    /* Cheap hash comparison first; key_equal() only on hash match. */
    for (entry = 0; entry < bucket->used; entry++) {
        if (hashval != bucket->hashvals[entry])
            continue;
        if (hash->key_equal(key, bucket->keys[entry])) {
            existing = true;
            goto done;
        }
    }

    /* Not found: append a fresh slot (key/value filled in by caller). */
    entry = bucket->used;

    bucket->keys[entry] = NULL;
    bucket->values[entry] = NULL;
    bucket->hashvals[entry] = hashval;

    bucket->used++;
    hash->count++;

done:
    return (struct hash_entry){
        .key = &bucket->keys[entry],
        .value = &bucket->values[entry],
        .hashval = &bucket->hashvals[entry],
        .existing = existing,
    };
}
354 :
355 0 : static void rehash(struct hash *hash, unsigned int new_bucket_size)
356 : {
357 0 : struct hash_bucket *buckets = calloc(new_bucket_size, sizeof(*buckets));
358 0 : const unsigned int n_buckets = hash_n_buckets(hash);
359 0 : struct hash hash_copy = *hash;
360 :
361 0 : assert((new_bucket_size & (new_bucket_size - 1)) == 0);
362 0 : assert(hash_n_buckets(hash) != new_bucket_size);
363 :
364 0 : if (buckets == NULL)
365 0 : return;
366 :
367 0 : hash_copy.count = 0;
368 0 : hash_copy.n_buckets_mask = new_bucket_size - 1;
369 0 : hash_copy.buckets = buckets;
370 :
371 : struct hash_bucket *bucket;
372 0 : struct hash_bucket *bucket_end = hash->buckets + n_buckets;
373 0 : for (bucket = hash->buckets; bucket < bucket_end; bucket++) {
374 0 : for (unsigned int old = 0; old < bucket->used; old++) {
375 : struct hash_entry new =
376 0 : hash_add_entry_hashed(&hash_copy,
377 0 : bucket->keys[old],
378 0 : bucket->hashvals[old]);
379 0 : if (UNLIKELY(!new.key))
380 0 : goto fail;
381 :
382 0 : *new.key = bucket->keys[old];
383 0 : *new.value = bucket->values[old];
384 : }
385 : }
386 :
387 : /* Original table must remain untouched in the event resizing fails:
388 : * previous loop may return early on allocation failure, so can't free
389 : * bucket entry arrays there. */
390 0 : for (bucket = hash->buckets; bucket < bucket_end; bucket++) {
391 0 : free(bucket->keys);
392 0 : free(bucket->values);
393 0 : free(bucket->hashvals);
394 : }
395 0 : free(hash->buckets);
396 :
397 0 : hash->buckets = buckets;
398 0 : hash->n_buckets_mask = new_bucket_size - 1;
399 :
400 0 : assert(hash_copy.count == hash->count);
401 :
402 0 : return;
403 :
404 0 : fail:
405 0 : for (bucket_end = bucket, bucket = hash->buckets; bucket < bucket_end;
406 0 : bucket++) {
407 0 : free(bucket->keys);
408 0 : free(bucket->values);
409 0 : free(bucket->hashvals);
410 : }
411 :
412 0 : free(buckets);
413 : }
414 :
415 3219 : static struct hash_entry hash_add_entry(struct hash *hash, const void *key)
416 : {
417 3219 : unsigned int hashval = hash->hash_value(key);
418 :
419 3219 : return hash_add_entry_hashed(hash, key, hashval);
420 : }
421 :
422 3219 : static inline bool need_rehash_grow(const struct hash *hash)
423 : {
424 : /* The heuristic to rehash and grow the number of buckets is if there's
425 : * more than 16 entries per bucket on average. This is the number of
426 : * elements in the hashvals array that would fit in a single cache line. */
427 3219 : return hash->count > hash_n_buckets(hash) * 16;
428 : }
429 :
430 2 : static inline bool need_rehash_shrink(const struct hash *hash)
431 : {
432 : /* A hash table will be shrunk if, on average, more than 50% of its
433 : * buckets are empty, but will never have less than MIN_BUCKETS buckets. */
434 2 : const unsigned int n_buckets = hash_n_buckets(hash);
435 :
436 2 : if (n_buckets <= MIN_BUCKETS)
437 2 : return false;
438 :
439 0 : if (hash->count > n_buckets / 2)
440 0 : return false;
441 :
442 0 : return true;
443 : }
444 :
445 : /*
446 : * add or replace key in hash map.
447 : *
448 : * none of key or value are copied, just references are remembered as is,
449 : * make sure they are live while pair exists in hash!
450 : */
451 3172 : int hash_add(struct hash *hash, const void *key, const void *value)
452 : {
453 3172 : struct hash_entry entry = hash_add_entry(hash, key);
454 :
455 3172 : if (!entry.key)
456 0 : return -errno;
457 3172 : if (entry.existing) {
458 0 : hash->free_key(*entry.key);
459 0 : hash->free_value(*entry.value);
460 : }
461 :
462 3172 : *entry.key = (void *)key;
463 3172 : *entry.value = (void *)value;
464 :
465 3172 : if (need_rehash_grow(hash))
466 0 : rehash(hash, hash_n_buckets(hash) * 2);
467 :
468 3172 : return 0;
469 : }
470 :
471 : /* similar to hash_add(), but fails if key already exists */
472 47 : int hash_add_unique(struct hash *hash, const void *key, const void *value)
473 : {
474 47 : struct hash_entry entry = hash_add_entry(hash, key);
475 :
476 47 : if (!entry.key)
477 0 : return -errno;
478 47 : if (entry.existing)
479 0 : return -EEXIST;
480 :
481 47 : *entry.key = (void *)key;
482 47 : *entry.value = (void *)value;
483 :
484 47 : if (need_rehash_grow(hash))
485 0 : rehash(hash, hash_n_buckets(hash) * 2);
486 :
487 47 : return 0;
488 : }
489 :
490 : static inline struct hash_entry
491 4907 : hash_find_entry(const struct hash *hash, const char *key, unsigned int hashval)
492 : {
493 4907 : unsigned int pos = hashval & hash->n_buckets_mask;
494 4907 : const struct hash_bucket *bucket = hash->buckets + pos;
495 :
496 5058 : for (unsigned int entry = 0; entry < bucket->used; entry++) {
497 3870 : if (hashval != bucket->hashvals[entry])
498 151 : continue;
499 3719 : if (hash->key_equal(key, bucket->keys[entry])) {
500 3719 : return (struct hash_entry){
501 3719 : .key = &bucket->keys[entry],
502 3719 : .value = &bucket->values[entry],
503 3719 : .hashval = &bucket->hashvals[entry],
504 : };
505 : }
506 : }
507 :
508 1188 : return (struct hash_entry){};
509 : }
510 :
511 4905 : void *hash_find(const struct hash *hash, const void *key)
512 : {
513 : struct hash_entry entry =
514 4905 : hash_find_entry(hash, key, hash->hash_value(key));
515 :
516 4905 : return entry.key ? *entry.value : NULL;
517 : }
518 :
519 2 : int hash_del(struct hash *hash, const void *key)
520 : {
521 2 : unsigned int hashval = hash->hash_value(key);
522 2 : unsigned int pos = hashval & hash->n_buckets_mask;
523 2 : struct hash_bucket *bucket = hash->buckets + pos;
524 :
525 2 : struct hash_entry entry = hash_find_entry(hash, key, hashval);
526 2 : if (entry.key == NULL)
527 0 : return -ENOENT;
528 :
529 2 : hash->free_value(*entry.value);
530 2 : hash->free_key(*entry.key);
531 :
532 2 : if (bucket->used > 1) {
533 : /* Instead of compacting the bucket array by moving elements, just copy
534 : * over the last element on top of the element being removed. This
535 : * changes the ordering inside the bucket array, but it's much more
536 : * efficient, as it always has to copy exactly at most 1 element instead
537 : * of potentially bucket->used elements. */
538 0 : void *last_key = &bucket->keys[bucket->used - 1];
539 :
540 : /* FIXME: Is comparing these pointers UB after calling free_key()? */
541 0 : if (entry.key != last_key) {
542 0 : *entry.key = last_key;
543 0 : *entry.value = bucket->values[bucket->used - 1];
544 0 : *entry.hashval = bucket->hashvals[bucket->used - 1];
545 : }
546 : }
547 :
548 2 : bucket->used--;
549 2 : hash->count--;
550 :
551 2 : if (need_rehash_shrink(hash)) {
552 0 : rehash(hash, hash_n_buckets(hash) / 2);
553 : } else {
554 2 : unsigned int steps_used = bucket->used / STEPS;
555 2 : unsigned int steps_total = bucket->total / STEPS;
556 :
557 2 : if (steps_used + 1 < steps_total) {
558 : unsigned int new_total;
559 :
560 0 : if (__builtin_add_overflow(steps_used, 1, &new_total))
561 0 : return -EOVERFLOW;
562 :
563 0 : resize_bucket(bucket, new_total);
564 : }
565 : }
566 :
567 2 : return 0;
568 : }
569 :
570 0 : unsigned int hash_get_count(const struct hash *hash) { return hash->count; }
571 :
572 0 : void hash_iter_init(const struct hash *hash, struct hash_iter *iter)
573 : {
574 0 : iter->hash = hash;
575 0 : iter->bucket = 0;
576 0 : iter->entry = -1;
577 0 : }
578 :
/* Advance the iterator and output the next key/value pair; either output
 * pointer may be NULL to skip it. Returns false once the table is
 * exhausted.
 *
 * NOTE(review): hash_del() moves the last entry of a bucket into the freed
 * slot, so deleting while iterating may skip or repeat entries -- confirm
 * callers never mutate the table mid-iteration. */
bool hash_iter_next(struct hash_iter *iter,
                    const void **key,
                    const void **value)
{
    const struct hash_bucket *b = iter->hash->buckets + iter->bucket;

    iter->entry++;

    /* Current bucket exhausted: scan forward for the next non-empty one. */
    if ((unsigned int)iter->entry >= b->used) {
        unsigned int n_buckets = hash_n_buckets(iter->hash);

        iter->entry = 0;

        for (iter->bucket++; iter->bucket < n_buckets; iter->bucket++) {
            b = iter->hash->buckets + iter->bucket;

            if (b->used > 0)
                break;
        }

        if (iter->bucket >= n_buckets)
            return false;
    }

    if (value != NULL)
        *value = b->values[iter->entry];
    if (key != NULL)
        *key = b->keys[iter->entry];

    return true;
}
|