NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
khash.h
Go to the documentation of this file.
1 /* The MIT License
2 
3  Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
4 
5  Permission is hereby granted, free of charge, to any person obtaining
6  a copy of this software and associated documentation files (the
7  "Software"), to deal in the Software without restriction, including
8  without limitation the rights to use, copy, modify, merge, publish,
9  distribute, sublicense, and/or sell copies of the Software, and to
10  permit persons to whom the Software is furnished to do so, subject to
11  the following conditions:
12 
13  The above copyright notice and this permission notice shall be
14  included in all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  SOFTWARE.
24 */
25 
26 /*
27  An example:
28 
29 #include "khash.h"
30 KHASH_MAP_INIT_INT(32, char)
31 int main() {
32  int ret, is_missing;
33  khiter_t k;
34  khash_t(32) *h = kh_init(32);
35  k = kh_put(32, h, 5, &ret);
36  kh_value(h, k) = 10;
37  k = kh_get(32, h, 10);
38  is_missing = (k == kh_end(h));
39  k = kh_get(32, h, 5);
40  kh_del(32, h, k);
41  for (k = kh_begin(h); k != kh_end(h); ++k)
42  if (kh_exist(h, k)) kh_value(h, k) = 1;
43  kh_destroy(32, h);
44  return 0;
45 }
46 */
47 
48 /*
49  2013-05-02 (0.2.8):
50 
51  * Use quadratic probing. When the capacity is power of 2, stepping function
52  i*(i+1)/2 guarantees to traverse each bucket. It is better than double
53  hashing on cache performance and is more robust than linear probing.
54 
55  In theory, double hashing should be more robust than quadratic probing.
56  However, my implementation is probably not for large hash tables, because
57  the second hash function is closely tied to the first hash function,
58  which reduce the effectiveness of double hashing.
59 
60  Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
61 
62  2011-12-29 (0.2.7):
63 
64  * Minor code clean up; no actual effect.
65 
66  2011-09-16 (0.2.6):
67 
68  * The capacity is a power of 2. This seems to dramatically improve the
69  speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
70 
71  - http://code.google.com/p/ulib/
72  - http://nothings.org/computer/judy/
73 
74  * Allow to optionally use linear probing which usually has better
75  performance for random input. Double hashing is still the default as it
76  is more robust to certain non-random input.
77 
78  * Added Wang's integer hash function (not used by default). This hash
79  function is more robust to certain non-random input.
80 
81  2011-02-14 (0.2.5):
82 
83  * Allow to declare global functions.
84 
85  2009-09-26 (0.2.4):
86 
87  * Improve portability
88 
89  2008-09-19 (0.2.3):
90 
91  * Corrected the example
92  * Improved interfaces
93 
94  2008-09-11 (0.2.2):
95 
96  * Improved speed a little in kh_put()
97 
98  2008-09-10 (0.2.1):
99 
100  * Added kh_clear()
101  * Fixed a compiling error
102 
103  2008-09-02 (0.2.0):
104 
105  * Changed to token concatenation which increases flexibility.
106 
107  2008-08-31 (0.1.2):
108 
109  * Fixed a bug in kh_get(), which has not been tested previously.
110 
111  2008-08-31 (0.1.1):
112 
113  * Added destructor
114 */
115 
116 
117 #ifndef __AC_KHASH_H
118 #define __AC_KHASH_H
119 
126 #define AC_VERSION_KHASH_H "0.2.8"
127 
128 #include <stdlib.h>
129 #include <string.h>
130 #include <limits.h>
131 
132 /* compiler specific configuration */
133 
134 #if UINT_MAX == 0xffffffffu
135 typedef unsigned int khint32_t;
136 #elif ULONG_MAX == 0xffffffffu
137 typedef unsigned long khint32_t;
138 #endif
139 
140 #if ULONG_MAX == ULLONG_MAX
141 typedef unsigned long khint64_t;
142 #else
143 typedef unsigned long long khint64_t;
144 #endif
145 
146 #ifdef _MSC_VER
147 #define kh_inline __inline
148 #else
149 #define kh_inline inline
150 #endif
151 
152 typedef khint32_t khint_t;
154 
155 #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
156 #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
157 #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
158 #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
159 #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
160 #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
161 #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
162 
163 #define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
164 
165 #ifndef kroundup32
166 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
167 #endif
168 
169 #ifndef kcalloc
170 #define kcalloc(N,Z) calloc(N,Z)
171 #endif
172 #ifndef kmalloc
173 #define kmalloc(Z) malloc(Z)
174 #endif
175 #ifndef krealloc
176 #define krealloc(P,Z) realloc(P,Z)
177 #endif
178 #ifndef kfree
179 #define kfree(P) free(P)
180 #endif
181 
182 static const double __ac_HASH_UPPER = 0.77;
183 
184 #define __KHASH_TYPE(name, khkey_t, khval_t) \
185  typedef struct { \
186  khint_t n_buckets, size, n_occupied, upper_bound; \
187  khint32_t *flags; \
188  khkey_t *keys; \
189  khval_t *vals; \
190  } kh_##name##_t;
191 
192 #define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
193  extern kh_##name##_t *kh_init_##name(void); \
194  extern void kh_destroy_##name(kh_##name##_t *h); \
195  extern void kh_clear_##name(kh_##name##_t *h); \
196  extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
197  extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
198  extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
199  extern void kh_del_##name(kh_##name##_t *h, khint_t x);
200 
201 #define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
202  SCOPE kh_##name##_t *kh_init_##name(void) { \
203  return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \
204  } \
205  SCOPE void kh_destroy_##name(kh_##name##_t *h) \
206  { \
207  if (h) { \
208  kfree((void *)h->keys); kfree(h->flags); \
209  kfree((void *)h->vals); \
210  kfree(h); \
211  } \
212  } \
213  SCOPE void kh_clear_##name(kh_##name##_t *h) \
214  { \
215  if (h && h->flags) { \
216  memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
217  h->size = h->n_occupied = 0; \
218  } \
219  } \
220  SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
221  { \
222  if (h->n_buckets) { \
223  khint_t k, i, last, mask, step = 0; \
224  mask = h->n_buckets - 1; \
225  k = __hash_func(key); i = k & mask; \
226  last = i; \
227  while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
228  i = (i + (++step)) & mask; \
229  if (i == last) return h->n_buckets; \
230  } \
231  return __ac_iseither(h->flags, i)? h->n_buckets : i; \
232  } else return 0; \
233  } \
234  SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
235  { /* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */ \
236  khint32_t *new_flags = 0; \
237  khint_t j = 1; \
238  { \
239  kroundup32(new_n_buckets); \
240  if (new_n_buckets < 4) new_n_buckets = 4; \
241  if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
242  else { /* hash table size to be changed (shrink or expand); rehash */ \
243  new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
244  if (!new_flags) return -1; \
245  memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
246  if (h->n_buckets < new_n_buckets) { /* expand */ \
247  khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
248  if (!new_keys) return -1; \
249  h->keys = new_keys; \
250  if (kh_is_map) { \
251  khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
252  if (!new_vals) return -1; \
253  h->vals = new_vals; \
254  } \
255  } /* otherwise shrink */ \
256  } \
257  } \
258  if (j) { /* rehashing is needed */ \
259  for (j = 0; j != h->n_buckets; ++j) { \
260  if (__ac_iseither(h->flags, j) == 0) { \
261  khkey_t key = h->keys[j]; \
262  khval_t val; \
263  khint_t new_mask; \
264  new_mask = new_n_buckets - 1; \
265  if (kh_is_map) val = h->vals[j]; \
266  __ac_set_isdel_true(h->flags, j); \
267  while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
268  khint_t k, i, step = 0; \
269  k = __hash_func(key); \
270  i = k & new_mask; \
271  while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
272  __ac_set_isempty_false(new_flags, i); \
273  if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { /* kick out the existing element */ \
274  { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
275  if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
276  __ac_set_isdel_true(h->flags, i); /* mark it as deleted in the old hash table */ \
277  } else { /* write the element and jump out of the loop */ \
278  h->keys[i] = key; \
279  if (kh_is_map) h->vals[i] = val; \
280  break; \
281  } \
282  } \
283  } \
284  } \
285  if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
286  h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
287  if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
288  } \
289  kfree(h->flags); /* free the working space */ \
290  h->flags = new_flags; \
291  h->n_buckets = new_n_buckets; \
292  h->n_occupied = h->size; \
293  h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
294  } \
295  return 0; \
296  } \
297  SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
298  { \
299  khint_t x; \
300  if (h->n_occupied >= h->upper_bound) { /* update the hash table */ \
301  if (h->n_buckets > (h->size<<1)) { \
302  if (kh_resize_##name(h, h->n_buckets - 1) < 0) { /* clear "deleted" elements */ \
303  *ret = -1; return h->n_buckets; \
304  } \
305  } else if (kh_resize_##name(h, h->n_buckets + 1) < 0) { /* expand the hash table */ \
306  *ret = -1; return h->n_buckets; \
307  } \
308  } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
309  { \
310  khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
311  x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
312  if (__ac_isempty(h->flags, i)) x = i; /* for speed up */ \
313  else { \
314  last = i; \
315  while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
316  if (__ac_isdel(h->flags, i)) site = i; \
317  i = (i + (++step)) & mask; \
318  if (i == last) { x = site; break; } \
319  } \
320  if (x == h->n_buckets) { \
321  if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
322  else x = i; \
323  } \
324  } \
325  } \
326  if (__ac_isempty(h->flags, x)) { /* not present at all */ \
327  h->keys[x] = key; \
328  __ac_set_isboth_false(h->flags, x); \
329  ++h->size; ++h->n_occupied; \
330  *ret = 1; \
331  } else if (__ac_isdel(h->flags, x)) { /* deleted */ \
332  h->keys[x] = key; \
333  __ac_set_isboth_false(h->flags, x); \
334  ++h->size; \
335  *ret = 2; \
336  } else *ret = 0; /* Don't touch h->keys[x] if present and not deleted */ \
337  return x; \
338  } \
339  SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
340  { \
341  if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
342  __ac_set_isdel_true(h->flags, x); \
343  --h->size; \
344  } \
345  }
346 
347 #define KHASH_DECLARE(name, khkey_t, khval_t) \
348  __KHASH_TYPE(name, khkey_t, khval_t) \
349  __KHASH_PROTOTYPES(name, khkey_t, khval_t)
350 
351 #define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
352  __KHASH_TYPE(name, khkey_t, khval_t) \
353  __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
354 
355 #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
356  KHASH_INIT2(name, static kh_inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
357 
358 /* --- BEGIN OF HASH FUNCTIONS --- */
359 
365 #define kh_int_hash_func(key) (khint32_t)(key)
366 
369 #define kh_int_hash_equal(a, b) ((a) == (b))
370 
375 #define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
376 
379 #define kh_int64_hash_equal(a, b) ((a) == (b))
380 
385 static kh_inline khint_t __ac_X31_hash_string(const char *s)
386 {
387  khint_t h = (khint_t)*s;
388  if (h) for (++s ; *s; ++s) h = (h << 5) - h + (khint_t)*s;
389  return h;
390 }
396 #define kh_str_hash_func(key) __ac_X31_hash_string(key)
397 
400 #define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
401 
402 static kh_inline khint_t __ac_Wang_hash(khint_t key)
403 {
404  key += ~(key << 15);
405  key ^= (key >> 10);
406  key += (key << 3);
407  key ^= (key >> 6);
408  key += ~(key << 11);
409  key ^= (key >> 16);
410  return key;
411 }
412 #define kh_int_hash_func2(k) __ac_Wang_hash((khint_t)key)
413 
414 /* --- END OF HASH FUNCTIONS --- */
415 
416 /* Other convenient macros... */
417 
422 #define khash_t(name) kh_##name##_t
423 
429 #define kh_init(name) kh_init_##name()
430 
436 #define kh_destroy(name, h) kh_destroy_##name(h)
437 
443 #define kh_clear(name, h) kh_clear_##name(h)
444 
451 #define kh_resize(name, h, s) kh_resize_##name(h, s)
452 
464 #define kh_put(name, h, k, r) kh_put_##name(h, k, r)
465 
473 #define kh_get(name, h, k) kh_get_##name(h, k)
474 
481 #define kh_del(name, h, k) kh_del_##name(h, k)
482 
489 #define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
490 
497 #define kh_key(h, x) ((h)->keys[x])
498 
506 #define kh_val(h, x) ((h)->vals[x])
507 
511 #define kh_value(h, x) ((h)->vals[x])
512 
518 #define kh_begin(h) (khint_t)(0)
519 
525 #define kh_end(h) ((h)->n_buckets)
526 
532 #define kh_size(h) ((h)->size)
533 
539 #define kh_n_buckets(h) ((h)->n_buckets)
540 
548 #define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
549  for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
550  if (!kh_exist(h,__i)) continue; \
551  (kvar) = kh_key(h,__i); \
552  (vvar) = kh_val(h,__i); \
553  code; \
554  } }
555 
562 #define kh_foreach_value(h, vvar, code) { khint_t __i; \
563  for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
564  if (!kh_exist(h,__i)) continue; \
565  (vvar) = kh_val(h,__i); \
566  code; \
567  } }
568 
569 /* More conenient interfaces */
570 
575 #define KHASH_SET_INIT_INT(name) \
576  KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
577 
583 #define KHASH_MAP_INIT_INT(name, khval_t) \
584  KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
585 
590 #define KHASH_SET_INIT_INT64(name) \
591  KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
592 
598 #define KHASH_MAP_INIT_INT64(name, khval_t) \
599  KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
600 
601 typedef const char *kh_cstr_t;
606 #define KHASH_SET_INIT_STR(name) \
607  KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
608 
614 #define KHASH_MAP_INIT_STR(name, khval_t) \
615  KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
616 
617 #endif /* __AC_KHASH_H */