NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
contrib
htslib
htslib
khash.h
Go to the documentation of this file.
1
/* The MIT License
2
3
Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>
4
5
Permission is hereby granted, free of charge, to any person obtaining
6
a copy of this software and associated documentation files (the
7
"Software"), to deal in the Software without restriction, including
8
without limitation the rights to use, copy, modify, merge, publish,
9
distribute, sublicense, and/or sell copies of the Software, and to
10
permit persons to whom the Software is furnished to do so, subject to
11
the following conditions:
12
13
The above copyright notice and this permission notice shall be
14
included in all copies or substantial portions of the Software.
15
16
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
SOFTWARE.
24
*/
25
26
/*
27
An example:
28
29
#include "khash.h"
30
KHASH_MAP_INIT_INT(32, char)
31
int main() {
32
int ret, is_missing;
33
khiter_t k;
34
khash_t(32) *h = kh_init(32);
35
k = kh_put(32, h, 5, &ret);
36
kh_value(h, k) = 10;
37
k = kh_get(32, h, 10);
38
is_missing = (k == kh_end(h));
39
k = kh_get(32, h, 5);
40
kh_del(32, h, k);
41
for (k = kh_begin(h); k != kh_end(h); ++k)
42
if (kh_exist(h, k)) kh_value(h, k) = 1;
43
kh_destroy(32, h);
44
return 0;
45
}
46
*/
47
48
/*
49
2013-05-02 (0.2.8):
50
51
* Use quadratic probing. When the capacity is power of 2, stepping function
52
i*(i+1)/2 guarantees to traverse each bucket. It is better than double
53
hashing on cache performance and is more robust than linear probing.
54
55
In theory, double hashing should be more robust than quadratic probing.
56
However, my implementation is probably not for large hash tables, because
57
the second hash function is closely tied to the first hash function,
58
which reduce the effectiveness of double hashing.
59
60
Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php
61
62
2011-12-29 (0.2.7):
63
64
* Minor code clean up; no actual effect.
65
66
2011-09-16 (0.2.6):
67
68
* The capacity is a power of 2. This seems to dramatically improve the
69
speed for simple keys. Thank Zilong Tan for the suggestion. Reference:
70
71
- http://code.google.com/p/ulib/
72
- http://nothings.org/computer/judy/
73
74
* Allow to optionally use linear probing which usually has better
75
performance for random input. Double hashing is still the default as it
76
is more robust to certain non-random input.
77
78
* Added Wang's integer hash function (not used by default). This hash
79
function is more robust to certain non-random input.
80
81
2011-02-14 (0.2.5):
82
83
* Allow to declare global functions.
84
85
2009-09-26 (0.2.4):
86
87
* Improve portability
88
89
2008-09-19 (0.2.3):
90
91
* Corrected the example
92
* Improved interfaces
93
94
2008-09-11 (0.2.2):
95
96
* Improved speed a little in kh_put()
97
98
2008-09-10 (0.2.1):
99
100
* Added kh_clear()
101
* Fixed a compiling error
102
103
2008-09-02 (0.2.0):
104
105
* Changed to token concatenation which increases flexibility.
106
107
2008-08-31 (0.1.2):
108
109
* Fixed a bug in kh_get(), which has not been tested previously.
110
111
2008-08-31 (0.1.1):
112
113
* Added destructor
114
*/
115
116
117
#ifndef __AC_KHASH_H
118
#define __AC_KHASH_H
119
126
#define AC_VERSION_KHASH_H "0.2.8"
127
128
#include <stdlib.h>
129
#include <
string.h
>
130
#include <limits.h>
131
132
/* compiler specific configuration */
133
134
#if UINT_MAX == 0xffffffffu
135
typedef
unsigned
int
khint32_t;
136
#elif ULONG_MAX == 0xffffffffu
137
typedef
unsigned
long
khint32_t;
138
#endif
139
140
#if ULONG_MAX == ULLONG_MAX
141
typedef
unsigned
long
khint64_t
;
142
#else
143
typedef
unsigned
long
long
khint64_t
;
144
#endif
145
146
#ifdef _MSC_VER
147
#define kh_inline __inline
148
#else
149
#define kh_inline inline
150
#endif
151
152
typedef
khint32_t
khint_t
;
153
typedef
khint_t
khiter_t
;
154
155
#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
156
#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
157
#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
158
#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
159
#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
160
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
161
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
162
163
#define __ac_fsize(m) ((m) < 16? 1 : (m)>>4)
164
165
#ifndef kroundup32
166
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
167
#endif
168
169
#ifndef kcalloc
170
#define kcalloc(N,Z) calloc(N,Z)
171
#endif
172
#ifndef kmalloc
173
#define kmalloc(Z) malloc(Z)
174
#endif
175
#ifndef krealloc
176
#define krealloc(P,Z) realloc(P,Z)
177
#endif
178
#ifndef kfree
179
#define kfree(P) free(P)
180
#endif
181
182
static
const
double
__ac_HASH_UPPER = 0.77;
183
184
#define __KHASH_TYPE(name, khkey_t, khval_t) \
185
typedef struct { \
186
khint_t n_buckets, size, n_occupied, upper_bound; \
187
khint32_t *flags; \
188
khkey_t *keys; \
189
khval_t *vals; \
190
} kh_##name##_t;
191
192
#define __KHASH_PROTOTYPES(name, khkey_t, khval_t) \
193
extern kh_##name##_t *kh_init_##name(void); \
194
extern void kh_destroy_##name(kh_##name##_t *h); \
195
extern void kh_clear_##name(kh_##name##_t *h); \
196
extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
197
extern int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
198
extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
199
extern void kh_del_##name(kh_##name##_t *h, khint_t x);
200
201
#define __KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
202
SCOPE kh_##name##_t *kh_init_##name(void) { \
203
return (kh_##name##_t*)kcalloc(1, sizeof(kh_##name##_t)); \
204
} \
205
SCOPE void kh_destroy_##name(kh_##name##_t *h) \
206
{ \
207
if (h) { \
208
kfree((void *)h->keys); kfree(h->flags); \
209
kfree((void *)h->vals); \
210
kfree(h); \
211
} \
212
} \
213
SCOPE void kh_clear_##name(kh_##name##_t *h) \
214
{ \
215
if (h && h->flags) { \
216
memset(h->flags, 0xaa, __ac_fsize(h->n_buckets) * sizeof(khint32_t)); \
217
h->size = h->n_occupied = 0; \
218
} \
219
} \
220
SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
221
{ \
222
if (h->n_buckets) { \
223
khint_t k, i, last, mask, step = 0; \
224
mask = h->n_buckets - 1; \
225
k = __hash_func(key); i = k & mask; \
226
last = i; \
227
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
228
i = (i + (++step)) & mask; \
229
if (i == last) return h->n_buckets; \
230
} \
231
return __ac_iseither(h->flags, i)? h->n_buckets : i; \
232
} else return 0; \
233
} \
234
SCOPE int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
235
{
/* This function uses 0.25*n_buckets bytes of working space instead of [sizeof(key_t+val_t)+.25]*n_buckets. */
\
236
khint32_t *new_flags = 0; \
237
khint_t j = 1; \
238
{ \
239
kroundup32(new_n_buckets); \
240
if (new_n_buckets < 4) new_n_buckets = 4; \
241
if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0;
/* requested size is too small */
\
242
else {
/* hash table size to be changed (shrink or expand); rehash */
\
243
new_flags = (khint32_t*)kmalloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
244
if (!new_flags) return -1; \
245
memset(new_flags, 0xaa, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
246
if (h->n_buckets < new_n_buckets) {
/* expand */
\
247
khkey_t *new_keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
248
if (!new_keys) return -1; \
249
h->keys = new_keys; \
250
if (kh_is_map) { \
251
khval_t *new_vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
252
if (!new_vals) return -1; \
253
h->vals = new_vals; \
254
} \
255
}
/* otherwise shrink */
\
256
} \
257
} \
258
if (j) {
/* rehashing is needed */
\
259
for (j = 0; j != h->n_buckets; ++j) { \
260
if (__ac_iseither(h->flags, j) == 0) { \
261
khkey_t key = h->keys[j]; \
262
khval_t val; \
263
khint_t new_mask; \
264
new_mask = new_n_buckets - 1; \
265
if (kh_is_map) val = h->vals[j]; \
266
__ac_set_isdel_true(h->flags, j); \
267
while (1) {
/* kick-out process; sort of like in Cuckoo hashing */
\
268
khint_t k, i, step = 0; \
269
k = __hash_func(key); \
270
i = k & new_mask; \
271
while (!__ac_isempty(new_flags, i)) i = (i + (++step)) & new_mask; \
272
__ac_set_isempty_false(new_flags, i); \
273
if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) {
/* kick out the existing element */
\
274
{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
275
if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
276
__ac_set_isdel_true(h->flags, i);
/* mark it as deleted in the old hash table */
\
277
} else {
/* write the element and jump out of the loop */
\
278
h->keys[i] = key; \
279
if (kh_is_map) h->vals[i] = val; \
280
break; \
281
} \
282
} \
283
} \
284
} \
285
if (h->n_buckets > new_n_buckets) {
/* shrink the hash table */
\
286
h->keys = (khkey_t*)krealloc((void *)h->keys, new_n_buckets * sizeof(khkey_t)); \
287
if (kh_is_map) h->vals = (khval_t*)krealloc((void *)h->vals, new_n_buckets * sizeof(khval_t)); \
288
} \
289
kfree(h->flags);
/* free the working space */
\
290
h->flags = new_flags; \
291
h->n_buckets = new_n_buckets; \
292
h->n_occupied = h->size; \
293
h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
294
} \
295
return 0; \
296
} \
297
SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
298
{ \
299
khint_t x; \
300
if (h->n_occupied >= h->upper_bound) {
/* update the hash table */
\
301
if (h->n_buckets > (h->size<<1)) { \
302
if (kh_resize_##name(h, h->n_buckets - 1) < 0) {
/* clear "deleted" elements */
\
303
*ret = -1; return h->n_buckets; \
304
} \
305
} else if (kh_resize_##name(h, h->n_buckets + 1) < 0) {
/* expand the hash table */
\
306
*ret = -1; return h->n_buckets; \
307
} \
308
}
/* TODO: to implement automatically shrinking; resize() already support shrinking */
\
309
{ \
310
khint_t k, i, site, last, mask = h->n_buckets - 1, step = 0; \
311
x = site = h->n_buckets; k = __hash_func(key); i = k & mask; \
312
if (__ac_isempty(h->flags, i)) x = i;
/* for speed up */
\
313
else { \
314
last = i; \
315
while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
316
if (__ac_isdel(h->flags, i)) site = i; \
317
i = (i + (++step)) & mask; \
318
if (i == last) { x = site; break; } \
319
} \
320
if (x == h->n_buckets) { \
321
if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
322
else x = i; \
323
} \
324
} \
325
} \
326
if (__ac_isempty(h->flags, x)) {
/* not present at all */
\
327
h->keys[x] = key; \
328
__ac_set_isboth_false(h->flags, x); \
329
++h->size; ++h->n_occupied; \
330
*ret = 1; \
331
} else if (__ac_isdel(h->flags, x)) {
/* deleted */
\
332
h->keys[x] = key; \
333
__ac_set_isboth_false(h->flags, x); \
334
++h->size; \
335
*ret = 2; \
336
} else *ret = 0;
/* Don't touch h->keys[x] if present and not deleted */
\
337
return x; \
338
} \
339
SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
340
{ \
341
if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
342
__ac_set_isdel_true(h->flags, x); \
343
--h->size; \
344
} \
345
}
346
347
#define KHASH_DECLARE(name, khkey_t, khval_t) \
348
__KHASH_TYPE(name, khkey_t, khval_t) \
349
__KHASH_PROTOTYPES(name, khkey_t, khval_t)
350
351
#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
352
__KHASH_TYPE(name, khkey_t, khval_t) \
353
__KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
354
355
#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
356
KHASH_INIT2(name, static kh_inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
357
358
/* --- BEGIN OF HASH FUNCTIONS --- */
359
365
#define kh_int_hash_func(key) (khint32_t)(key)
366
369
#define kh_int_hash_equal(a, b) ((a) == (b))
370
375
#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
376
379
#define kh_int64_hash_equal(a, b) ((a) == (b))
380
385
static
kh_inline
khint_t
__ac_X31_hash_string(
const
char
*s)
386
{
387
khint_t
h = (
khint_t
)*s;
388
if
(h)
for
(++s ; *s; ++s) h = (h << 5) - h + (
khint_t
)*s;
389
return
h;
390
}
396
#define kh_str_hash_func(key) __ac_X31_hash_string(key)
397
400
#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
401
402
static
kh_inline
khint_t
__ac_Wang_hash(
khint_t
key)
403
{
404
key += ~(key << 15);
405
key ^= (key >> 10);
406
key += (key << 3);
407
key ^= (key >> 6);
408
key += ~(key << 11);
409
key ^= (key >> 16);
410
return
key;
411
}
412
#define kh_int_hash_func2(k) __ac_Wang_hash((khint_t)key)
413
414
/* --- END OF HASH FUNCTIONS --- */
415
416
/* Other convenient macros... */
417
422
#define khash_t(name) kh_##name##_t
423
429
#define kh_init(name) kh_init_##name()
430
436
#define kh_destroy(name, h) kh_destroy_##name(h)
437
443
#define kh_clear(name, h) kh_clear_##name(h)
444
451
#define kh_resize(name, h, s) kh_resize_##name(h, s)
452
464
#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
465
473
#define kh_get(name, h, k) kh_get_##name(h, k)
474
481
#define kh_del(name, h, k) kh_del_##name(h, k)
482
489
#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
490
497
#define kh_key(h, x) ((h)->keys[x])
498
506
#define kh_val(h, x) ((h)->vals[x])
507
511
#define kh_value(h, x) ((h)->vals[x])
512
518
#define kh_begin(h) (khint_t)(0)
519
525
#define kh_end(h) ((h)->n_buckets)
526
532
#define kh_size(h) ((h)->size)
533
539
#define kh_n_buckets(h) ((h)->n_buckets)
540
548
#define kh_foreach(h, kvar, vvar, code) { khint_t __i; \
549
for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
550
if (!kh_exist(h,__i)) continue; \
551
(kvar) = kh_key(h,__i); \
552
(vvar) = kh_val(h,__i); \
553
code; \
554
} }
555
562
#define kh_foreach_value(h, vvar, code) { khint_t __i; \
563
for (__i = kh_begin(h); __i != kh_end(h); ++__i) { \
564
if (!kh_exist(h,__i)) continue; \
565
(vvar) = kh_val(h,__i); \
566
code; \
567
} }
568
569
/* More conenient interfaces */
570
575
#define KHASH_SET_INIT_INT(name) \
576
KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
577
583
#define KHASH_MAP_INIT_INT(name, khval_t) \
584
KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
585
590
#define KHASH_SET_INIT_INT64(name) \
591
KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
592
598
#define KHASH_MAP_INIT_INT64(name, khval_t) \
599
KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
600
601
typedef
const
char
*
kh_cstr_t
;
606
#define KHASH_SET_INIT_STR(name) \
607
KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
608
614
#define KHASH_MAP_INIT_STR(name, khval_t) \
615
KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
616
617
#endif
/* __AC_KHASH_H */
Generated on Wed Feb 25 2015 08:32:48 for NVBIO by
1.8.4