NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
bloom_filter.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <nvbio/basic/types.h>
31 #include <nvbio/basic/atomics.h>
33 
34 namespace nvbio {
35 
98 
101 
107 
110 
114 template <typename T>
116 {
117 };
118 
122 template <>
124 {
126  void operator() (uint32* word, const uint32 mask) const
127  {
128  atomic_or( word, mask );
129  }
130 
132  void operator() (uint32& block, const uint32 word, const uint32 mask) const
133  {
134  block |= mask;
135  }
136 };
137 
141 template <>
142 struct inplace_or<uint2>
143 {
145  void operator() (uint2* word, const uint2 mask) const
146  {
147  atomic_or( &(word->x), mask.x );
148  atomic_or( &(word->y), mask.y );
149  }
150 
152  void operator() (uint2& block, const uint32 word, const uint32 mask) const
153  {
154  if (word == 0) block.x |= mask;
155  else block.y |= mask;
156  }
157 };
158 
162 template <>
163 struct inplace_or<uint4>
164 {
166  void operator() (uint4* word, const uint4 mask) const
167  {
168  atomic_or( &(word->x), mask.x );
169  atomic_or( &(word->y), mask.y );
170  atomic_or( &(word->z), mask.z );
171  atomic_or( &(word->w), mask.w );
172  }
173 
175  void operator() (uint4& block, const uint32 word, const uint32 mask) const
176  {
177  if (word == 0) block.x |= mask;
178  else if (word == 1) block.y |= mask;
179  else if (word == 2) block.z |= mask;
180  else block.w |= mask;
181  }
182 };
183 
187 template <>
189 {
191  void operator() (uint64* word, const uint64 mask) const
192  {
193  atomic_or( word, mask );
194  }
195 };
196 
200 template <>
202 {
204  void operator() (uint64_2* word, const uint64_2 mask) const
205  {
206  atomic_or( &(word->x), mask.x );
207  atomic_or( &(word->y), mask.y );
208  }
209 
211  void operator() (uint64_2& block, const uint32 word, const uint64 mask) const
212  {
213  if (word == 0) block.x |= mask;
214  else block.y |= mask;
215  }
216 };
217 
221 template <>
223 {
225  void operator() (uint64_4* word, const uint64_4 mask) const
226  {
227  atomic_or( &(word->x), mask.x );
228  atomic_or( &(word->y), mask.y );
229  atomic_or( &(word->z), mask.z );
230  atomic_or( &(word->w), mask.w );
231  }
232 
234  void operator() (uint64_4& block, const uint32 word, const uint64 mask) const
235  {
236  if (word == 0) block.x |= mask;
237  else if (word == 1) block.y |= mask;
238  else if (word == 2) block.z |= mask;
239  else block.w |= mask;
240  }
241 };
242 
258 template <
259  uint32 K, // number of hash functions { Hash1 + i * Hash2 | i : 0, ..., K-1 }
260  typename Hash1, // first hash generator function
261  typename Hash2, // second hash generator function
262  typename Iterator, // storage iterator - must dereference to uint32
263  typename OrOperator = inplace_or<uint32> > // OR binary operator
265 {
274  bloom_filter(
275  const uint64 size,
276  Iterator storage,
277  const Hash1 hash1 = Hash1(),
278  const Hash2 hash2 = Hash2());
279 
282  template <typename Key>
284  void insert(const Key key, const OrOperator or_op = OrOperator());
285 
288  template <typename Key>
290  bool has(const Key key) const;
291 
294  template <typename Key>
296  bool operator[] (const Key key) const { return has( key ); }
297 
299  Iterator m_storage;
300  Hash1 m_hash1;
301  Hash2 m_hash2;
302 };
303 
304 
322 template <
323  typename Hash1, // first hash generator function
324  typename Hash2, // second hash generator function
325  typename Iterator, // storage iterator - must be one of {uint32|uint2|uint4|uint64|uint64_2|uint64_4}
326  typename OrOperator = inplace_or<typename std::iterator_traits<Iterator>::value_type> > // OR binary operator
328 {
329  typedef typename std::iterator_traits<Iterator>::value_type block_type;
331 
333  static const uint32 BLOCK_SIZE = sizeof(block_type)*8u;
334  static const uint32 WORD_SIZE = sizeof(word_type)*8u;
335 
337 
347  const uint32 k,
348  const uint64 size,
349  Iterator storage,
350  const Hash1 hash1 = Hash1(),
351  const Hash2 hash2 = Hash2());
352 
355  template <typename Key>
357  void insert(const Key key, const OrOperator or_op = OrOperator());
358 
361  template <typename Key>
363  bool has(const Key key) const;
364 
367  template <typename Key>
369  bool operator[] (const Key key) const { return has( key ); }
370 
373  Iterator m_storage;
374  Hash1 m_hash1;
375  Hash2 m_hash2;
376 };
377 
382 uint32 optimal_bloom_filter_hashes(const float bits_per_key)
383 {
384  return uint32( bits_per_key * logf(2.0) ) + 1u;
385 }
386 
391 float optimal_bloom_filter_bits_per_key(const float fp_rate)
392 {
393  return -logf(fp_rate) / (logf(2.0f)*logf(2.0f));
394 }
395 
400 {
401  return float(K) / logf(2.0f);
402 }
403 
408 void optimal_bloom_filter_parameters(const float fp_rate, uint32* K, float* bits_per_key)
409 {
410  *bits_per_key = optimal_bloom_filter_bits_per_key( fp_rate );
411 
412  *K = optimal_bloom_filter_hashes( *bits_per_key );
413 }
414 
419 float optimal_bloom_filter_fp_rate(const uint32 K, const float bits_per_key)
420 {
421  return powf( 1.0f - expf( -float(K) / bits_per_key ), float(K) );
422 }
423 
426 
427 } // namespace nvbio
428