NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
bloom_filters.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 // bloom_filters.h
29 //
30 
31 #pragma once
32 
33 #include "utils.h"
34 #include <nvbio/basic/numbers.h>
35 #include <nvbio/basic/threads.h>
36 #include <nvbio/basic/cuda/arch.h>
37 #include <nvbio/basic/vector.h>
38 #include <nvbio/basic/primitives.h>
39 #include <nvbio/basic/popcount.h>
40 
41 using namespace nvbio;
42 
55 
58 
62 
66 template <typename system_tag>
68 {
69  bool setup(const int _device, const uint64 sampled_words, const uint64 trusted_words)
70  {
71  device = _device;
72 
73  log_info(stderr, " setup device %d\n", device);
74  set_device();
75 
76  // gather device memory stats
77  size_t free_device, total_device;
78  device_memory( &free_device, &total_device );
79  log_stats(stderr, " device has %ld of %ld MB free\n", free_device/1024/1024, total_device/1024/1024);
80 
81  const uint32 bytes_per_word = 4u;
82  const uint64 needed_bytes = (sampled_words + trusted_words)*bytes_per_word + 256*1024*1024;
83 
84  if (needed_bytes > free_device)
85  {
86  log_warning(stderr, " insufficient memory: %.2f GB required\n", float( needed_bytes ) / float(1024*1024*1024));
87  return false;
88  }
89 
90  log_info(stderr, " allocating sampled kmer filter (%.2f GB)\n", float( sampled_words*bytes_per_word ) / float(1024*1024*1024));
91  sampled_kmers_storage.resize( sampled_words, 0u );
92 
93  log_info(stderr, " allocating trusted kmer filter (%.2f GB)\n", float( trusted_words*bytes_per_word ) / float(1024*1024*1024));
94  trusted_kmers_storage.resize( trusted_words, 0u );
95 
96  stats.resize( 10 );
97  return true;
98  }
99 
100  int device;
105 
107  {
108  return (type == SAMPLED_KMERS) ? sampled_kmers_storage :
109  trusted_kmers_storage;
110  }
112  {
113  return (type == SAMPLED_KMERS) ? sampled_kmers_storage :
114  trusted_kmers_storage;
115  }
117  {
118  set_device();
119 
120  if (type == SAMPLED_KMERS) bf = sampled_kmers_storage;
121  else bf = trusted_kmers_storage;
122  }
124  {
125  set_device();
126 
127  if (type == SAMPLED_KMERS) sampled_kmers_storage = bf;
128  else trusted_kmers_storage = bf;
129  }
131  {
132  set_device();
133 
134  threshold = _threshold;
135  }
136 
137  void set_device() const
138  {
139  if (equal<system_tag,device_tag>())
140  cudaSetDevice( device );
141  }
142  void device_memory(size_t* free_device, size_t* total_device) const
143  {
144  if (equal<system_tag,device_tag>())
145  cudaMemGetInfo( free_device, total_device );
146  else
147  *free_device = *total_device = 1024llu * 1024llu * 1024llu * 1024llu; // TODO!
148  }
149 };
150 
153 inline
154 void merge(BloomFilters<host_tag>* h_bloom_filters, const uint32 device_count, BloomFilters<device_tag>* d_bloom_filters, const KmersType type)
155 {
156  // merge the Bloom filters on the host
157  if (h_bloom_filters && device_count)
158  {
159  log_info(stderr," merge filters\n");
160 
161  // get the host Bloom filter
162  nvbio::vector<host_tag,uint32>& bf = h_bloom_filters->get_kmers( type );
164 
165  // merge with all the device ones
166  for (uint32 d = 0; d < device_count; ++d)
167  {
168  d_bloom_filters[d].get_kmers( type, bf2 );
169 
170  #pragma omp parallel for
171  for (int64 i = 0; i < (int64)bf.size(); ++i)
172  bf[i] |= bf2[i];
173  }
174 
175  // broadcast the merged Bloom filter to all devices
176  for (uint32 d = 0; d < device_count; ++d)
177  d_bloom_filters[d].set_kmers( type, bf );
178  }
179  else if (device_count > 1)
180  {
181  log_info(stderr," merge filters\n");
184 
185  // get the first device Bloom filter
186  d_bloom_filters[0].get_kmers( type, bf );
187 
188  // merge with all the rest
189  for (uint32 d = 1; d < device_count; ++d)
190  {
191  d_bloom_filters[d].get_kmers( type, bf2 );
192 
193  #pragma omp parallel for
194  for (int64 i = 0; i < (int64)bf.size(); ++i)
195  bf[i] |= bf2[i];
196  }
197 
198  // broadcast the merged Bloom filter to all devices
199  for (uint32 d = 0; d < device_count; ++d)
200  d_bloom_filters[d].set_kmers( type, bf );
201  }
202 }
203 
204 
207 inline
208 void merged_stats(const BloomFilters<host_tag>* h_bloom_filters, const uint32 device_count, const BloomFilters<device_tag>* d_bloom_filters, nvbio::vector<host_tag,uint64>& stats)
209 {
210  // merge the stats on the host
212 
213  // copy the first
214  if (h_bloom_filters)
215  stats = h_bloom_filters->stats;
216  else
217  stats.resize( 10, uint64(0) );
218 
219  // and merge with all the rest
220  for (uint32 d = 0; d < device_count; ++d)
221  {
222  d_bloom_filters[d].set_device();
223 
224  stats2 = d_bloom_filters[d].stats;
225 
226  for (size_t i = 0; i < stats.size(); ++i)
227  stats[i] += stats2[i];
228  }
229 }
230 
234 {
235  typedef uint4 argument_type;
236  typedef double result_type;
237 
238  block_occupancy_functor(const uint32 k) : K(float(k)) {}
239 
241  double operator() (const uint4 block) const
242  {
243  const uint32 used = popc( block.x ) + popc( block.y ) + popc( block.z ) + popc( block.w );
244  return powf( float(used)/128.0f, K );
245  }
246 
247  const float K;
248 };
249 
252 template <typename system_tag>
254  const BloomFilters<system_tag>& bloom_filters,
255  const KmersType type,
256  const uint32 K,
257  float& occupancy,
258  float& approx_size,
259  float& fp)
260 {
261  typedef typename if_equal<system_tag,device_tag,thrust::device_ptr<const uint32>, const uint32*>::type uint_pointer_type;
262  typedef typename if_equal<system_tag,device_tag,thrust::device_ptr<const uint4>, const uint4*>::type uint4_pointer_type;
263 
264  bloom_filters.set_device();
265 
266  const nvbio::vector<system_tag,uint32>& bf = bloom_filters.get_kmers( type );
267 
268  const uint32 n_words = uint32( bf.size() );
269  const uint32* words = raw_pointer( bf );
270 
271  // compute the number of bits set
272  nvbio::vector<system_tag,uint8> temp_storage;
273 
274  const uint64 bits_per_word = 32;
275 
276  const uint64 bits_set = nvbio::reduce(
277  n_words,
280  uint_pointer_type( words ), popc_functor<uint32>() ),
282  thrust::plus<uint64>(),
283  temp_storage );
284 
285  occupancy = float( double( bits_set ) / double( n_words * bits_per_word ) );
286  approx_size = float( -(double( n_words * bits_per_word ) * std::log( 1.0 - occupancy )) / double(K) );
287 
288  #if 0
289  // compute the actual false positive rate
290  fp = std::pow( occupancy, K );
291  #else
292  // compute the actual false positive rate - using the block-occupancy
293  fp = float( nvbio::reduce(
294  n_words / 4,
296  uint4_pointer_type( (const uint4*)words ),
298  thrust::plus<double>(),
299  temp_storage ) / double(n_words / 4) );
300  #endif
301 }
302