41 using namespace nvbio;
66 template <
typename system_tag>
73 log_info(stderr,
" setup device %d\n", device);
77 size_t free_device, total_device;
78 device_memory( &free_device, &total_device );
79 log_stats(stderr,
" device has %ld of %ld MB free\n", free_device/1024/1024, total_device/1024/1024);
81 const uint32 bytes_per_word = 4u;
82 const uint64 needed_bytes = (sampled_words + trusted_words)*bytes_per_word + 256*1024*1024;
84 if (needed_bytes > free_device)
86 log_warning(stderr,
" insufficient memory: %.2f GB required\n",
float( needed_bytes ) /
float(1024*1024*1024));
90 log_info(stderr,
" allocating sampled kmer filter (%.2f GB)\n",
float( sampled_words*bytes_per_word ) /
float(1024*1024*1024));
91 sampled_kmers_storage.resize( sampled_words, 0u );
93 log_info(stderr,
" allocating trusted kmer filter (%.2f GB)\n",
float( trusted_words*bytes_per_word ) /
float(1024*1024*1024));
94 trusted_kmers_storage.resize( trusted_words, 0u );
109 trusted_kmers_storage;
114 trusted_kmers_storage;
121 else bf = trusted_kmers_storage;
128 else trusted_kmers_storage = bf;
134 threshold = _threshold;
139 if (equal<system_tag,device_tag>())
140 cudaSetDevice( device );
144 if (equal<system_tag,device_tag>())
145 cudaMemGetInfo( free_device, total_device );
147 *free_device = *total_device = 1024llu * 1024llu * 1024llu * 1024llu;
157 if (h_bloom_filters && device_count)
159 log_info(stderr,
" merge filters\n");
166 for (
uint32 d = 0; d < device_count; ++d)
168 d_bloom_filters[d].
get_kmers( type, bf2 );
170 #pragma omp parallel for
176 for (
uint32 d = 0; d < device_count; ++d)
177 d_bloom_filters[d].set_kmers( type, bf );
179 else if (device_count > 1)
181 log_info(stderr,
" merge filters\n");
186 d_bloom_filters[0].
get_kmers( type, bf );
189 for (
uint32 d = 1; d < device_count; ++d)
191 d_bloom_filters[d].
get_kmers( type, bf2 );
193 #pragma omp parallel for
199 for (
uint32 d = 0; d < device_count; ++d)
200 d_bloom_filters[d].set_kmers( type, bf );
215 stats = h_bloom_filters->
stats;
217 stats.resize( 10,
uint64(0) );
220 for (
uint32 d = 0; d < device_count; ++d)
224 stats2 = d_bloom_filters[d].
stats;
226 for (
size_t i = 0; i < stats.size(); ++i)
227 stats[i] += stats2[i];
241 double operator() (
const uint4 block)
const
244 return powf(
float(used)/128.0f,
K );
252 template <
typename system_tag>
274 const uint64 bits_per_word = 32;
282 thrust::plus<uint64>(),
285 occupancy = float(
double( bits_set ) /
double( n_words * bits_per_word ) );
286 approx_size = float( -(
double( n_words * bits_per_word ) * std::log( 1.0 - occupancy )) /
double(K) );
290 fp = std::pow( occupancy, K );
296 uint4_pointer_type( (
const uint4*)words ),
298 thrust::plus<double>(),
299 temp_storage ) /
double(n_words / 4) );