NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
mem.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <nvbio/fmindex/fmindex.h>
31 #include <nvbio/fmindex/bidir.h>
32 #include <nvbio/basic/types.h>
33 #include <nvbio/basic/numbers.h>
34 #include <nvbio/basic/algorithms.h>
35 #include <nvbio/basic/exceptions.h>
36 #include <nvbio/basic/vector.h>
38 #include <nvbio/basic/cuda/sort.h>
40 #include <nvbio/strings/string.h>
41 #include <thrust/sort.h>
42 #include <thrust/scan.h>
43 #include <thrust/binary_search.h>
44 #include <thrust/iterator/constant_iterator.h>
45 #include <thrust/iterator/counting_iterator.h>
46 
47 namespace nvbio {
48 
51 
81 template <typename pattern_type, typename fm_index_type, typename delegate_type>
84  const uint32 pattern_len,
85  const pattern_type pattern,
86  const uint32 x,
87  const fm_index_type f_index,
88  const fm_index_type r_index,
89  delegate_type& handler,
90  const uint32 min_intv = 1u,
91  const uint32 min_span = 1u);
92 
127 template <typename pattern_type, typename fm_index_type, typename delegate_type>
130  const uint32 pattern_len,
131  const pattern_type pattern,
132  const uint32 x,
133  const fm_index_type f_index,
134  const fm_index_type r_index,
135  delegate_type& handler,
136  const uint32 min_intv = 1u,
137  const uint32 min_span = 1u);
138 
155 template <typename system_tag, typename fm_index_type>
156 struct MEMFilter {};
157 
166 template <typename coord_type>
167 struct MEMRange
168 {
171 
172  static const uint32 GROUP_FLAG = 1u << 31;
173 
175  MEMRange() {}
176 
178  MEMRange(const base_type vec) : coords( vec ) {}
179 
181  MEMRange(const uint2 range, const uint32 string_id, const uint2 span, const bool flag = false)
182  {
183  coords.x = range.x;
184  coords.y = range.y;
185  coords.z = string_id;
186  coords.w = span.x | (span.y << 16);
187 
188  if (flag)
189  set_group_flag();
190  }
191 
193  MEMRange(const uint2 range, const uint32 string_id, const uint32 span_begin, const uint32 span_end, const bool flag = false)
194  {
195  coords.x = range.x;
196  coords.y = range.y;
197  coords.z = string_id;
198  coords.w = span_begin | (span_end << 16);
199 
200  if (flag)
201  set_group_flag();
202  }
203 
206 
208  bool group_flag() const { return (coords.z & GROUP_FLAG) ? true : false; }
209 
211  range_type range() const { return make_uint2( coords.x, coords.y ); }
212 
214  coord_type range_size() const { return 1u + coords.y - coords.x; }
215 
217  uint32 string_id() const { return uint32(coords.z) & (~GROUP_FLAG); }
218 
220  uint2 span() const { return make_uint2( uint32(coords.w) & 0xFFu, uint32(coords.w) >> 16u ); }
221 
223 };
224 
233 template <typename coord_type>
234 struct MEMHit
235 {
237 
239  MEMHit() {}
240 
242  MEMHit(const base_type vec) : coords( vec ) {}
243 
245  MEMHit(const uint32 index_pos, const uint32 string_id, const uint2 span)
246  {
247  coords.x = index_pos;
248  coords.y = string_id;
249  coords.z = span.x;
250  coords.w = span.y;
251  }
252 
254  MEMHit(const uint32 index_pos, const uint32 string_id, const uint32 span_begin, const uint32 span_end)
255  {
256  coords.x = index_pos;
257  coords.y = string_id;
258  coords.z = span_begin;
259  coords.w = span_end;
260  }
261 
263  coord_type index_pos() const { return coords.x; }
264 
266  uint32 string_id() const { return uint32(coords.y); }
267 
269  uint2 span() const { return make_uint2( uint32(coords.z), uint32(coords.w) ); }
270 
272 };
273 
290 template <typename fm_index_type>
291 struct MEMFilter<host_tag, fm_index_type>
292 {
294  typedef fm_index_type index_type;
295 
296  typedef typename index_type::index_type coord_type;
297  static const uint32 coord_dim = vector_traits<coord_type>::DIM;
298 
301  typedef mem_type hit_type;
302 
314  template <typename string_set_type>
315  uint64 rank(
316  const fm_index_type& f_index,
317  const fm_index_type& r_index,
318  const string_set_type& string_set,
319  const uint32 min_intv = 1u,
320  const uint32 max_intv = uint32(-1),
321  const uint32 min_span = 1u,
322  const uint32 split_len = uint32(-1),
323  const uint32 split_width = uint32(-1));
324 
327  uint32 first_hit(const uint32 string_id) const;
328 
336  template <typename mems_iterator>
337  void locate(
338  const uint64 begin,
339  const uint64 end,
340  mems_iterator mems);
341 
344  uint64 n_hits() const { return m_n_occurrences; }
345 
348  uint64 n_mems() const { return m_n_occurrences; }
349 
352  uint64 n_ranges() const { return m_mem_ranges.allocated_size(); }
353 
359  thrust::host_vector<uint64> m_slots;
360 };
361 
378 template <typename fm_index_type>
379 struct MEMFilter<device_tag, fm_index_type>
380 {
382  typedef fm_index_type index_type;
383 
384  typedef typename index_type::index_type coord_type;
385  static const uint32 coord_dim = vector_traits<coord_type>::DIM;
386 
389  typedef mem_type hit_type;
390 
402  template <typename string_set_type>
403  uint64 rank(
404  const fm_index_type& f_index,
405  const fm_index_type& r_index,
406  const string_set_type& string_set,
407  const uint32 min_intv = 1u,
408  const uint32 max_intv = uint32(-1),
409  const uint32 min_span = 1u,
410  const uint32 split_len = uint32(-1),
411  const uint32 split_width = uint32(-1));
412 
415  uint32 first_hit(const uint32 string_id) const;
416 
424  template <typename mems_iterator>
425  void locate(
426  const uint64 begin,
427  const uint64 end,
428  mems_iterator mems);
429 
432  uint64 n_hits() const { return m_n_occurrences; }
433 
436  uint64 n_mems() const { return m_n_occurrences; }
437 
440  uint64 n_ranges() const { return m_mem_ranges.allocated_size(); }
441 
447  thrust::device_vector<uint64> m_slots;
448  thrust::device_vector<uint8> d_temp_storage;
449 };
450 
453 template <typename system_tag, typename fm_index_type>
455 
472 template <typename fm_index_type>
473 struct MEMFilterHost : public MEMFilter<host_tag, fm_index_type> {};
474 
491 template <typename fm_index_type>
492 struct MEMFilterDevice : public MEMFilter<device_tag, fm_index_type> {};
493 
495 
496 } // namespace nvbio
497 
498 #include <nvbio/fmindex/mem_inl.h>