NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
fmindex.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <vector>
33 #include <algorithm>
34 #include <nvbio/basic/mmap.h>
35 #include <nvbio/basic/vector.h>
37 #include <nvbio/basic/cuda/ldg.h>
38 #include <nvbio/fmindex/fmindex.h>
39 #include <nvbio/fmindex/ssa.h>
40 
41 namespace nvbio {
44 namespace io {
46 
64 
67 
76 
// NOTE(review): the declaration line that opens this aggregate is missing from
// this listing (the embedded numbering jumps from 76 to 85). Only the body is
// visible; confirm the type name and any base class against the real header.
//
// What the visible body provides:
//  - bit-flag constants (FORWARD, REVERSE, SA),
//  - compile-time parameters of the packed BWT representation,
//  - pointer typedefs for the tables,
//  - read-only accessors over the m_* members.
85 {
 // Bit flags. Their bitwise OR (FORWARD | REVERSE | SA) is used as the default
 // `flags` argument of load() further down this file.
 // NOTE(review): presumably they select the forward index, the reverse index and
 // the sampled suffix arrays respectively; confirm.
86  static const uint32 FORWARD = 0x02;
87  static const uint32 REVERSE = 0x04;
88  static const uint32 SA = 0x10;
89 
90  static const uint32 BWT_BITS = 2u; // NOTE: DNA alphabet
91  static const bool BWT_BIG_ENDIAN = true; // NOTE: needs to be true to allow fast BWT construction
 // Number of BWT_BITS-wide symbols that fit in one uint32 word.
92  static const uint32 BWT_SYMBOLS_PER_WORD = (8*sizeof(uint32))/BWT_BITS;
93 
 // NOTE(review): presumably the sampling intervals of the occurrence table and of
 // the suffix array; confirm. SA_INT is passed as a template argument to ssa_type.
94  static const uint32 OCC_INT = 64;
95  static const uint32 SA_INT = 16;
96 
 // Both tables are exposed as plain read-only uint32 pointers.
97  typedef const uint32* bwt_occ_type;
98  typedef const uint32* count_table_type;
99 
 // Tail of the ssa_type typedef. Its first line (the template name) is missing
 // from this listing; the visible template arguments are SA_INT and const uint32*.
101  SA_INT,
102  const uint32*> ssa_type;
103 
 // Member-initializer list of a constructor whose signature line is missing from
 // this listing. It sets every listed scalar to 0 and every listed pointer to NULL.
107  m_flags ( 0 ),
108  m_seq_length ( 0 ),
109  m_bwt_occ_words ( 0 ),
110  m_sa_words ( 0 ),
111  m_primary ( 0 ),
112  m_rprimary ( 0 ),
113  m_L2 ( NULL ),
114  m_bwt_occ ( NULL ),
115  m_rbwt_occ ( NULL ),
116  m_count_table ( NULL )
117  {}
118 
 // Read-only accessors: each returns the corresponding m_* member unchanged.
119  uint32 flags() const { return m_flags; }
120  uint32 length() const { return m_seq_length; }
121  uint32 primary() const { return m_primary; }
122  uint32 rprimary() const { return m_rprimary; }
 // True when the inner m_ssa pointer of the respective ssa_type member is non-NULL.
123  bool has_ssa() const { return m_ssa.m_ssa != NULL; }
124  bool has_rssa() const { return m_rssa.m_ssa != NULL; }
125  const uint32* bwt_occ() const { return m_bwt_occ; }
126  const uint32* rbwt_occ() const { return m_rbwt_occ; }
127  const uint32* count_table() const { return m_count_table; }
128  uint32 bwt_occ_words() const { return m_bwt_occ_words; }
129  uint32 sa_words() const { return m_sa_words; }
 // ssa() and rssa() return their ssa_type member by value.
130  ssa_type ssa() const { return m_ssa; }
131  ssa_type rssa() const { return m_rssa; }
132  const uint32* L2() const { return m_L2; }
133 
 // NOTE(review): the data-member declarations that follow `public:` (embedded
 // lines 135-147) are missing from this listing.
134 public:
141 
148 };
149 
// Body of FMIndexData (name taken from the constructor/destructor below).
// NOTE(review): the declaration line, including any base class, is missing from
// this listing. The virtual destructor shows the type is meant to be used
// polymorphically.
158 {
 // Here the BWT/occurrence table is viewed as uint4 elements.
159  typedef const uint4* bwt_occ_type;
162 
163  typedef const uint32* count_table_type;
166 
 // NOTE(review): this rank_dictionary typedef is truncated in the listing; only
 // the BWT_BITS and occ_type template arguments are visible, and the resulting
 // typedef name is not shown.
167  typedef rank_dictionary<
168  BWT_BITS,
171  occ_type,
173 
176 
 // Constructor is declared here and defined elsewhere. The destructor is virtual
 // and empty.
177  FMIndexData();
178  virtual ~FMIndexData() {}
179 
184 
187 
 // Thin forwarders: they return whatever ssa() / rssa() return.
 // NOTE(review): ssa() and rssa() are not declared in the visible part of this
 // body; presumably inherited. Confirm.
188  ssa_type ssa_iterator() const { return ssa(); }
189  ssa_type rssa_iterator() const { return rssa(); }
190 
192 
195 
198 
201 };
202 
// Declaration of init_ssa taking an FMIndexData by const reference as its first
// parameter.
// NOTE(review): the remaining parameters and the closing `);` are missing from
// this listing (embedded lines 205-206).
203 void init_ssa(
204  const FMIndexData& driver_data,
207 
// NOTE(review): the declaration line of this aggregate is missing from this
// listing; only its load() member is visible.
212 {
 // Load entry point: takes a genome file prefix and a flag mask that defaults to
 // FORWARD | REVERSE | SA. Returns an int.
 // NOTE(review): the meaning of the return value is not shown here; confirm
 // against the implementation.
217  int load(
218  const char* genome_prefix,
219  const uint32 flags = FORWARD | REVERSE | SA);
220 
227 };
228 
// NOTE(review): the declaration line and the leading fields of this aggregate
// (embedded lines 231-235) are missing from this listing. The only visible field
// is a fixed-size array of five uint32 values named L2.
230 {
236  uint32 L2[5];
237 };
238 
// NOTE(review): the declaration line of this aggregate is missing from this
// listing. The visible members are a load() entry point and private storage built
// from ServerMappedFile objects.
244 {
246 
 // Load entry point taking a genome file prefix and a mapped name. Returns an int.
 // NOTE(review): the return-value convention is not shown here; confirm.
251  int load(
252  const char* genome_prefix, const char* mapped_name);
253 
254 private:
 // One Info record plus one ServerMappedFile per table
 // (bwt_occ, rbwt_occ, sa, rsa, info).
255  Info m_info;
256  ServerMappedFile m_bwt_occ_file;
257  ServerMappedFile m_rbwt_occ_file;
258  ServerMappedFile m_sa_file;
259  ServerMappedFile m_rsa_file;
260  ServerMappedFile m_info_file;
261 
 // Fixed-size in-object storage: a 256-entry count table and a 5-entry L2 array.
262  uint32 m_count_table_vec[256];
263  uint32 m_L2_vec[5];
264 };
265 
// NOTE(review): the declaration line and the data members of this aggregate are
// missing from this listing; only load() is visible.
271 {
273 
 // Load entry point taking a single genome name. Returns an int.
 // NOTE(review): the return-value convention is not shown here; confirm.
277  int load(
278  const char* genome_name);
279 
285 
288 };
289 
// Body of FMIndexDataDevice (name taken from the constructor below).
// NOTE(review): the declaration line, including any base class, is missing from
// this listing. Storage members are nvbio::vector<device_tag, uint32>.
295 {
 // Bit flags accepted by the constructor's `flags` argument.
296  static const uint32 FORWARD = 0x02;
297  static const uint32 REVERSE = 0x04;
298  static const uint32 SA = 0x10;
299 
300  // FM-index type interfaces
301  //
309 
313 
 // NOTE(review): this rank_dictionary typedef is truncated in the listing; only
 // the BWT_BITS and occ_type template arguments are visible.
314  typedef rank_dictionary<
315  BWT_BITS,
318  occ_type,
320 
323 
 // Constructs from host-side FMIndexData. `flags` defaults to FORWARD | REVERSE,
 // so SA is not part of the default mask.
328  FMIndexDataDevice(const FMIndexData& host_data, const uint32 flags = FORWARD | REVERSE);
329 
 // Returns m_allocated.
 // NOTE(review): presumably a byte count of the storage allocated; confirm.
330  uint64 allocated() const { return m_allocated; }
331 
 // Each iterator factory casts the uint32 pointer from bwt_occ() / rbwt_occ() to
 // const uint4*, wraps it in bwt_occ_type, then in occ_type or bwt_type.
 // The occ and bwt iterators are built from the same pointer.
334  occ_type occ_iterator() const { return occ_type(bwt_occ_type((const uint4*) bwt_occ())); }
335  occ_type rocc_iterator() const { return occ_type(bwt_occ_type((const uint4*)rbwt_occ())); }
336 
337  bwt_type bwt_iterator() const { return bwt_type(bwt_occ_type((const uint4*) bwt_occ())); }
338  bwt_type rbwt_iterator() const { return bwt_type(bwt_occ_type((const uint4*)rbwt_occ())); }
339 
342 
344 
347 
350 
353 
354 private:
 // NOTE(review): embedded lines 358-359 and 361 are missing from this listing, so
 // more storage members probably exist than are shown.
355  uint64 m_allocated;
356  nvbio::vector<device_tag,uint32> m_bwt_occ_vec;
357  nvbio::vector<device_tag,uint32> m_rbwt_occ_vec;
360  nvbio::vector<device_tag,uint32> m_count_table_vec;
362 };
363 
// Declaration of init_ssa taking an FMIndexDataDevice by const reference as its
// first parameter.
// NOTE(review): the remaining parameters and the closing `);` are missing from
// this listing (embedded lines 368-369).
366 void init_ssa(
367  const FMIndexDataDevice& driver_data,
370 
373 
374 } // namespace io
375 } // namespace nvbio