NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
io
fmindex
fmindex.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <stdio.h>
31
#include <stdlib.h>
32
#include <vector>
33
#include <algorithm>
34
#include <
nvbio/basic/mmap.h
>
35
#include <
nvbio/basic/vector.h
>
36
#include <
nvbio/basic/deinterleaved_iterator.h
>
37
#include <
nvbio/basic/cuda/ldg.h
>
38
#include <
nvbio/fmindex/fmindex.h
>
39
#include <
nvbio/fmindex/ssa.h
>
40
41
namespace
nvbio {
44
namespace
io {
46
64
67
76
84
struct
FMIndexDataCore
85
{
86
static
const
uint32
FORWARD
= 0x02;
87
static
const
uint32
REVERSE
= 0x04;
88
static
const
uint32
SA
= 0x10;
89
90
static
const
uint32
BWT_BITS
= 2u;
// NOTE: DNA alphabet
91
static
const
bool
BWT_BIG_ENDIAN
=
true
;
// NOTE: needs to be true to allow fast BWT construction
92
static
const
uint32
BWT_SYMBOLS_PER_WORD
= (8*
sizeof
(
uint32
))/
BWT_BITS
;
93
94
static
const
uint32
OCC_INT
= 64;
95
static
const
uint32
SA_INT
= 16;
96
97
typedef
const
uint32
*
bwt_occ_type
;
98
typedef
const
uint32
*
count_table_type
;
99
100
typedef
SSA_index_multiple_context
<
101
SA_INT
,
102
const
uint32
*>
ssa_type
;
103
106
FMIndexDataCore
() :
107
m_flags
( 0 ),
108
m_seq_length
( 0 ),
109
m_bwt_occ_words
( 0 ),
110
m_sa_words
( 0 ),
111
m_primary
( 0 ),
112
m_rprimary
( 0 ),
113
m_L2
( NULL ),
114
m_bwt_occ
( NULL ),
115
m_rbwt_occ
( NULL ),
116
m_count_table
( NULL )
117
{}
118
119
uint32
flags
()
const
{
return
m_flags
; }
120
uint32
length
()
const
{
return
m_seq_length
; }
121
uint32
primary
()
const
{
return
m_primary
; }
122
uint32
rprimary
()
const
{
return
m_rprimary
; }
123
bool
has_ssa
()
const
{
return
m_ssa
.
m_ssa
!= NULL; }
124
bool
has_rssa
()
const
{
return
m_rssa
.
m_ssa
!= NULL; }
125
const
uint32
*
bwt_occ
()
const
{
return
m_bwt_occ
; }
126
const
uint32
*
rbwt_occ
()
const
{
return
m_rbwt_occ
; }
127
const
uint32
*
count_table
()
const
{
return
m_count_table
; }
128
uint32
bwt_occ_words
()
const
{
return
m_bwt_occ_words
; }
129
uint32
sa_words
()
const
{
return
m_sa_words
; }
130
ssa_type
ssa
()
const
{
return
m_ssa
; }
131
ssa_type
rssa
()
const
{
return
m_rssa
; }
132
const
uint32
*
L2
()
const
{
return
m_L2
; }
133
134
public
:
135
uint32
m_flags
;
136
uint32
m_seq_length
;
137
uint32
m_bwt_occ_words
;
138
uint32
m_sa_words
;
139
uint32
m_primary
;
140
uint32
m_rprimary
;
141
142
uint32
*
m_L2
;
143
uint32
*
m_bwt_occ
;
144
uint32
*
m_rbwt_occ
;
145
uint32
*
m_count_table
;
146
ssa_type
m_ssa
;
147
ssa_type
m_rssa
;
148
};
149
157
struct
FMIndexData
:
public
FMIndexDataCore
158
{
159
typedef
const
uint4*
bwt_occ_type
;
160
typedef
deinterleaved_iterator<2,0,bwt_occ_type>
bwt_type
;
161
typedef
deinterleaved_iterator<2,1,bwt_occ_type>
occ_type
;
162
163
typedef
const
uint32
*
count_table_type
;
164
typedef
SSA_index_multiple<SA_INT>
ssa_storage_type
;
165
typedef
PackedStream<bwt_type,uint8,BWT_BITS,BWT_BIG_ENDIAN>
bwt_stream_type
;
166
167
typedef
rank_dictionary
<
168
BWT_BITS
,
169
FMIndexDataCore::OCC_INT
,
170
bwt_stream_type
,
171
occ_type
,
172
count_table_type
>
rank_dict_type
;
173
174
typedef
fm_index<rank_dict_type, ssa_type>
fm_index_type
;
175
typedef
fm_index<rank_dict_type, null_type>
partial_fm_index_type
;
176
177
FMIndexData
();
178
virtual
~FMIndexData
() {}
179
182
occ_type
occ_iterator
()
const
{
return
occ_type
(
bwt_occ_type
(
bwt_occ
())); }
183
occ_type
rocc_iterator
()
const
{
return
occ_type
(
bwt_occ_type
(
rbwt_occ
())); }
184
185
bwt_type
bwt_iterator
()
const
{
return
bwt_type
(
bwt_occ_type
(
bwt_occ
())); }
186
bwt_type
rbwt_iterator
()
const
{
return
bwt_type
(
bwt_occ_type
(
rbwt_occ
())); }
187
188
ssa_type
ssa_iterator
()
const
{
return
ssa
(); }
189
ssa_type
rssa_iterator
()
const
{
return
rssa
(); }
190
191
count_table_type
count_table_iterator
()
const
{
return
count_table_type
(
count_table
() ); }
192
193
rank_dict_type
rank_dict
()
const
{
return
rank_dict_type
(
bwt_stream_type
(
bwt_iterator
() ),
occ_iterator
(),
count_table_iterator
() ); }
194
rank_dict_type
rrank_dict
()
const
{
return
rank_dict_type
(
bwt_stream_type
(
rbwt_iterator
() ),
rocc_iterator
(),
count_table_iterator
() ); }
195
196
fm_index_type
index
()
const
{
return
fm_index_type
(
length
(),
primary
(),
L2
(),
rank_dict
(),
ssa_iterator
() ); }
197
fm_index_type
rindex
()
const
{
return
fm_index_type
(
length
(),
rprimary
(),
L2
(),
rrank_dict
(),
rssa_iterator
() ); }
198
199
partial_fm_index_type
partial_index
()
const
{
return
partial_fm_index_type
(
length
(),
primary
(),
L2
(),
rank_dict
(),
null_type
() ); }
200
partial_fm_index_type
rpartial_index
()
const
{
return
partial_fm_index_type
(
length
(),
rprimary
(),
L2
(),
rrank_dict
(),
null_type
() ); }
201
};
202
203
void
init_ssa
(
204
const
FMIndexData& driver_data,
205
FMIndexData::ssa_storage_type
& ssa,
206
FMIndexData::ssa_storage_type
& rssa);
207
211
struct
FMIndexDataHost
:
public
FMIndexData
212
{
217
int
load
(
218
const
char
* genome_prefix,
219
const
uint32
flags
=
FORWARD
|
REVERSE
|
SA
);
220
221
nvbio::vector<host_tag,uint32>
m_bwt_occ_vec
;
222
nvbio::vector<host_tag,uint32>
m_rbwt_occ_vec
;
223
nvbio::vector<host_tag,uint32>
m_ssa_vec
;
224
nvbio::vector<host_tag,uint32>
m_rssa_vec
;
225
uint32
m_count_table_vec
[256];
226
uint32
m_L2_vec
[5];
227
};
228
229
struct
FMIndexDataMMAPInfo
230
{
231
uint32
sequence_length
;
232
uint32
bwt_occ_words
;
233
uint32
sa_words
;
234
uint32
primary
;
235
uint32
rprimary
;
236
uint32
L2
[5];
237
};
238
243
struct
FMIndexDataMMAPServer
:
public
FMIndexData
244
{
245
typedef
FMIndexDataMMAPInfo
Info
;
246
251
int
load
(
252
const
char
* genome_prefix,
const
char
* mapped_name);
253
254
private
:
255
Info
m_info;
256
ServerMappedFile
m_bwt_occ_file;
257
ServerMappedFile
m_rbwt_occ_file;
258
ServerMappedFile
m_sa_file;
259
ServerMappedFile
m_rsa_file;
260
ServerMappedFile
m_info_file;
261
262
uint32
m_count_table_vec[256];
263
uint32
m_L2_vec[5];
264
};
265
270
struct
FMIndexDataMMAP
:
public
FMIndexData
271
{
272
typedef
FMIndexDataMMAPInfo
Info
;
273
277
int
load
(
278
const
char
* genome_name);
279
280
MappedFile
m_bwt_occ_file
;
281
MappedFile
m_rbwt_occ_file
;
282
MappedFile
m_sa_file
;
283
MappedFile
m_rsa_file
;
284
MappedFile
m_info_file
;
285
286
uint32
m_count_table_vec
[256];
287
uint32
m_L2_vec
[5];
288
};
289
294
struct
FMIndexDataDevice
:
public
FMIndexData
295
{
296
static
const
uint32
FORWARD
= 0x02;
297
static
const
uint32
REVERSE
= 0x04;
298
static
const
uint32
SA
= 0x10;
299
300
// FM-index type interfaces
301
//
302
typedef
cuda::ldg_pointer<uint4>
bwt_occ_type
;
303
typedef
deinterleaved_iterator<2,0,bwt_occ_type>
bwt_type
;
304
typedef
deinterleaved_iterator<2,1,bwt_occ_type>
occ_type
;
305
typedef
cuda::ldg_pointer<uint32>
count_table_type
;
306
typedef
cuda::ldg_pointer<uint32>
ssa_ldg_type
;
307
typedef
SSA_index_multiple_device<SA_INT>
ssa_storage_type
;
308
typedef
PackedStream<bwt_type,uint8,BWT_BITS,BWT_BIG_ENDIAN>
bwt_stream_type
;
309
310
typedef
SSA_index_multiple_context
<
311
FMIndexDataCore::SA_INT
,
312
ssa_ldg_type
>
ssa_type
;
313
314
typedef
rank_dictionary
<
315
BWT_BITS
,
316
FMIndexDataCore::OCC_INT
,
317
bwt_stream_type
,
318
occ_type
,
319
count_table_type
>
rank_dict_type
;
320
321
typedef
fm_index<rank_dict_type,ssa_type>
fm_index_type
;
322
typedef
fm_index<rank_dict_type,null_type>
partial_fm_index_type
;
323
328
FMIndexDataDevice
(
const
FMIndexData
& host_data,
const
uint32
flags
=
FORWARD
|
REVERSE
);
329
330
uint64
allocated
()
const
{
return
m_allocated; }
331
334
occ_type
occ_iterator
()
const
{
return
occ_type
(
bwt_occ_type
((
const
uint4*)
bwt_occ
())); }
335
occ_type
rocc_iterator
()
const
{
return
occ_type
(
bwt_occ_type
((
const
uint4*)
rbwt_occ
())); }
336
337
bwt_type
bwt_iterator
()
const
{
return
bwt_type
(
bwt_occ_type
((
const
uint4*)
bwt_occ
())); }
338
bwt_type
rbwt_iterator
()
const
{
return
bwt_type
(
bwt_occ_type
((
const
uint4*)
rbwt_occ
())); }
339
340
ssa_type
ssa_iterator
()
const
{
return
ssa_type
(
ssa_ldg_type
(
m_ssa
.
m_ssa
)); }
341
ssa_type
rssa_iterator
()
const
{
return
ssa_type
(
ssa_ldg_type
(
m_rssa
.
m_ssa
)); }
342
343
count_table_type
count_table_iterator
()
const
{
return
count_table_type
(
count_table
() ); }
344
345
rank_dict_type
rank_dict
()
const
{
return
rank_dict_type
(
bwt_stream_type
(
bwt_iterator
() ),
occ_iterator
(),
count_table_iterator
() ); }
346
rank_dict_type
rrank_dict
()
const
{
return
rank_dict_type
(
bwt_stream_type
(
rbwt_iterator
() ),
rocc_iterator
(),
count_table_iterator
() ); }
347
348
fm_index_type
index
()
const
{
return
fm_index_type
(
length
(),
primary
(),
L2
(),
rank_dict
(),
ssa_iterator
() ); }
349
fm_index_type
rindex
()
const
{
return
fm_index_type
(
length
(),
rprimary
(),
L2
(),
rrank_dict
(),
rssa_iterator
() ); }
350
351
partial_fm_index_type
partial_index
()
const
{
return
partial_fm_index_type
(
length
(),
primary
(),
L2
(),
rank_dict
(),
null_type
() ); }
352
partial_fm_index_type
rpartial_index
()
const
{
return
partial_fm_index_type
(
length
(),
rprimary
(),
L2
(),
rrank_dict
(),
null_type
() ); }
353
354
private
:
355
uint64
m_allocated;
356
nvbio::vector<device_tag,uint32>
m_bwt_occ_vec;
357
nvbio::vector<device_tag,uint32>
m_rbwt_occ_vec;
358
nvbio::vector<device_tag,uint32>
m_ssa_vec;
359
nvbio::vector<device_tag,uint32>
m_rssa_vec;
360
nvbio::vector<device_tag,uint32>
m_count_table_vec;
361
nvbio::vector<device_tag,uint32>
m_L2_vec;
362
};
363
366
void
init_ssa
(
367
const
FMIndexDataDevice& driver_data,
368
FMIndexDataDevice::ssa_storage_type
& ssa,
369
FMIndexDataDevice::ssa_storage_type
& rssa);
370
373
374
}
// namespace io
375
}
// namespace nvbio
Generated on Wed Feb 25 2015 08:32:58 for NVBIO by
1.8.4