NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
io
fmindex
fmindex.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <stdio.h>
31
#include <stdlib.h>
32
#include <vector>
33
#include <algorithm>
34
#include <
nvbio/basic/mmap.h
>
35
#include <
nvbio/basic/vector.h
>
36
#include <
nvbio/basic/deinterleaved_iterator.h
>
37
#include <
nvbio/basic/cuda/ldg.h
>
38
#include <
nvbio/fmindex/fmindex.h
>
39
#include <
nvbio/fmindex/ssa.h
>
40
41
namespace
nvbio {
44
namespace
io {
46
64
67
76
84
struct
FMIndexDataCore
85
{
86
static
const
uint32
FORWARD
= 0x02;
87
static
const
uint32
REVERSE
= 0x04;
88
static
const
uint32
SA
= 0x10;
89
90
static
const
uint32
BWT_BITS
= 2u;
// NOTE: DNA alphabet
91
static
const
bool
BWT_BIG_ENDIAN
=
true
;
// NOTE: needs to be true to allow fast BWT construction
92
static
const
uint32
BWT_SYMBOLS_PER_WORD
= (8*
sizeof
(
uint32
))/
BWT_BITS
;
93
94
static
const
uint32
OCC_INT
= 64;
95
static
const
uint32
SA_INT
= 16;
96
97
typedef
const
uint32
*
bwt_occ_type
;
98
typedef
const
uint32
*
count_table_type
;
99
100
typedef
SSA_index_multiple_context
<
101
SA_INT
,
102
const
uint32
*>
ssa_type
;
103
106
FMIndexDataCore
() :
107
m_flags
( 0 ),
108
m_seq_length
( 0 ),
109
m_bwt_occ_words
( 0 ),
110
m_sa_words
( 0 ),
111
m_primary
( 0 ),
112
m_rprimary
( 0 ),
113
m_L2
( NULL ),
114
m_bwt_occ
( NULL ),
115
m_rbwt_occ
( NULL ),
116
m_count_table
( NULL )
117
{}
118
119
uint32
flags
()
const
{
return
m_flags
; }
120
uint32
length
()
const
{
return
m_seq_length
; }
121
uint32
primary
()
const
{
return
m_primary
; }
122
uint32
rprimary
()
const
{
return
m_rprimary
; }
123
bool
has_ssa
()
const
{
return
m_ssa
.
m_ssa
!= NULL; }
124
bool
has_rssa
()
const
{
return
m_rssa
.
m_ssa
!= NULL; }
125
const
uint32
*
bwt_occ
()
const
{
return
m_bwt_occ
; }
126
const
uint32
*
rbwt_occ
()
const
{
return
m_rbwt_occ
; }
127
const
uint32
*
count_table
()
const
{
return
m_count_table
; }
128
uint32
bwt_occ_words
()
const
{
return
m_bwt_occ_words
; }
129
uint32
sa_words
()
const
{
return
m_sa_words
; }
130
ssa_type
ssa
()
const
{
return
m_ssa
; }
131
ssa_type
rssa
()
const
{
return
m_rssa
; }
132
const
uint32
*
L2
()
const
{
return
m_L2
; }
133
134
public
:
135
uint32
m_flags
;
136
uint32
m_seq_length
;
137
uint32
m_bwt_occ_words
;
138
uint32
m_sa_words
;
139
uint32
m_primary
;
140
uint32
m_rprimary
;
141
142
uint32
*
m_L2
;
143
uint32
*
m_bwt_occ
;
144
uint32
*
m_rbwt_occ
;
145
uint32
*
m_count_table
;
146
ssa_type
m_ssa
;
147
ssa_type
m_rssa
;
148
};
149
157
struct
FMIndexData
:
public
FMIndexDataCore
158
{
159
typedef
const
uint4*
bwt_occ_type
;
160
typedef
deinterleaved_iterator<2,0,bwt_occ_type>
bwt_type
;
161
typedef
deinterleaved_iterator<2,1,bwt_occ_type>
occ_type
;
162
163
typedef
const
uint32
*
count_table_type
;
164
typedef
SSA_index_multiple<SA_INT>
ssa_storage_type
;
165
typedef
PackedStream<bwt_type,uint8,BWT_BITS,BWT_BIG_ENDIAN>
bwt_stream_type
;
166
167
typedef
rank_dictionary
<
168
BWT_BITS
,
169
FMIndexDataCore::OCC_INT
,
170
bwt_stream_type
,
171
occ_type
,
172
count_table_type
>
rank_dict_type
;
173
174
typedef
fm_index<rank_dict_type, ssa_type>
fm_index_type
;
175
typedef
fm_index<rank_dict_type, null_type>
partial_fm_index_type
;
176
177
FMIndexData
();
178
virtual
~FMIndexData
() {}
179
182
occ_type
occ_iterator
()
const
{
return
occ_type
(
bwt_occ_type
(
bwt_occ
())); }
183
occ_type
rocc_iterator
()
const
{
return
occ_type
(
bwt_occ_type
(
rbwt_occ
())); }
184
185
bwt_type
bwt_iterator
()
const
{
return
bwt_type
(
bwt_occ_type
(
bwt_occ
())); }
186
bwt_type
rbwt_iterator
()
const
{
return
bwt_type
(
bwt_occ_type
(
rbwt_occ
())); }
187
188
ssa_type
ssa_iterator
()
const
{
return
ssa
(); }
189
ssa_type
rssa_iterator
()
const
{
return
rssa
(); }
190
191
count_table_type
count_table_iterator
()
const
{
return
count_table_type
(
count_table
() ); }
192
193
rank_dict_type
rank_dict
()
const
{
return
rank_dict_type
(
bwt_stream_type
(
bwt_iterator
() ),
occ_iterator
(),
count_table_iterator
() ); }
194
rank_dict_type
rrank_dict
()
const
{
return
rank_dict_type
(
bwt_stream_type
(
rbwt_iterator
() ),
rocc_iterator
(),
count_table_iterator
() ); }
195
196
fm_index_type
index
()
const
{
return
fm_index_type
(
length
(),
primary
(),
L2
(),
rank_dict
(),
ssa_iterator
() ); }
197
fm_index_type
rindex
()
const
{
return
fm_index_type
(
length
(),
rprimary
(),
L2
(),
rrank_dict
(),
rssa_iterator
() ); }
198
199
partial_fm_index_type
partial_index
()
const
{
return
partial_fm_index_type
(
length
(),
primary
(),
L2
(),
rank_dict
(),
null_type
() ); }
200
partial_fm_index_type
rpartial_index
()
const
{
return
partial_fm_index_type
(
length
(),
rprimary
(),
L2
(),
rrank_dict
(),
null_type
() ); }
201
};
202
203
void
init_ssa
(
204
const
FMIndexData& driver_data,
205
FMIndexData::ssa_storage_type
& ssa,
206
FMIndexData::ssa_storage_type
& rssa);
207
211
struct
FMIndexDataHost
:
public
FMIndexData
212
{
217
int
load
(
218
const
char
* genome_prefix,
219
const
uint32
flags
=
FORWARD
|
REVERSE
|
SA
);
220
221
nvbio::vector<host_tag,uint32>
m_bwt_occ_vec
;
222
nvbio::vector<host_tag,uint32>
m_rbwt_occ_vec
;
223
nvbio::vector<host_tag,uint32>
m_ssa_vec
;
224
nvbio::vector<host_tag,uint32>
m_rssa_vec
;
225
uint32
m_count_table_vec
[256];
226
uint32
m_L2_vec
[5];
227
};
228
229
struct
FMIndexDataMMAPInfo
230
{
231
uint32
sequence_length
;
232
uint32
bwt_occ_words
;
233
uint32
sa_words
;
234
uint32
primary
;
235
uint32
rprimary
;
236
uint32
L2
[5];
237
};
238
243
struct
FMIndexDataMMAPServer
:
public
FMIndexData
244
{
245
typedef
FMIndexDataMMAPInfo
Info
;
246
251
int
load
(
252
const
char
* genome_prefix,
const
char
* mapped_name);
253
254
private
:
255
Info
m_info;
256
ServerMappedFile
m_bwt_occ_file;
257
ServerMappedFile
m_rbwt_occ_file;
258
ServerMappedFile
m_sa_file;
259
ServerMappedFile
m_rsa_file;
260
ServerMappedFile
m_info_file;
261
262
uint32
m_count_table_vec[256];
263
uint32
m_L2_vec[5];
264
};
265
270
struct
FMIndexDataMMAP
:
public
FMIndexData
271
{
272
typedef
FMIndexDataMMAPInfo
Info
;
273
277
int
load
(
278
const
char
* genome_name);
279
280
MappedFile
m_bwt_occ_file
;
281
MappedFile
m_rbwt_occ_file
;
282
MappedFile
m_sa_file
;
283
MappedFile
m_rsa_file
;
284
MappedFile
m_info_file
;
285
286
uint32
m_count_table_vec
[256];
287
uint32
m_L2_vec
[5];
288
};
289
294
struct
FMIndexDataDevice
:
public
FMIndexData
295
{
296
static
const
uint32
FORWARD
= 0x02;
297
static
const
uint32
REVERSE
= 0x04;
298
static
const
uint32
SA
= 0x10;
299
300
// FM-index type interfaces
301
//
302
typedef
cuda::ldg_pointer<uint4>
bwt_occ_type
;
303
typedef
deinterleaved_iterator<2,0,bwt_occ_type>
bwt_type
;
304
typedef
deinterleaved_iterator<2,1,bwt_occ_type>
occ_type
;
305
typedef
cuda::ldg_pointer<uint32>
count_table_type
;
306
typedef
cuda::ldg_pointer<uint32>
ssa_ldg_type
;
307
typedef
SSA_index_multiple_device<SA_INT>
ssa_storage_type
;
308
typedef
PackedStream<bwt_type,uint8,BWT_BITS,BWT_BIG_ENDIAN>
bwt_stream_type
;
309
310
typedef
SSA_index_multiple_context
<
311
FMIndexDataCore::SA_INT
,
312
ssa_ldg_type
>
ssa_type
;
313
314
typedef
rank_dictionary
<
315
BWT_BITS
,
316
FMIndexDataCore::OCC_INT
,
317
bwt_stream_type
,
318
occ_type
,
319
count_table_type
>
rank_dict_type
;
320
321
typedef
fm_index<rank_dict_type,ssa_type>
fm_index_type
;
322
typedef
fm_index<rank_dict_type,null_type>
partial_fm_index_type
;
323
328
FMIndexDataDevice
(
const
FMIndexData
& host_data,
const
uint32
flags
=
FORWARD
|
REVERSE
);
329
330
uint64
allocated
()
const
{
return
m_allocated; }
331
334
occ_type
occ_iterator
()
const
{
return
occ_type
(
bwt_occ_type
((
const
uint4*)
bwt_occ
())); }
335
occ_type
rocc_iterator
()
const
{
return
occ_type
(
bwt_occ_type
((
const
uint4*)
rbwt_occ
())); }
336
337
bwt_type
bwt_iterator
()
const
{
return
bwt_type
(
bwt_occ_type
((
const
uint4*)
bwt_occ
())); }
338
bwt_type
rbwt_iterator
()
const
{
return
bwt_type
(
bwt_occ_type
((
const
uint4*)
rbwt_occ
())); }
339
340
ssa_type
ssa_iterator
()
const
{
return
ssa_type
(
ssa_ldg_type
(
m_ssa
.
m_ssa
)); }
341
ssa_type
rssa_iterator
()
const
{
return
ssa_type
(
ssa_ldg_type
(
m_rssa
.
m_ssa
)); }
342
343
count_table_type
count_table_iterator
()
const
{
return
count_table_type
(
count_table
() ); }
344
345
rank_dict_type
rank_dict
()
const
{
return
rank_dict_type
(
bwt_stream_type
(
bwt_iterator
() ),
occ_iterator
(),
count_table_iterator
() ); }
346
rank_dict_type
rrank_dict
()
const
{
return
rank_dict_type
(
bwt_stream_type
(
rbwt_iterator
() ),
rocc_iterator
(),
count_table_iterator
() ); }
347
348
fm_index_type
index
()
const
{
return
fm_index_type
(
length
(),
primary
(),
L2
(),
rank_dict
(),
ssa_iterator
() ); }
349
fm_index_type
rindex
()
const
{
return
fm_index_type
(
length
(),
rprimary
(),
L2
(),
rrank_dict
(),
rssa_iterator
() ); }
350
351
partial_fm_index_type
partial_index
()
const
{
return
partial_fm_index_type
(
length
(),
primary
(),
L2
(),
rank_dict
(),
null_type
() ); }
352
partial_fm_index_type
rpartial_index
()
const
{
return
partial_fm_index_type
(
length
(),
rprimary
(),
L2
(),
rrank_dict
(),
null_type
() ); }
353
354
private
:
355
uint64
m_allocated;
356
nvbio::vector<device_tag,uint32>
m_bwt_occ_vec;
357
nvbio::vector<device_tag,uint32>
m_rbwt_occ_vec;
358
nvbio::vector<device_tag,uint32>
m_ssa_vec;
359
nvbio::vector<device_tag,uint32>
m_rssa_vec;
360
nvbio::vector<device_tag,uint32>
m_count_table_vec;
361
nvbio::vector<device_tag,uint32>
m_L2_vec;
362
};
363
366
void
init_ssa
(
367
const
FMIndexDataDevice& driver_data,
368
FMIndexDataDevice::ssa_storage_type
& ssa,
369
FMIndexDataDevice::ssa_storage_type
& rssa);
370
373
374
}
// namespace io
375
}
// namespace nvbio
Generated on Wed Feb 25 2015 08:32:58 for NVBIO by Doxygen 1.8.4