NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
sufsort
bwte.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <cub/cub.cuh>
31
#include <mgpuhost.cuh>
32
#include <moderngpu.cuh>
33
#include <
nvbio/basic/vector.h
>
34
#include <
nvbio/strings/string_set.h
>
35
#include <
nvbio/sufsort/sufsort.h
>
36
#include <
nvbio/sufsort/sufsort_priv.h
>
37
#include <
nvbio/sufsort/compression_sort.h
>
38
#include <
nvbio/fmindex/paged_text.h
>
39
40
namespace
nvbio {
41
42
//#define QUICK_CHECK
43
//#define QUICK_CHECK_REPORT
44
//#define CHECK_COPY
45
//#define CHECK_INSERTION
46
//#define CHECK_SORTING
47
#if defined(QUICK_CHECK_REPORT) || defined(CHECK_SORTING)
48
#define HOST_STRING_IDS
49
#endif
50
53
60
63
67
struct
BWTEBlock
68
{
69
uint32
max_block_suffixes
;
// reserved space
70
uint32
max_block_strings
;
// reserved space
71
uint32
n_strings
;
// number of strings
72
uint32
n_suffixes
;
// number of suffixes
73
nvbio::vector<host_tag,uint32>
h_SA
;
// host block SA
74
nvbio::vector<host_tag,uint32>
h_cum_lengths
;
// host block string lengths
75
nvbio::vector<host_tag,uint32>
h_string_ids
;
// host block string ids
76
nvbio::vector<host_tag,uint32>
h_dollar_off
;
// host block dollar offsets
77
nvbio::vector<host_tag,uint32>
h_dollar_id
;
// host block dollar ids
78
nvbio::vector<host_tag,uint64>
h_dollar_pos
;
// host block dollar insertion positions
79
nvbio::vector<host_tag,uint8>
h_BWT
;
// host BWT block storage
80
83
void
reserve
(
const
uint32
_max_block_strings,
const
uint32
_max_block_suffixes);
84
};
85
94
template
<
95
uint32
SYMBOL_SIZE,
96
bool
BIG_ENDIAN,
97
typename
storage_type =
const
uint32
*,
98
typename
offsets_iterator =
const
uint64
*>
99
struct
BWTEContext
100
{
101
typedef
typename
std::iterator_traits<offsets_iterator>::value_type
index_type
;
102
typedef
ConcatenatedStringSet
<
103
PackedStream<storage_type,uint8,SYMBOL_SIZE,BIG_ENDIAN,index_type>
,
104
offsets_iterator>
string_set_type
;
105
108
BWTEContext
(
const
int
device);
109
112
uint64
needed_device_memory
(
const
uint32
_max_block_strings,
const
uint32
_max_block_suffixes)
const
;
113
116
void
reserve
(
const
uint32
_max_block_strings,
const
uint32
_max_block_suffixes);
117
127
void
append_block
(
128
const
uint32
block_begin,
129
const
uint32
block_end,
130
const
string_set_type
string_set,
131
PagedText<SYMBOL_SIZE,BIG_ENDIAN>
& BWT_ext,
132
SparseSymbolSet
& BWT_ext_dollars,
133
const
bool
forward);
134
135
// sort the given block
136
//
137
void
sort_block
(
138
const
uint32
block_begin,
139
const
uint32
block_end,
140
const
string_set_type
string_set,
141
BWTEBlock
& block);
142
143
// merge the given sorted block
144
//
145
void
merge_block
(
146
const
uint32
block_begin,
147
const
uint32
block_end,
148
const
string_set_type
string_set,
149
BWTEBlock
& block,
150
PagedText<SYMBOL_SIZE,BIG_ENDIAN>
& BWT_ext,
151
SparseSymbolSet
& BWT_ext_dollars,
152
const
bool
forward);
153
154
private
:
155
typedef
typename
std::iterator_traits<storage_type>::value_type radix_type;
156
157
static
const
uint32
SYMBOL_COUNT = 1u << SYMBOL_SIZE;
158
static
const
uint32
RADIX_BITS =
uint32
( 8u *
sizeof
(radix_type) );
159
static
const
uint32
DOLLAR_BITS = RADIX_BITS <= 32 ? 4 : 5;
160
161
typedef
priv::ChunkLoader<SYMBOL_SIZE,BIG_ENDIAN,storage_type,offsets_iterator,host_tag,device_tag>
chunk_loader_type
;
162
typedef
typename
chunk_loader_type::chunk_set_type
chunk_set_type
;
163
164
typedef
priv::DeviceStringSetRadices<chunk_set_type,SYMBOL_SIZE,DOLLAR_BITS,RADIX_BITS>
string_set_handler_type
;
165
typedef
priv::SetSuffixFlattener<SYMBOL_SIZE>
suffix_flattener_type
;
166
167
static
const
uint32
SORTING_SLICE_SIZE = 2u;
// determines how frequently sorted suffixes are pruned
168
169
// rank the block suffixes wrt BWT_ext
170
//
171
void
rank_block(
172
const
uint32
block_begin,
173
const
uint32
block_end,
174
const
string_set_type
string_set,
175
const
BWTEBlock
& block,
176
PagedText<SYMBOL_SIZE,BIG_ENDIAN>
& BWT_ext,
177
SparseSymbolSet
& BWT_ext_dollars,
178
const
bool
forward);
179
180
// insert the block
181
//
182
void
insert_block(
183
BWTEBlock
& block,
184
PagedText<SYMBOL_SIZE,BIG_ENDIAN>
& BWT_ext,
185
SparseSymbolSet
& BWT_ext_dollars);
186
187
uint32
max_block_suffixes;
188
uint32
max_block_strings;
189
190
uint64
n_strings_ext;
191
uint64
n_suffixes_ext;
192
193
uint64
n_processed_strings;
194
uint64
n_processed_suffixes;
195
196
mgpu::ContextPtr
mgpu_ctxt;
197
string_set_handler_type
string_set_handler;
198
cuda::CompressionSort
string_sorter;
199
suffix_flattener_type
suffixes;
200
chunk_loader_type
chunk_loader;
201
202
BWTEBlock
block;
// sorted block
203
nvbio::vector<host_tag,uint64>
g;
// host insertion positions
204
nvbio::vector<host_tag,uint64>
g_sorted;
// host sorted insertions
205
206
nvbio::vector<device_tag,uint8>
d_BWT_block;
// device block bwt
207
nvbio::vector<device_tag,uint32>
d_dollar_off;
// device block dollar offsets
208
nvbio::vector<device_tag,uint32>
d_dollar_id;
// device block dollar ids
209
210
nvbio::vector<device_tag,uint2>
d_suffixes;
// device localized suffixes
211
212
nvbio::vector<device_tag,uint8>
d_temp_storage;
// device temporary storage
213
nvbio::vector<host_tag, uint8>
h_temp_storage;
// host temporary storage
214
215
float
load_time;
216
float
sort_time;
217
float
copy_time;
218
float
rank_time;
219
float
insert_time;
220
float
insert_dollars_time;
221
};
222
231
template
<u
int
32 SYMBOL_SIZE,
bool
BIG_ENDIAN,
typename
storage_type,
typename
offsets_iterator>
232
void
bwte
(
233
const
ConcatenatedStringSet
<
234
PackedStream
<storage_type,
uint8
,SYMBOL_SIZE,BIG_ENDIAN,
typename
std::iterator_traits<offsets_iterator>::value_type>,
235
offsets_iterator> string_set,
236
PagedText<SYMBOL_SIZE,BIG_ENDIAN>
& BWT_ext,
237
SparseSymbolSet
& BWT_ext_dollars,
238
BWTParams
* params = NULL);
239
242
243
}
// namespace nvbio
244
245
#include <
nvbio/sufsort/bwte_inl.h
>
Generated on Wed Feb 25 2015 08:33:03 for NVBIO by
1.8.4