NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
sufsort
bwte.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <cub/cub.cuh>
31
#include <mgpuhost.cuh>
32
#include <moderngpu.cuh>
33
#include <
nvbio/basic/vector.h
>
34
#include <
nvbio/strings/string_set.h
>
35
#include <
nvbio/sufsort/sufsort.h
>
36
#include <
nvbio/sufsort/sufsort_priv.h
>
37
#include <
nvbio/sufsort/compression_sort.h
>
38
#include <
nvbio/fmindex/paged_text.h
>
39
40
namespace
nvbio {
41
42
//#define QUICK_CHECK
43
//#define QUICK_CHECK_REPORT
44
//#define CHECK_COPY
45
//#define CHECK_INSERTION
46
//#define CHECK_SORTING
47
#if defined(QUICK_CHECK_REPORT) || defined(CHECK_SORTING)
48
#define HOST_STRING_IDS
49
#endif
50
53
60
63
67
struct
BWTEBlock
68
{
69
uint32
max_block_suffixes
;
// reserved space
70
uint32
max_block_strings
;
// reserved space
71
uint32
n_strings
;
// number of strings
72
uint32
n_suffixes
;
// number of suffixes
73
nvbio::vector<host_tag,uint32>
h_SA
;
// host block SA
74
nvbio::vector<host_tag,uint32>
h_cum_lengths
;
// host block string lengths
75
nvbio::vector<host_tag,uint32>
h_string_ids
;
// host block string ids
76
nvbio::vector<host_tag,uint32>
h_dollar_off
;
// host block dollar offsets
77
nvbio::vector<host_tag,uint32>
h_dollar_id
;
// host block dollar ids
78
nvbio::vector<host_tag,uint64>
h_dollar_pos
;
// host block dollar insertion positions
79
nvbio::vector<host_tag,uint8>
h_BWT
;
// host BWT block storage
80
83
void
reserve
(
const
uint32
_max_block_strings,
const
uint32
_max_block_suffixes);
84
};
85
94
template
<
95
uint32
SYMBOL_SIZE,
96
bool
BIG_ENDIAN,
97
typename
storage_type =
const
uint32
*,
98
typename
offsets_iterator =
const
uint64
*>
99
struct
BWTEContext
100
{
101
typedef
typename
std::iterator_traits<offsets_iterator>::value_type
index_type
;
102
typedef
ConcatenatedStringSet
<
103
PackedStream<storage_type,uint8,SYMBOL_SIZE,BIG_ENDIAN,index_type>
,
104
offsets_iterator>
string_set_type
;
105
108
BWTEContext
(
const
int
device);
109
112
uint64
needed_device_memory
(
const
uint32
_max_block_strings,
const
uint32
_max_block_suffixes)
const
;
113
116
void
reserve
(
const
uint32
_max_block_strings,
const
uint32
_max_block_suffixes);
117
127
void
append_block
(
128
const
uint32
block_begin,
129
const
uint32
block_end,
130
const
string_set_type
string_set,
131
PagedText<SYMBOL_SIZE,BIG_ENDIAN>
& BWT_ext,
132
SparseSymbolSet
& BWT_ext_dollars,
133
const
bool
forward);
134
135
// sort the given block
136
//
137
void
sort_block
(
138
const
uint32
block_begin,
139
const
uint32
block_end,
140
const
string_set_type
string_set,
141
BWTEBlock
& block);
142
143
// merge the given sorted block
144
//
145
void
merge_block
(
146
const
uint32
block_begin,
147
const
uint32
block_end,
148
const
string_set_type
string_set,
149
BWTEBlock
& block,
150
PagedText<SYMBOL_SIZE,BIG_ENDIAN>
& BWT_ext,
151
SparseSymbolSet
& BWT_ext_dollars,
152
const
bool
forward);
153
154
private
:
155
typedef
typename
std::iterator_traits<storage_type>::value_type radix_type;
156
157
static
const
uint32
SYMBOL_COUNT = 1u << SYMBOL_SIZE;
158
static
const
uint32
RADIX_BITS =
uint32
( 8u *
sizeof
(radix_type) );
159
static
const
uint32
DOLLAR_BITS = RADIX_BITS <= 32 ? 4 : 5;
160
161
typedef
priv::ChunkLoader<SYMBOL_SIZE,BIG_ENDIAN,storage_type,offsets_iterator,host_tag,device_tag>
chunk_loader_type
;
162
typedef
typename
chunk_loader_type::chunk_set_type
chunk_set_type
;
163
164
typedef
priv::DeviceStringSetRadices<chunk_set_type,SYMBOL_SIZE,DOLLAR_BITS,RADIX_BITS>
string_set_handler_type
;
165
typedef
priv::SetSuffixFlattener<SYMBOL_SIZE>
suffix_flattener_type
;
166
167
static
const
uint32
SORTING_SLICE_SIZE = 2u;
// determines how frequently sorted suffixes are pruned
168
169
// rank the block suffixes wrt BWT_ext
170
//
171
void
rank_block(
172
const
uint32
block_begin,
173
const
uint32
block_end,
174
const
string_set_type
string_set,
175
const
BWTEBlock
& block,
176
PagedText<SYMBOL_SIZE,BIG_ENDIAN>
& BWT_ext,
177
SparseSymbolSet
& BWT_ext_dollars,
178
const
bool
forward);
179
180
// insert the block
181
//
182
void
insert_block(
183
BWTEBlock
& block,
184
PagedText<SYMBOL_SIZE,BIG_ENDIAN>
& BWT_ext,
185
SparseSymbolSet
& BWT_ext_dollars);
186
187
uint32
max_block_suffixes;
188
uint32
max_block_strings;
189
190
uint64
n_strings_ext;
191
uint64
n_suffixes_ext;
192
193
uint64
n_processed_strings;
194
uint64
n_processed_suffixes;
195
196
mgpu::ContextPtr
mgpu_ctxt;
197
string_set_handler_type
string_set_handler;
198
cuda::CompressionSort
string_sorter;
199
suffix_flattener_type
suffixes;
200
chunk_loader_type
chunk_loader;
201
202
BWTEBlock
block;
// sorted block
203
nvbio::vector<host_tag,uint64>
g;
// host insertion positions
204
nvbio::vector<host_tag,uint64>
g_sorted;
// host sorted insertions
205
206
nvbio::vector<device_tag,uint8>
d_BWT_block;
// device block bwt
207
nvbio::vector<device_tag,uint32>
d_dollar_off;
// device block dollar offsets
208
nvbio::vector<device_tag,uint32>
d_dollar_id;
// device block dollar ids
209
210
nvbio::vector<device_tag,uint2>
d_suffixes;
// device localized suffixes
211
212
nvbio::vector<device_tag,uint8>
d_temp_storage;
// device temporary storage
213
nvbio::vector<host_tag, uint8>
h_temp_storage;
// host temporary storage
214
215
float
load_time;
216
float
sort_time;
217
float
copy_time;
218
float
rank_time;
219
float
insert_time;
220
float
insert_dollars_time;
221
};
222
231
template
<u
int
32 SYMBOL_SIZE,
bool
BIG_ENDIAN,
typename
storage_type,
typename
offsets_iterator>
232
void
bwte
(
233
const
ConcatenatedStringSet
<
234
PackedStream
<storage_type,
uint8
,SYMBOL_SIZE,BIG_ENDIAN,
typename
std::iterator_traits<offsets_iterator>::value_type>,
235
offsets_iterator> string_set,
236
PagedText<SYMBOL_SIZE,BIG_ENDIAN>
& BWT_ext,
237
SparseSymbolSet
& BWT_ext_dollars,
238
BWTParams
* params = NULL);
239
242
243
}
// namespace nvbio
244
245
#include <
nvbio/sufsort/bwte_inl.h
>
Generated on Wed Feb 25 2015 08:33:03 for NVBIO by
1.8.4