NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
strings
wavelet_tree.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <
nvbio/strings/string.h
>
31
#include <
nvbio/basic/packed_vector.h
>
32
#include <
nvbio/basic/primitives.h
>
33
#include <
nvbio/basic/cuda/sort.h
>
34
#include <thrust/sort.h>
35
#include <algorithm>
36
#include <stack>
37
38
namespace
nvbio {
39
42
57
60
76
template
<
typename
BitStreamIterator,
typename
IndexIterator,
typename
SymbolType = u
int
8>
77
struct
WaveletTree
78
{
79
// define the system tag
80
typedef
typename
iterator_system<BitStreamIterator>::type
system_tag
;
81
typedef
typename
stream_traits<BitStreamIterator>::index_type
index_type
;
82
typedef
SymbolType
symbol_type
;
83
typedef
SymbolType
value_type
;
84
typedef
BitStreamIterator
bit_iterator
;
85
typedef
IndexIterator
index_iterator
;
86
typedef
WaveletTree<BitStreamIterator,IndexIterator>
text_type
;
// the text is the wavelet tree itself
87
88
typedef
typename
vector_type<index_type,2>::type
range_type
;
89
typedef
null_type
vector_type
;
// unsupported, would require knowing alphabet size
90
93
NVBIO_HOST_DEVICE
94
WaveletTree
(
95
const
uint32
_symbol_size = 0,
96
const
index_type
_size = 0,
97
const
BitStreamIterator _bits = BitStreamIterator(),
98
const
IndexIterator _nodes = IndexIterator(),
99
const
IndexIterator _occ = IndexIterator()) :
100
m_symbol_size
( _symbol_size ),
101
m_size
( _size ),
102
m_bits
( _bits ),
103
m_nodes
( _nodes ),
104
m_occ
( _occ ) {}
105
108
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
109
void
resize
(
const
uint32
_size,
const
uint32
_symbol_size)
110
{
111
m_size
= _size;
112
m_symbol_size
= _symbol_size;
113
}
114
117
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
118
uint32
symbol_count
()
const
{
return
1u <<
m_symbol_size
; }
119
122
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
123
uint32
symbol_size
()
const
{
return
m_symbol_size
; }
124
127
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
128
index_type
size
()
const
{
return
m_size
; }
129
132
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
133
BitStreamIterator
bits
()
const
{
return
m_bits
; }
134
137
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
138
IndexIterator
splits
()
const
{
return
m_nodes
; }
139
142
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
143
IndexIterator
occ
()
const
{
return
m_occ
; }
144
147
NVBIO_HOST_DEVICE
148
index_type
rank
(
const
uint32
l,
const
uint32
node,
const
index_type
node_begin,
const
index_type
r,
const
uint8
b)
const
;
149
152
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
153
SymbolType
operator[]
(
const
index_type
i)
const
;
154
157
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
158
SymbolType
operator()
(
const
index_type
i)
const
{
return
this->
operator[]
(i); }
159
160
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
text_type
&
text
() {
return
*
this
; }
161
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
const
text_type
&
text
()
const
{
return
*
this
; }
162
163
uint32
m_symbol_size
;
164
index_type
m_size
;
165
BitStreamIterator
m_bits
;
166
IndexIterator
m_nodes
;
167
IndexIterator
m_occ
;
168
};
169
178
template
<
typename
SystemTag,
typename
IndexType = u
int
32,
typename
SymbolType = u
int
8>
179
struct
WaveletTreeStorage
180
{
181
// define the system tag
182
typedef
SystemTag
system_tag
;
183
typedef
IndexType
index_type
;
184
typedef
SymbolType
symbol_type
;
185
186
typedef
PackedVector<system_tag,1u,true,index_type>
bit_vector_type
;
187
typedef
nvbio::vector<system_tag,index_type>
index_vector_type
;
188
typedef
typename
bit_vector_type::iterator
bit_iterator
;
189
typedef
typename
bit_vector_type::const_iterator
const_bit_iterator
;
190
typedef
typename
index_vector_type::iterator
index_iterator
;
191
typedef
typename
index_vector_type::const_iterator
const_index_iterator
;
192
193
typedef
WaveletTree< bit_iterator, index_iterator>
plain_view_type
;
194
typedef
WaveletTree<const_bit_iterator,const_index_iterator>
const_plain_view_type
;
195
198
WaveletTreeStorage
() :
199
m_symbol_size
( 0u ),
200
m_size
( 0u ) {}
201
204
void
resize
(
const
uint32
_size,
const
uint32
_symbol_size)
205
{
206
m_size
= _size;
207
m_symbol_size
= _symbol_size;
208
209
const
uint32
n_symbols = 1u << _symbol_size;
210
211
m_bits
.
resize
(
m_size
*
m_symbol_size
);
212
m_nodes
.resize( n_symbols );
213
m_occ
.resize( n_symbols +
util::divide_ri
(
m_size
* m_symbol_size, 32u ) );
214
}
215
218
NVBIO_HOST_DEVICE
219
uint32
symbol_size
()
const
{
return
m_symbol_size
; }
220
223
NVBIO_HOST_DEVICE
224
index_type
size
()
const
{
return
m_size
; }
225
228
bit_iterator
bits
() {
return
m_bits
.
begin
(); }
229
232
index_iterator
splits
() {
return
m_nodes
.begin(); }
233
236
index_iterator
occ
() {
return
m_occ
.begin(); }
237
240
const_bit_iterator
bits
()
const
{
return
m_bits
.
begin
(); }
241
244
const_index_iterator
splits
()
const
{
return
m_nodes
.begin(); }
245
248
const_index_iterator
occ
()
const
{
return
m_occ
.begin(); }
249
250
operator
plain_view_type
()
251
{
252
return
plain_view_type
(
253
m_symbol_size
,
254
m_size
,
255
bits
(),
256
splits
(),
257
occ
() );
258
}
259
operator
const_plain_view_type
()
const
260
{
261
return
const_plain_view_type
(
262
m_symbol_size
,
263
m_size
,
264
bits
(),
265
splits
(),
266
occ
() );
267
}
268
269
uint32
m_symbol_size
;
270
index_type
m_size
;
271
bit_vector_type
m_bits
;
272
index_vector_type
m_nodes
;
273
index_vector_type
m_occ
;
274
};
275
276
290
template
<
typename
system_tag,
typename
string
_iterator,
typename
index_type,
typename
symbol_type>
291
void
setup
(
292
const
index_type string_len,
293
const
string_iterator
&
string
,
294
WaveletTreeStorage<system_tag,index_type,symbol_type>
& out_tree);
295
302
template
<
typename
BitStreamIterator,
typename
IndexIterator,
typename
IndexType,
typename
SymbolType>
303
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
304
SymbolType
text
(
const
WaveletTree<BitStreamIterator,IndexIterator,SymbolType>
& tree,
const
IndexType i);
305
313
template
<
typename
BitStreamIterator,
typename
IndexIterator,
typename
SymbolType>
314
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
315
typename
WaveletTree<BitStreamIterator,IndexIterator,SymbolType>::index_type
316
rank
(
317
const
WaveletTree<BitStreamIterator,IndexIterator,SymbolType>
& tree,
318
const
typename
WaveletTree<BitStreamIterator,IndexIterator,SymbolType>::index_type
i,
319
const
uint32
c);
320
328
template
<
typename
BitStreamIterator,
typename
IndexIterator,
typename
SymbolType>
329
NVBIO_FORCEINLINE
NVBIO_HOST_DEVICE
330
typename
WaveletTree<BitStreamIterator,IndexIterator,SymbolType>::range_type
331
rank
(
332
const
WaveletTree<BitStreamIterator,IndexIterator,SymbolType>
& tree,
333
const
typename
WaveletTree<BitStreamIterator,IndexIterator,SymbolType>::range_type
range,
334
const
uint32
c);
335
340
template
<
typename
SystemTag,
typename
IndexType,
typename
SymbolType>
341
typename
WaveletTreeStorage<SystemTag,IndexType,SymbolType>::plain_view_type
plain_view
(
WaveletTreeStorage<SystemTag,IndexType,SymbolType>
& tree)
342
{
343
return
tree;
344
}
349
template
<
typename
SystemTag,
typename
IndexType,
typename
SymbolType>
350
typename
WaveletTreeStorage<SystemTag,IndexType,SymbolType>::const_plain_view_type
plain_view
(
const
WaveletTreeStorage<SystemTag,IndexType,SymbolType>
& tree)
351
{
352
return
tree;
353
}
354
357
358
}
// namespace nvbio
359
360
#include <
nvbio/strings/wavelet_tree_inl.h
>
Generated on Wed Feb 25 2015 08:33:03 for NVBIO by
1.8.4