NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
fmindex
paged_text.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <
nvbio/basic/vector.h
>
31
#include <
nvbio/basic/packedstream.h
>
32
#include <
nvbio/basic/primitives.h
>
33
#include <
nvbio/basic/popcount.h
>
34
#include <
nvbio/basic/algorithms.h
>
35
#include <
nvbio/basic/exceptions.h
>
36
#include <
nvbio/basic/timer.h
>
37
#include <
nvbio/basic/cast_iterator.h
>
38
#include <thrust/adjacent_difference.h>
39
#include <thrust/iterator/counting_iterator.h>
40
#include <thrust/iterator/permutation_iterator.h>
41
#include <thrust/iterator/transform_iterator.h>
42
#include <stack>
43
#include <numeric>
44
45
namespace
nvbio {
46
49
void
build_buckets
(
const
uint64
key_range,
const
uint32
n_keys,
const
uint64
* keys,
const
uint32
bucket_size,
nvbio::vector<host_tag,uint32>
& buckets,
const
bool
upper =
true
);
50
59
template
<u
int
32 SYMBOL_SIZE_T,
bool
BIG_ENDIAN_T>
60
struct
PagedText
61
{
62
typedef
uint64
word_type
;
63
typedef
PackedStream<const word_type*,uint8,SYMBOL_SIZE_T,BIG_ENDIAN_T>
const_packed_page_type
;
64
typedef
PackedStream< word_type*,uint8,SYMBOL_SIZE_T,BIG_ENDIAN_T>
packed_page_type
;
65
66
static
const
uint32
SYMBOL_SIZE
= SYMBOL_SIZE_T;
67
static
const
uint32
SYMBOL_COUNT
= 1u <<
SYMBOL_SIZE
;
68
static
const
uint32
WORD_SIZE
= (8u*
sizeof
(
word_type
));
69
static
const
uint32
SYMBOLS_PER_WORD
=
WORD_SIZE
/
SYMBOL_SIZE
;
70
static
const
uint32
BIG_ENDIAN
= BIG_ENDIAN_T;
71
72
static
const
uint32
LOG_BUCKET_SIZE
= 20u;
73
static
const
uint32
BUCKET_SIZE
= 1u <<
LOG_BUCKET_SIZE
;
74
77
PagedText
(
78
const
uint32
page_size = 512 * 1024,
79
const
uint32
segment_size = 128 * 1024 * 1024,
80
const
uint32
occ_intv = 128);
81
84
~PagedText
();
85
88
uint32
page_count
()
const
{
return
uint32
(
m_offsets
.size() - 1u ); }
89
92
uint64
size
()
const
{
return
m_offsets
.size() ?
m_offsets
.back() : 0u; }
93
96
void
grow
();
97
100
word_type
*
alloc_page
();
101
104
void
release_page
(
word_type
* page);
105
108
const
word_type
*
get_page
(
const
uint32
i)
const
{
return
m_pages
[i]; }
109
112
uint32
get_page_size
(
const
uint32
i)
const
{
return
uint32
(
m_offsets
[i+1] -
m_offsets
[i] ); }
113
116
uint64
get_page_offset
(
const
uint32
i)
const
{
return
m_offsets
[i]; }
117
120
const
uint32
*
get_occ_page
(
const
uint32
i)
const
{
return
(
uint32
*)(
m_pages
[i] +
m_page_size
); }
121
124
uint32
find_page
(
const
uint64
i)
const
;
125
128
uint8
operator[]
(
const
uint64
i)
const
;
129
132
uint64
rank
(
const
uint64
i,
const
uint8
c)
const
;
133
136
uint64
rank
(
const
uint32
page_idx,
const
uint64
i,
const
uint8
c)
const
;
137
140
void
reserve_pages
(
const
uint32
n_pages);
141
144
void
reserve_free_pages
(
const
uint32
n_pages);
145
148
void
reserve
(
const
uint64
n);
149
152
uint64
needed_host_memory
(
const
uint64
n)
const
;
153
156
uint64
needed_device_memory
(
const
uint64
n)
const
{
return
0u; }
157
160
void
resize
(
const
uint64
n,
const
uint8
* c = NULL);
161
164
void
insert
(
const
uint32
n,
const
uint64
* g,
const
uint8
* c);
165
168
const
uint64
*
symbol_frequencies
()
const
;
169
172
uint64
symbol_frequency
(
const
uint8
c)
const
{
return
symbol_frequencies
()[c]; }
173
176
void
defrag
();
177
178
uint32
m_page_size
;
179
uint32
m_segment_size
;
180
uint32
m_occ_intv
;
181
uint32
m_occ_intv_w
;
182
uint32
m_occ_intv_log
;
183
uint32
m_page_count
;
184
nvbio::vector<host_tag,word_type*>
m_segments
;
185
nvbio::vector<host_tag,word_type*>
m_pages
;
186
nvbio::vector<host_tag,word_type*>
m_new_pages
;
187
nvbio::vector<host_tag,uint64>
m_offsets
;
188
nvbio::vector<host_tag,uint64>
m_new_offsets
;
189
nvbio::vector<host_tag,uint64>
m_counters
;
190
nvbio::vector<host_tag,uint64>
m_new_counters
;
191
nvbio::vector<host_tag,uint32>
m_buckets
;
192
std::vector<word_type*>
m_pool
;
193
uint32
m_pool_size
;
194
omp_lock_t
m_lock
;
195
uint32
m_count_table
[256];
196
};
197
201
struct
SparseSymbolSet
202
{
203
static
const
uint32
LOG_BUCKET_SIZE
= 20u;
204
static
const
uint32
BUCKET_SIZE
= 1u <<
LOG_BUCKET_SIZE
;
205
210
SparseSymbolSet
() :
m_n
( 0 ),
m_n_special
( 0 ) {}
211
214
uint32
size
()
const
{
return
m_n_special
; }
215
218
void
reserve
(
const
uint64
n,
const
uint32
n_special);
219
228
void
set
(
const
uint64
range,
const
uint32
n_special,
const
uint32
* p,
const
uint32
*
id
);
229
245
void
insert
(
246
const
uint64
range,
247
const
uint32
n_block,
248
const
uint64
* g,
249
const
uint32
n_special,
250
const
uint32
* p_special,
251
const
uint64
* g_special,
252
const
uint32
* id_special);
253
256
void
set_range
(
const
uint64
n);
257
260
uint32
rank
(
const
uint64
i)
const
261
{
262
if
(i <
m_n
)
263
{
264
const
uint64
ii = i+1;
265
const
uint32
b =
uint32
( ii >>
LOG_BUCKET_SIZE
);
266
const
uint32
lo =
m_buckets
[b];
267
const
uint32
hi =
m_buckets
[b+1];
268
return
lower_bound_index
( ii, &
m_pos
[lo], hi - lo ) + lo;
269
}
270
else
if
(i ==
uint64
(-1))
271
return
0;
272
else
// i >= m_n
273
return
m_n_special
;
274
}
275
276
const
uint64
*
pos
()
const
{
return
raw_pointer
(
m_pos
); }
277
const
uint64
*
ids
()
const
{
return
raw_pointer
(
m_id
); }
278
279
uint64
m_n
;
280
uint32
m_n_special
;
281
nvbio::vector<host_tag,uint64>
m_pos
;
282
nvbio::vector<host_tag,uint64>
m_new_pos
;
283
nvbio::vector<host_tag,uint64>
m_id
;
284
nvbio::vector<host_tag,uint64>
m_new_id
;
285
nvbio::vector<host_tag,uint32>
m_buckets
;
286
};
287
288
}
// namespace nvbio
289
290
#include <
nvbio/fmindex/paged_text_inl.h
>
Generated on Wed Feb 25 2015 08:32:59 for NVBIO by
1.8.4