NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
bwte.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <cub/cub.cuh>
31 #include <mgpuhost.cuh>
32 #include <moderngpu.cuh>
33 #include <nvbio/basic/vector.h>
35 #include <nvbio/sufsort/sufsort.h>
39 
40 namespace nvbio {
41 
42 //#define QUICK_CHECK
43 //#define QUICK_CHECK_REPORT
44 //#define CHECK_COPY
45 //#define CHECK_INSERTION
46 //#define CHECK_SORTING
47 #if defined(QUICK_CHECK_REPORT) || defined(CHECK_SORTING)
48  #define HOST_STRING_IDS
49 #endif
50 
53 
60 
63 
67 struct BWTEBlock
68 {
69  uint32 max_block_suffixes; // reserved space
70  uint32 max_block_strings; // reserved space
71  uint32 n_strings; // number of strings
72  uint32 n_suffixes; // number of suffixes
74  nvbio::vector<host_tag,uint32> h_cum_lengths; // host block string lengths
75  nvbio::vector<host_tag,uint32> h_string_ids; // host block string ids
76  nvbio::vector<host_tag,uint32> h_dollar_off; // host block dollar offsets
77  nvbio::vector<host_tag,uint32> h_dollar_id; // host block dollar ids
78  nvbio::vector<host_tag,uint64> h_dollar_pos; // host block dollar insertion positions
79  nvbio::vector<host_tag,uint8> h_BWT; // host BWT block storage
80 
83  void reserve(const uint32 _max_block_strings, const uint32 _max_block_suffixes);
84 };
85 
94 template <
95  uint32 SYMBOL_SIZE,
96  bool BIG_ENDIAN,
97  typename storage_type = const uint32*,
98  typename offsets_iterator = const uint64*>
100 {
101  typedef typename std::iterator_traits<offsets_iterator>::value_type index_type;
102  typedef ConcatenatedStringSet<
104  offsets_iterator> string_set_type;
105 
108  BWTEContext(const int device);
109 
112  uint64 needed_device_memory(const uint32 _max_block_strings, const uint32 _max_block_suffixes) const;
113 
116  void reserve(const uint32 _max_block_strings, const uint32 _max_block_suffixes);
117 
127  void append_block(
128  const uint32 block_begin,
129  const uint32 block_end,
130  const string_set_type string_set,
132  SparseSymbolSet& BWT_ext_dollars,
133  const bool forward);
134 
135  // sort the given block
136  //
137  void sort_block(
138  const uint32 block_begin,
139  const uint32 block_end,
140  const string_set_type string_set,
141  BWTEBlock& block);
142 
143  // merge the given sorted block
144  //
145  void merge_block(
146  const uint32 block_begin,
147  const uint32 block_end,
148  const string_set_type string_set,
149  BWTEBlock& block,
151  SparseSymbolSet& BWT_ext_dollars,
152  const bool forward);
153 
154 private:
155  typedef typename std::iterator_traits<storage_type>::value_type radix_type;
156 
157  static const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
158  static const uint32 RADIX_BITS = uint32( 8u * sizeof(radix_type) );
159  static const uint32 DOLLAR_BITS = RADIX_BITS <= 32 ? 4 : 5;
160 
163 
166 
167  static const uint32 SORTING_SLICE_SIZE = 2u; // determines how frequently sorted suffixes are pruned
168 
169  // rank the block suffixes wrt BWT_ext
170  //
171  void rank_block(
172  const uint32 block_begin,
173  const uint32 block_end,
174  const string_set_type string_set,
175  const BWTEBlock& block,
177  SparseSymbolSet& BWT_ext_dollars,
178  const bool forward);
179 
180  // insert the block
181  //
182  void insert_block(
183  BWTEBlock& block,
185  SparseSymbolSet& BWT_ext_dollars);
186 
187  uint32 max_block_suffixes;
188  uint32 max_block_strings;
189 
190  uint64 n_strings_ext;
191  uint64 n_suffixes_ext;
192 
193  uint64 n_processed_strings;
194  uint64 n_processed_suffixes;
195 
196  mgpu::ContextPtr mgpu_ctxt;
197  string_set_handler_type string_set_handler;
198  cuda::CompressionSort string_sorter;
199  suffix_flattener_type suffixes;
200  chunk_loader_type chunk_loader;
201 
202  BWTEBlock block; // sorted block
203  nvbio::vector<host_tag,uint64> g; // host insertion positions
204  nvbio::vector<host_tag,uint64> g_sorted; // host sorted insertions
205 
206  nvbio::vector<device_tag,uint8> d_BWT_block; // device block bwt
207  nvbio::vector<device_tag,uint32> d_dollar_off; // device block dollar offsets
208  nvbio::vector<device_tag,uint32> d_dollar_id; // device block dollar ids
209 
210  nvbio::vector<device_tag,uint2> d_suffixes; // device localized suffixes
211 
212  nvbio::vector<device_tag,uint8> d_temp_storage; // device temporary storage
213  nvbio::vector<host_tag, uint8> h_temp_storage; // host temporary storage
214 
215  float load_time;
216  float sort_time;
217  float copy_time;
218  float rank_time;
219  float insert_time;
220  float insert_dollars_time;
221 };
222 
231 template <uint32 SYMBOL_SIZE, bool BIG_ENDIAN, typename storage_type, typename offsets_iterator>
232 void bwte(
233  const ConcatenatedStringSet<
234  PackedStream<storage_type,uint8,SYMBOL_SIZE,BIG_ENDIAN,typename std::iterator_traits<offsets_iterator>::value_type>,
235  offsets_iterator> string_set,
237  SparseSymbolSet& BWT_ext_dollars,
238  BWTParams* params = NULL);
239 
242 
243 } // namespace nvbio
244 
245 #include <nvbio/sufsort/bwte_inl.h>