NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
wavelet_tree.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <nvbio/strings/string.h>
32 #include <nvbio/basic/primitives.h>
33 #include <nvbio/basic/cuda/sort.h>
34 #include <thrust/sort.h>
35 #include <algorithm>
36 #include <stack>
37 
38 namespace nvbio {
39 
42 
57 
60 
76 template <typename BitStreamIterator, typename IndexIterator, typename SymbolType = uint8>
78 {
79  // define the system tag
82  typedef SymbolType symbol_type;
83  typedef SymbolType value_type;
84  typedef BitStreamIterator bit_iterator;
85  typedef IndexIterator index_iterator;
86  typedef WaveletTree<BitStreamIterator,IndexIterator> text_type; // the text is the wavelet tree itself
87 
89  typedef null_type vector_type; // unsupported, would require knowing alphabet size
90 
95  const uint32 _symbol_size = 0,
96  const index_type _size = 0,
97  const BitStreamIterator _bits = BitStreamIterator(),
98  const IndexIterator _nodes = IndexIterator(),
99  const IndexIterator _occ = IndexIterator()) :
100  m_symbol_size( _symbol_size ),
101  m_size ( _size ),
102  m_bits ( _bits ),
103  m_nodes ( _nodes ),
104  m_occ ( _occ ) {}
105 
109  void resize(const uint32 _size, const uint32 _symbol_size)
110  {
111  m_size = _size;
112  m_symbol_size = _symbol_size;
113  }
114 
118  uint32 symbol_count() const { return 1u << m_symbol_size; }
119 
123  uint32 symbol_size() const { return m_symbol_size; }
124 
128  index_type size() const { return m_size; }
129 
133  BitStreamIterator bits() const { return m_bits; }
134 
138  IndexIterator splits() const { return m_nodes; }
139 
143  IndexIterator occ() const { return m_occ; }
144 
148  index_type rank(const uint32 l, const uint32 node, const index_type node_begin, const index_type r, const uint8 b) const;
149 
153  SymbolType operator[] (const index_type i) const;
154 
158  SymbolType operator() (const index_type i) const { return this->operator[](i); }
159 
161  NVBIO_FORCEINLINE NVBIO_HOST_DEVICE const text_type& text() const { return *this; }
162 
165  BitStreamIterator m_bits;
166  IndexIterator m_nodes;
167  IndexIterator m_occ;
168 };
169 
178 template <typename SystemTag, typename IndexType = uint32, typename SymbolType = uint8>
180 {
181  // define the system tag
182  typedef SystemTag system_tag;
183  typedef IndexType index_type;
184  typedef SymbolType symbol_type;
185 
190  typedef typename index_vector_type::iterator index_iterator;
191  typedef typename index_vector_type::const_iterator const_index_iterator;
192 
195 
199  m_symbol_size( 0u ),
200  m_size( 0u ) {}
201 
204  void resize(const uint32 _size, const uint32 _symbol_size)
205  {
206  m_size = _size;
207  m_symbol_size = _symbol_size;
208 
209  const uint32 n_symbols = 1u << _symbol_size;
210 
212  m_nodes.resize( n_symbols );
213  m_occ.resize( n_symbols + util::divide_ri( m_size * m_symbol_size, 32u ) );
214  }
215 
219  uint32 symbol_size() const { return m_symbol_size; }
220 
224  index_type size() const { return m_size; }
225 
228  bit_iterator bits() { return m_bits.begin(); }
229 
232  index_iterator splits() { return m_nodes.begin(); }
233 
236  index_iterator occ() { return m_occ.begin(); }
237 
240  const_bit_iterator bits() const { return m_bits.begin(); }
241 
244  const_index_iterator splits() const { return m_nodes.begin(); }
245 
248  const_index_iterator occ() const { return m_occ.begin(); }
249 
250  operator plain_view_type()
251  {
252  return plain_view_type(
254  m_size,
255  bits(),
256  splits(),
257  occ() );
258  }
259  operator const_plain_view_type() const
260  {
261  return const_plain_view_type(
263  m_size,
264  bits(),
265  splits(),
266  occ() );
267  }
268 
274 };
275 
276 
290 template <typename system_tag, typename string_iterator, typename index_type, typename symbol_type>
291 void setup(
292  const index_type string_len,
293  const string_iterator& string,
295 
302 template <typename BitStreamIterator, typename IndexIterator, typename IndexType, typename SymbolType>
304 SymbolType text(const WaveletTree<BitStreamIterator,IndexIterator,SymbolType>& tree, const IndexType i);
305 
313 template <typename BitStreamIterator, typename IndexIterator, typename SymbolType>
316 rank(
319  const uint32 c);
320 
328 template <typename BitStreamIterator, typename IndexIterator, typename SymbolType>
331 rank(
334  const uint32 c);
335 
340 template <typename SystemTag, typename IndexType, typename SymbolType>
342 {
343  return tree;
344 }
349 template <typename SystemTag, typename IndexType, typename SymbolType>
351 {
352  return tree;
353 }
354 
357 
358 } // namespace nvbio
359