NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
paged_text.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <nvbio/basic/vector.h>
32 #include <nvbio/basic/primitives.h>
33 #include <nvbio/basic/popcount.h>
34 #include <nvbio/basic/algorithms.h>
35 #include <nvbio/basic/exceptions.h>
36 #include <nvbio/basic/timer.h>
38 #include <thrust/adjacent_difference.h>
39 #include <thrust/iterator/counting_iterator.h>
40 #include <thrust/iterator/permutation_iterator.h>
41 #include <thrust/iterator/transform_iterator.h>
42 #include <stack>
43 #include <numeric>
44 
45 namespace nvbio {
46 
49 void build_buckets(const uint64 key_range, const uint32 n_keys, const uint64* keys, const uint32 bucket_size, nvbio::vector<host_tag,uint32>& buckets, const bool upper = true);
50 
59 template <uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T>
60 struct PagedText
61 {
62  typedef uint64 word_type;
65 
66  static const uint32 SYMBOL_SIZE = SYMBOL_SIZE_T;
67  static const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
68  static const uint32 WORD_SIZE = (8u*sizeof(word_type));
70  static const uint32 BIG_ENDIAN = BIG_ENDIAN_T;
71 
72  static const uint32 LOG_BUCKET_SIZE = 20u;
73  static const uint32 BUCKET_SIZE = 1u << LOG_BUCKET_SIZE;
74 
77  PagedText(
78  const uint32 page_size = 512 * 1024,
79  const uint32 segment_size = 128 * 1024 * 1024,
80  const uint32 occ_intv = 128);
81 
84  ~PagedText();
85 
88  uint32 page_count() const { return uint32( m_offsets.size() - 1u ); }
89 
92  uint64 size() const { return m_offsets.size() ? m_offsets.back() : 0u; }
93 
96  void grow();
97 
101 
104  void release_page(word_type* page);
105 
108  const word_type* get_page(const uint32 i) const { return m_pages[i]; }
109 
112  uint32 get_page_size(const uint32 i) const { return uint32( m_offsets[i+1] - m_offsets[i] ); }
113 
116  uint64 get_page_offset(const uint32 i) const { return m_offsets[i]; }
117 
120  const uint32* get_occ_page(const uint32 i) const { return (uint32*)( m_pages[i] + m_page_size ); }
121 
124  uint32 find_page(const uint64 i) const;
125 
128  uint8 operator[] (const uint64 i) const;
129 
132  uint64 rank(const uint64 i, const uint8 c) const;
133 
136  uint64 rank(const uint32 page_idx, const uint64 i, const uint8 c) const;
137 
140  void reserve_pages(const uint32 n_pages);
141 
144  void reserve_free_pages(const uint32 n_pages);
145 
148  void reserve(const uint64 n);
149 
152  uint64 needed_host_memory(const uint64 n) const;
153 
156  uint64 needed_device_memory(const uint64 n) const { return 0u; }
157 
160  void resize(const uint64 n, const uint8* c = NULL);
161 
164  void insert(const uint32 n, const uint64* g, const uint8* c);
165 
168  const uint64* symbol_frequencies() const;
169 
172  uint64 symbol_frequency(const uint8 c) const { return symbol_frequencies()[c]; }
173 
176  void defrag();
177 
192  std::vector<word_type*> m_pool;
194  omp_lock_t m_lock;
196 };
197 
202 {
203  static const uint32 LOG_BUCKET_SIZE = 20u;
204  static const uint32 BUCKET_SIZE = 1u << LOG_BUCKET_SIZE;
205 
210  SparseSymbolSet() : m_n( 0 ), m_n_special( 0 ) {}
211 
214  uint32 size() const { return m_n_special; }
215 
218  void reserve(const uint64 n, const uint32 n_special);
219 
228  void set(const uint64 range, const uint32 n_special, const uint32* p, const uint32* id);
229 
245  void insert(
246  const uint64 range,
247  const uint32 n_block,
248  const uint64* g,
249  const uint32 n_special,
250  const uint32* p_special,
251  const uint64* g_special,
252  const uint32* id_special);
253 
256  void set_range(const uint64 n);
257 
260  uint32 rank(const uint64 i) const
261  {
262  if (i < m_n)
263  {
264  const uint64 ii = i+1;
265  const uint32 b = uint32( ii >> LOG_BUCKET_SIZE );
266  const uint32 lo = m_buckets[b];
267  const uint32 hi = m_buckets[b+1];
268  return lower_bound_index( ii, &m_pos[lo], hi - lo ) + lo;
269  }
270  else if (i == uint64(-1))
271  return 0;
272  else // i >= m_n
273  return m_n_special;
274  }
275 
276  const uint64* pos() const { return raw_pointer( m_pos ); }
277  const uint64* ids() const { return raw_pointer( m_id ); }
278 
286 };
287 
288 } // namespace nvbio
289