NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
suffix_trie_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <nvbio/basic/numbers.h>
31 #include <nvbio/basic/popcount.h>
32 
33 namespace nvbio {
34 
35 template <uint32 ALPHABET_SIZE_T, TrieType TYPE_T>
37 TrieNode<ALPHABET_SIZE_T,TYPE_T>::TrieNode() : m_child(invalid_node), m_mask(0u), m_size(1u) {}
38 
39 template <uint32 ALPHABET_SIZE_T, TrieType TYPE_T>
42  m_child( _child ), m_mask( _mask ), m_size(1u)
43 {
44  assert( (_child <= invalid_node) && (m_child == _child) );
45 }
46 
47 template <uint32 ALPHABET_SIZE_T, TrieType TYPE_T>
49 bool TrieNode<ALPHABET_SIZE_T,TYPE_T>::is_leaf() const { return m_mask == 0u; }
50 
51 template <uint32 ALPHABET_SIZE_T, TrieType TYPE_T>
54 
55 template <uint32 ALPHABET_SIZE_T, TrieType TYPE_T>
58 {
59  if (TYPE_T == CompressedTrie)
60  return m_child + nvbio::popc( m_mask & ((1u << c)-1u) );
61  else
62  return m_child + c;
63 }
64 
65 template <uint32 ALPHABET_SIZE_T, TrieType TYPE_T>
68 {
69  if (TYPE_T == CompressedTrie)
70  return m_child + c;
71  else
72  return m_child + find_nthbit( m_mask, c+1u );
73 }
74 
75 template <uint32 ALPHABET_SIZE_T, TrieType TYPE_T>
78 {
79  if (TYPE_T == CompressedTrie)
80  return m_child;
81  else
82  return m_child + ffs( m_mask );
83 }
84 
85 template <uint32 ALPHABET_SIZE_T, TrieType TYPE_T>
88 
89 template <uint32 ALPHABET_SIZE_T, TrieType TYPE_T>
91 void TrieNode<ALPHABET_SIZE_T,TYPE_T>::set_child_bit(const uint32 c) { m_mask |= (1u << c); }
92 
93 template <uint32 ALPHABET_SIZE_T, TrieType TYPE_T>
95 uint32 TrieNode<ALPHABET_SIZE_T,TYPE_T>::child_bit(const uint32 c) const { return m_mask & (1u << c); }
96 
97 template <uint32 ALPHABET_SIZE_T, TrieType TYPE_T>
99 void TrieNode<ALPHABET_SIZE_T,TYPE_T>::set_size(const uint32 size) { m_size = size; }
100 
101 template <uint32 ALPHABET_SIZE_T, TrieType TYPE_T>
104 
105 
106 
107 template <TrieType TYPE_T>
109 TrieNode5<TYPE_T>::TrieNode5() : m_child(invalid_node), m_mask(0u), m_size(1u) {}
110 
111 template <TrieType TYPE_T>
113 TrieNode5<TYPE_T>::TrieNode5(const uint32 _child, const uint32 _mask) :
114  m_child( _child ), m_mask( _mask ), m_size(1u)
115 {
116  assert( (_child <= invalid_node) && (m_child == _child) );
117 }
118 
119 template <TrieType TYPE_T>
121 bool TrieNode5<TYPE_T>::is_leaf() const { return m_mask == 0u; }
122 
123 template <TrieType TYPE_T>
125 uint32 TrieNode5<TYPE_T>::child() const { return m_child; }
126 
127 template <TrieType TYPE_T>
130 {
131  if (TYPE_T == CompressedTrie)
132  return m_child + nvbio::popc( m_mask & ((1u << c)-1u) );
133  else
134  return m_child + c;
135 }
136 
137 template <TrieType TYPE_T>
140 {
141  if (TYPE_T == CompressedTrie)
142  return m_child + c;
143  else
144  return m_child + find_nthbit8( m_mask, c+1u );
145 }
146 
147 template <TrieType TYPE_T>
150 {
151  if (TYPE_T == CompressedTrie)
152  return m_child;
153  else
154  return m_child + ffs( m_mask );
155 }
156 
157 template <TrieType TYPE_T>
159 uint32 TrieNode5<TYPE_T>::mask() const { return m_mask; }
160 
161 template <TrieType TYPE_T>
163 void TrieNode5<TYPE_T>::set_child_bit(const uint32 c) { m_mask |= (1u << c); }
164 
165 template <TrieType TYPE_T>
167 uint32 TrieNode5<TYPE_T>::child_bit(const uint32 c) const { return m_mask & (1u << c); }
168 
169 template <TrieType TYPE_T>
171 void TrieNode5<TYPE_T>::set_size(const uint32 size) { m_size = size; }
172 
173 template <TrieType TYPE_T>
175 uint32 TrieNode5<TYPE_T>::size() const { return m_size; }
176 
177 
178 
179 // constructor
180 //
181 template <uint32 ALPHABET_SIZE_T, typename NodeIterator>
184  m_seq( seq )
185 {}
186 
187 // return the root node of the dictionary seen as a trie
188 //
189 template <uint32 ALPHABET_SIZE_T, typename NodeIterator>
193 {
194  return m_seq[0];
195 }
196 
197 // visit the children of a given node
198 //
199 template <uint32 ALPHABET_SIZE_T, typename NodeIterator>
200 template <typename Visitor>
202 void SuffixTrie<ALPHABET_SIZE_T,NodeIterator>::children(const node_type node, Visitor& visitor) const
203 {
204  for (uint8 c = 0; c < ALPHABET_SIZE; ++c)
205  {
206  if (node.child_bit(c))
207  visitor.visit( c, node.child(c) );
208  }
209 }
210 
211 template <uint32 ALPHABET_SIZE_T, typename NodeType>
212 struct Collector
213 {
215  Collector() : count(0), mask(0) {}
216 
218  void visit(const uint8 c, const NodeType node)
219  {
220  nodes[count++] = node;
221  mask |= (1u << c);
222  }
223 
226  NodeType nodes[ALPHABET_SIZE_T];
227 };
228 
229 template <uint32 ALPHABET_SIZE_T, TrieType TYPE_T>
230 struct trie_copy {};
231 
232 template <uint32 ALPHABET_SIZE_T>
233 struct trie_copy<ALPHABET_SIZE_T, CompressedTrie>
234 {
235  template <typename InTrieType, typename OutVectorType>
236  static void enact(
237  const InTrieType& in_trie,
238  const typename InTrieType::node_type in_node,
239  OutVectorType& out_nodes,
240  const uint32 out_node_index)
241  {
242  typedef typename InTrieType::node_type in_node_type;
243 
244  // check if the input node is a leaf
245  if (in_trie.is_leaf( in_node ))
246  {
247  out_nodes[ out_node_index ].set_size( in_trie.size( in_node ) );
248  return;
249  }
250 
252 
253  in_trie.children( in_node, children );
254 
255  const uint32 child_offset = uint32( out_nodes.size() );
256  out_nodes.resize( child_offset + children.count );
257 
258  out_nodes[ out_node_index ] = TrieNode<ALPHABET_SIZE_T,CompressedTrie>(
259  child_offset,
260  children.mask );
261 
262  for (uint32 i = 0; i < children.count; ++i)
263  {
264  enact(
265  in_trie,
266  children.nodes[i],
267  out_nodes,
268  child_offset + i );
269  }
270  }
271 };
272 
273 template <typename TrieType, typename NodeVector>
275  const TrieType& in_trie,
276  NodeVector& out_nodes)
277 {
278  typedef typename NodeVector::value_type out_node_type;
279 
280  // alloc the root node
281  out_nodes.resize(1u);
282 
284  in_trie,
285  in_trie.root(),
286  out_nodes,
287  0u );
288 }
289 
290 } // namespace nvbio