NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
qgroup_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 namespace nvbio {
31 
32 // setup an internal namespace to avoid polluting the global environment
33 namespace qgroup {
34 
35 // a functor to set the q-group's I vector
36 //
37 template <typename string_type>
39 {
41 
42  // constructor
43  //
46  const QGroupIndexView _qgroup,
47  const uint32 _string_len,
48  const string_type _string)
49  : qgroup ( _qgroup ),
50  string_len ( _string_len ),
51  string ( _string ) {}
52 
53  // operator functor
54  //
56  void operator() (const uint32 p) const
57  {
58  const qgram_functor_type qgram( qgroup.Q, qgroup.symbol_size, string_len, string );
59 
60  // set the bit corresponding to the i-th qgram
61  const uint64 g = qgram(p);
62 
63  const uint32 word = g / 32u;
64  const uint32 bit = g & 31u;
65 
66  atomicOr( qgroup.I + word, 1u << bit );
67  }
68 
71  const string_type string;
72 };
73 
74 // a functor to set the q-group's SS vector
75 //
76 template <typename string_type>
78 {
80 
81  static const uint32 WORD_SIZE = 32;
82 
83  // constructor
84  //
87  const QGroupIndexView _qgroup,
88  const uint32 _string_len,
89  const string_type _string)
90  : qgroup ( _qgroup ),
91  string_len ( _string_len ),
92  string ( _string ) {}
93 
94  // operator functor
95  //
97  void operator() (const uint32 p) const
98  {
99  const qgram_functor_type qgram( qgroup.Q, qgroup.symbol_size, string_len, string );
100 
101  // compute the qgram g
102  const uint64 g = qgram(p);
103 
104  // compute (i,j) from g
105  const uint32 i = uint32( g / WORD_SIZE );
106  const uint32 j = uint32( g % WORD_SIZE );
107 
108  // compute j' such that bit j is the j'-th set bit in I[i]
109  const uint32 j_prime = popc( qgroup.I[i] & ((1u << j) - 1u) );
110 
111  // atomically increase the appropriate counter in SS
112  atomicAdd( qgroup.SS + qgroup.S[i] + j_prime, 1u );
113  }
114 
117  const string_type string;
118 };
119 
120 // a functor to set the q-group's SS vector
121 //
122 template <typename string_type>
124 {
126 
127  static const uint32 WORD_SIZE = 32;
128 
129  // constructor
130  //
133  const QGroupIndexView _qgroup,
134  const uint32 _string_len,
135  const string_type _string)
136  : qgroup ( _qgroup ),
137  string_len ( _string_len ),
138  string ( _string ) {}
139 
140  // operator functor
141  //
143  void operator() (const uint32 p) const
144  {
145  const qgram_functor_type qgram( qgroup.Q, qgroup.symbol_size, string_len, string );
146 
147  // compute the qgram g
148  const uint64 g = qgram(p);
149 
150  // compute (i,j) from g
151  const uint32 i = uint32( g / WORD_SIZE );
152  const uint32 j = uint32( g % WORD_SIZE );
153 
154  // compute j' such that bit j is the j'-th set bit in I[i]
155  const uint32 j_prime = popc( qgroup.I[i] & ((1u << j) - 1u) );
156 
157  // atomically increase the appropriate counter in SS to get the next free slot
158  const uint32 slot = atomicAdd( qgroup.SS + qgroup.S[i] + j_prime, 1u );
159 
160  // and fill the corresponding slot of P
161  qgroup.P[ slot ] = p;
162  }
163 
166  const string_type string;
167 };
168 
169 } // namespace qgroup
170 
171 // build a q-group index from a given string
172 //
173 // \param q the q parameter
174 // \param string_len the size of the string
175 // \param string the string iterator
176 //
177 template <typename string_type>
179  const uint32 q,
180  const uint32 symbol_sz,
181  const uint32 string_len,
182  const string_type string)
183 {
184  typedef qgroup::qgroup_setup_I<string_type> setup_I_type;
185  typedef qgroup::qgroup_setup_SS<string_type> setup_SS_type;
186  typedef qgroup::qgroup_setup_P<string_type> setup_P_type;
187 
188  thrust::device_vector<uint8> d_temp_storage;
189 
190  Q = q;
191  symbol_size = symbol_sz;
192  n_qgrams = string_len;
193 
194  const uint32 ALPHABET_SIZE = 1u << symbol_size;
195 
196  uint64 n_max_qgrams = 1;
197  for (uint32 i = 0; i < q; ++i)
198  n_max_qgrams *= ALPHABET_SIZE;
199 
200  const uint32 n_qblocks = uint32( n_max_qgrams / WORD_SIZE );
201 
202  I.resize( n_qblocks+1 );
203  S.resize( n_qblocks+1 );
204 
205  //
206  // setup I
207  //
208 
209  // fill I with zeros
210  thrust::fill(
211  I.begin(),
212  I.begin() + n_qblocks + 1u,
213  uint32(0) );
214 
215  const setup_I_type setup_I( nvbio::plain_view( *this ), string_len, string );
216 
217  // set the bits in I corresponding to the used qgram slots
219  thrust::make_counting_iterator<uint32>(0),
220  thrust::make_counting_iterator<uint32>(0) + string_len,
221  setup_I );
222 
223  //
224  // setup S
225  //
226 
227  // compute the exclusive prefix sum of the popcount of the words in I
229  n_qblocks + 1u,
231  S.begin(),
232  thrust::plus<uint32>(),
233  uint32(0),
234  d_temp_storage );
235 
236  // fetch the number of used qgrams
237  n_unique_qgrams = S[ n_qblocks ];
238 
239  //
240  // setup SS
241  //
242  SS.resize( n_unique_qgrams + 1u );
243 
244  thrust::fill(
245  SS.begin(),
246  SS.begin() + n_unique_qgrams + 1u,
247  uint32(0) );
248 
249  const setup_SS_type setup_SS( nvbio::plain_view( *this ), string_len, string );
250 
252  thrust::make_counting_iterator<uint32>(0),
253  thrust::make_counting_iterator<uint32>(0) + string_len,
254  setup_SS );
255 
256  // compute the exclusive prefix sum of SS
258  n_unique_qgrams + 1u,
259  SS.begin(),
260  SS.begin(),
261  thrust::plus<uint32>(),
262  uint32(0),
263  d_temp_storage );
264 
265  //
266  // setup P
267  //
268  P.resize( string_len );
269 
270  // copy SS into a temporary vector for the purpose of slot allocation
271  thrust::device_vector<uint32> SS_copy( SS );
272 
273  const setup_P_type setup_P( nvbio::plain_view( *this ), string_len, string );
274 
276  thrust::make_counting_iterator<uint32>(0),
277  thrust::make_counting_iterator<uint32>(0) + string_len,
278  setup_P );
279 
280  // and swap the slots with their previous copy
281  SS.swap( SS_copy );
282 
283  const uint32 n_slots = SS[ n_unique_qgrams ];
284  if (n_slots != string_len)
285  throw runtime_error( "mismatching number of q-grams: inserted %u q-grams, got: %u\n" );
286 }
287 
288 } // namespace nvbio