NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sufsort_utils.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
33 #include <nvbio/basic/vector.h>
34 #include <thrust/host_vector.h>
35 #include <thrust/device_vector.h>
36 
37 namespace nvbio {
38 
41 
46 
51 
54 
58 {
61  virtual ~SetBWTHandler() {}
62 
65  virtual void process(
66  const uint32 n_suffixes,
67  const uint32 bits_per_symbol,
68  const uint32* bwt,
69  const uint32 n_dollars,
70  const uint64* dollar_pos,
71  const uint64* dollar_ids) {}
72 
75  virtual void process(
76  const uint32 n_suffixes,
77  const uint8* bwt,
78  const uint32 n_dollars,
79  const uint64* dollar_pos,
80  const uint64* dollar_ids) {}
81 };
82 
85 template <typename OutputIterator>
87 {
90  DeviceBWTHandler(OutputIterator _output) : output(_output), offset(0u) {}
91 
94  void process(
95  const uint32 n_suffixes,
96  const uint32 bits_per_symbol,
97  const uint32* bwt,
98  const uint32 n_dollars,
99  const uint64* dollar_pos,
100  const uint64* dollar_ids)
101  {
102  // NOTE: not supported!
103  }
104 
107  void process(
108  const uint32 n_suffixes,
109  const uint8* bwt,
110  const uint32 n_dollars,
111  const uint64* dollar_pos,
112  const uint64* dollar_ids)
113  {
114  // TODO: this may be redundant as we often have a device copy of 'bwt already
115  priv::alloc_storage( d_bwt, n_suffixes );
116 
117  thrust::copy( bwt, bwt + n_suffixes, d_bwt.begin() );
118 
120  n_suffixes,
121  raw_pointer( d_bwt ),
122  output,
123  offset );
124 
125  offset += n_suffixes;
126  }
127 
128  OutputIterator output;
131 };
132 
135 template <typename OutputIterator>
137 {
138  HostBWTHandler(OutputIterator _output) : output(_output) {}
139 
142  template <typename bwt_iterator>
144  const uint32 n_suffixes,
145  const bwt_iterator bwt)
146  {
147  for (uint32 i = 0; i < n_suffixes; ++i)
148  output[i] = bwt[i];
149 
150  output += n_suffixes;
151  }
152 
155  void process(
156  const uint32 n_suffixes,
157  const uint32 bits_per_symbol,
158  const uint32* bwt,
159  const uint32 n_dollars,
160  const uint64* dollar_pos,
161  const uint64* dollar_ids)
162  {
163  if (bits_per_symbol == 2)
165  else if (bits_per_symbol == 4)
167  else if (bits_per_symbol == 8)
169  }
170 
173  void process(
174  const uint32 n_suffixes,
175  const uint8* bwt,
176  const uint32 n_dollars,
177  const uint64* dollar_pos,
178  const uint64* dollar_ids)
179  {
180  do_process( n_suffixes, bwt );
181  }
182 
183  OutputIterator output;
184 };
185 
188 template <uint32 SYMBOL_SIZE, bool BIG_ENDIAN, typename word_type>
189 struct HostBWTHandler< PackedStream<word_type*,uint8,SYMBOL_SIZE,BIG_ENDIAN,uint64> > : public SetBWTHandler
190 {
192 
193  static const uint32 WORD_SIZE = uint32( 8u * sizeof(word_type) );
194  static const uint32 SYMBOLS_PER_WORD = WORD_SIZE / SYMBOL_SIZE;
195 
196 
199  HostBWTHandler(OutputIterator _output) : output(_output), offset(0) {}
200 
203  template <typename bwt_iterator>
205  const uint32 n_suffixes,
206  const bwt_iterator bwt)
207  {
208  const uint32 word_offset = offset & (SYMBOLS_PER_WORD-1);
209  uint32 word_rem = 0;
210  uint32 word_idx = offset / SYMBOLS_PER_WORD;
211 
212  word_type* words = output.stream();
213 
214  if (word_offset)
215  {
216  // compute how many symbols we still need to encode to fill the current word
217  word_rem = SYMBOLS_PER_WORD - word_offset;
218 
219  // fetch the word in question
220  word_type word = words[ word_idx ];
221 
222  for (uint32 i = 0; i < word_rem; ++i)
223  {
224  const uint32 bit_idx = (word_offset + i) * SYMBOL_SIZE;
225  const uint32 symbol_offset = BIG_ENDIAN ? (WORD_SIZE - SYMBOL_SIZE - bit_idx) : bit_idx;
226  const word_type symbol = word_type(bwt[i]) << symbol_offset;
227 
228  // set bits
229  word |= symbol;
230  }
231 
232  // write out the word
233  words[ word_idx ] = word;
234 
235  // advance word_idx
236  ++word_idx;
237  }
238 
239  for (uint32 i = word_rem; i < n_suffixes; i += SYMBOLS_PER_WORD)
240  {
241  // encode a word's worth of characters
242  word_type word = 0u;
243 
244  const uint32 n_symbols = nvbio::min( SYMBOLS_PER_WORD, n_suffixes - i );
245 
246  for (uint32 j = 0; j < n_symbols; ++j)
247  {
248  const uint32 bit_idx = j * SYMBOL_SIZE;
249  const uint32 symbol_offset = BIG_ENDIAN ? (WORD_SIZE - SYMBOL_SIZE - bit_idx) : bit_idx;
250  const word_type symbol = word_type(bwt[i + j]) << symbol_offset;
251 
252  // set bits
253  word |= symbol;
254  }
255 
256  // write out the word and advance word_idx
257  words[ ++word_idx ] = word;
258  }
259 
260  // advance the offset
261  offset += n_suffixes;
262  }
263 
266  void process(
267  const uint32 n_suffixes,
268  const uint32 bits_per_symbol,
269  const uint32* bwt,
270  const uint32 n_dollars,
271  const uint64* dollar_pos,
272  const uint64* dollar_ids)
273  {
274  if (bits_per_symbol == 2)
275  do_process( n_suffixes, PackedStream<const uint32*,uint8,2,true>(bwt) );
276  else if (bits_per_symbol == 4)
277  do_process( n_suffixes, PackedStream<const uint32*,uint8,4,true>(bwt) );
278  else if (bits_per_symbol == 8)
279  do_process( n_suffixes, PackedStream<const uint32*,uint8,8,true>(bwt) );
280  }
281 
284  void process(
285  const uint32 n_suffixes,
286  const uint8* bwt,
287  const uint32 n_dollars,
288  const uint64* dollar_pos,
289  const uint64* dollar_ids)
290  {
291  do_process( n_suffixes, bwt );
292  }
293 
294  OutputIterator output;
296 };
297 
301 {
302 };
303 
305 
308 
311 template <typename string_type, typename output_iterator>
313 {
314  typedef typename string_type::index_type index_type;
315 
316  static const uint32 NULL_PRIMARY = uint32(-1); // TODO: switch to index_type
317 
318  // constructor
319  //
321  const index_type _string_len,
322  const string_type _string,
323  output_iterator _output) :
324  string_len ( _string_len ),
325  string ( _string ),
326  primary ( NULL_PRIMARY ),
327  n_output ( 0 ),
328  output ( _output )
329  {
330  // encode the first BWT symbol explicitly
331  priv::device_copy( 1u, string + string_len-1, output, index_type(0u) );
332  }
333 
334  // process the next batch of suffixes
335  //
337  const uint32 n_suffixes,
338  const uint32* d_suffixes)
339  {
340  priv::alloc_storage( d_block_bwt, n_suffixes );
341 
342  // compute the bwt of the block
344  thrust::device_ptr<const uint32>( d_suffixes ),
345  thrust::device_ptr<const uint32>( d_suffixes ) + n_suffixes,
346  d_block_bwt.begin(),
348 
349  // check if there is a $ sign
350  const uint32 block_primary = uint32( thrust::find(
351  d_block_bwt.begin(),
352  d_block_bwt.begin() + n_suffixes,
353  255u ) - d_block_bwt.begin() );
354 
355  if (block_primary < n_suffixes)
356  {
357  // keep track of the global primary position
358  primary = n_output + block_primary + 1u; // +1u for the implicit empty suffix
359  }
360 
361  // and copy the transformed block to the output
363  n_suffixes,
364  d_block_bwt.begin(),
365  output,
366  n_output + 1u ); // +1u for the implicit empty suffix
367 
368  // advance the output counter
369  n_output += n_suffixes;
370  }
371 
372  // process a sparse set of suffixes
373  //
375  const uint32 n_suffixes,
376  const uint32* d_suffixes,
377  const uint32* d_slots)
378  {
379  priv::alloc_storage( d_block_bwt, n_suffixes );
380 
381  // compute the bwt of the block
383  thrust::device_ptr<const uint32>( d_suffixes ),
384  thrust::device_ptr<const uint32>( d_suffixes ) + n_suffixes,
385  d_block_bwt.begin(),
387 
388  // check if there is a $ sign
389  const uint32 block_primary = uint32( thrust::find(
390  d_block_bwt.begin(),
391  d_block_bwt.begin() + n_suffixes,
392  255u ) - d_block_bwt.begin() );
393 
394  if (block_primary < n_suffixes)
395  {
396  // keep track of the global primary position
397  primary = thrust::device_ptr<const uint32>( d_slots )[ block_primary ] + 1u; // +1u for the implicit empty suffix
398  }
399 
400  // and scatter the resulting symbols in the proper place
402  n_suffixes,
403  d_block_bwt.begin(),
405  thrust::device_ptr<const uint32>( d_slots ),
406  priv::offset_functor(1u) ), // +1u for the implicit empty suffix
407  output );
408  }
409 
410  // remove the dollar symbol
411  //
413  {
414  // shift back all symbols following the primary
415  const uint32 max_block_size = 32*1024*1024;
416 
417  priv::alloc_storage( d_block_bwt, max_block_size );
418 
419  for (index_type block_begin = primary; block_begin < string_len; block_begin += max_block_size)
420  {
421  const index_type block_end = nvbio::min( block_begin + max_block_size, string_len );
422 
423  // copy all symbols to a temporary buffer
425  block_end - block_begin,
426  output + block_begin + 1u,
427  d_block_bwt.begin(),
428  uint32(0) );
429 
430  // and copy the shifted block to the output
432  block_end - block_begin,
433  d_block_bwt.begin(),
434  output,
435  block_begin );
436  }
437  }
438 
440  const string_type string;
443  output_iterator output;
444  thrust::device_vector<uint8> d_block_bwt;
445 };
446 
449 template <typename output_iterator>
451 {
452  // constructor
453  //
455  const uint32 _string_len,
456  const uint32 _mod,
457  output_iterator _output) :
458  string_len ( _string_len ),
459  mod ( _mod ),
460  n_output ( 1 ),
461  output ( _output )
462  {
463  // encode the implicit empty suffix directly
464  output[0] = uint32(-1);
465  }
466 
467  // process the next batch of suffixes
468  //
470  const uint32 n_suffixes,
471  const uint32* d_suffixes)
472  {
473  priv::alloc_storage( h_suffixes, n_suffixes );
474 
475  thrust::copy(
476  thrust::device_ptr<const uint32>(d_suffixes),
477  thrust::device_ptr<const uint32>(d_suffixes) + n_suffixes,
478  h_suffixes.begin() );
479 
480  // copy_if
481  #pragma omp parallel for
482  for (int i = 0; i < int( n_suffixes ); ++i)
483  {
484  const uint32 slot = i + n_output;
485 
486  if ((slot & (mod-1)) == 0)
487  output[slot / mod] = h_suffixes[i];
488  }
489 
490  // advance the output counter
491  n_output += n_suffixes;
492  }
493 
494  // process a sparse set of suffixes
495  //
497  const uint32 n_suffixes,
498  const uint32* d_suffixes,
499  const uint32* d_slots)
500  {
501  priv::alloc_storage( h_slots, n_suffixes );
502  priv::alloc_storage( h_suffixes, n_suffixes );
503 
504  thrust::copy(
505  thrust::device_ptr<const uint32>(d_slots),
506  thrust::device_ptr<const uint32>(d_slots) + n_suffixes,
507  h_slots.begin() );
508 
509  thrust::copy(
510  thrust::device_ptr<const uint32>(d_suffixes),
511  thrust::device_ptr<const uint32>(d_suffixes) + n_suffixes,
512  h_suffixes.begin() );
513 
514  // scatter_if
515  #pragma omp parallel for
516  for (int i = 0; i < int( n_suffixes ); ++i)
517  {
518  const uint32 slot = h_slots[i] + 1u; // +1 for the implicit empty suffix
519 
520  if ((slot & (mod-1)) == 0)
521  output[slot / mod] = h_suffixes[i];
522  }
523  }
524 
526  const uint32 mod;
528  output_iterator output;
529  thrust::host_vector<uint32> h_slots;
530  thrust::host_vector<uint32> h_suffixes;
531 };
532 
535 template <typename string_type, typename output_bwt_iterator, typename output_ssa_iterator>
537 {
539  const uint32 _string_len,
540  const string_type _string,
541  const uint32 _mod,
542  output_bwt_iterator _bwt,
543  output_ssa_iterator _ssa) :
544  bwt_handler( _string_len, _string, _bwt ),
545  ssa_handler( _string_len, _mod, _ssa ) {}
546 
547  // process the next batch of suffixes
548  //
550  const uint32 n_suffixes,
551  const uint32* d_suffixes)
552  {
553  bwt_handler.process_batch( n_suffixes, d_suffixes );
554  ssa_handler.process_batch( n_suffixes, d_suffixes );
555  }
556 
557  // process a sparse set of suffixes
558  //
560  const uint32 n_suffixes,
561  const uint32* d_suffixes,
562  const uint32* d_slots)
563  {
564  bwt_handler.process_scattered( n_suffixes, d_suffixes, d_slots );
565  ssa_handler.process_scattered( n_suffixes, d_suffixes, d_slots );
566  }
567 
568  // return the primary
569  //
570  uint32 primary() const { return bwt_handler.primary; }
571 
572  // remove the dollar symbol
573  //
575  {
577  }
578 
581 };
582 
584 
586 
587 } // namespace nvbio