NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
gotoh_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <nvbio/alignment/sink.h>
31 #include <nvbio/alignment/utils.h>
33 #include <nvbio/basic/iterator.h>
35 
36 namespace nvbio {
37 namespace aln {
38 
39 // ----------------------------- Gotoh functions ---------------------------- //
40 
41 #define NVBIO_SW_VECTOR_LOADING
42 
43 namespace priv
44 {
45 
46 template <typename string_type> struct gotoh_use_vectorization { static const bool VALUE = false; };
47 template <typename T> struct gotoh_use_vectorization< vector_view< T*> > { static const bool VALUE = true; };
48 template <typename T> struct gotoh_use_vectorization< vector_view<const T*> > { static const bool VALUE = true; };
49 
50 //
51 // A helper scoring context class, which can be used to adapt the basic
52 // algorithm to various situations, such as:
53 // scoring
54 // scoring within a window (i.e. saving only the last band within the window)
55 // computing checkpoints
56 // computing a flow submatrix
57 //
58 template <uint32 BAND_LEN, AlignmentType TYPE, typename algorithm_tag>
60 {
61  // initialize the j-th column of the DP matrix
62  //
63  // \param j column index
64  // \param N column size
65  // \param column column values
66  // \param scoring scoring scheme
67  // \param zero zero value
68  // \param infimum infimum value
69  template <typename column_type, typename scoring_type, typename score_type>
71  void init(
72  const uint32 j,
73  const uint32 N,
75  const scoring_type& scoring,
76  const score_type zero,
77  const score_type infimum)
78  {
79  if (j == 0) // ensure this context can be used for multi-pass scoring
80  {
81  for (uint32 i = 0; i < N; ++i)
82  {
83  column[i].x = equal<algorithm_tag,PatternBlockingTag>() ?
84  TYPE == GLOBAL ? scoring.text_gap_open() + scoring.text_gap_extension() * i : zero :
85  TYPE != LOCAL ? scoring.text_gap_open() + scoring.text_gap_extension() * i : zero;
86  column[i].y = TYPE == LOCAL ? zero : infimum;
87  }
88  }
89  }
90 
91  // do something with the previous column
92  //
93  // \param j column index
94  // \param N column size
95  // \param column column values
96  template <typename column_type>
99  const uint32 j,
100  const uint32 N,
101  const column_type column) {}
102 
103  // do something with the last column
104  //
105  // \param j column index
106  // \param N column size
107  // \param column column values
108  template <typename column_type>
111  const uint32 j,
112  const uint32 M,
113  const uint32 N,
114  const column_type column) {}
115 
116  // do something with the newly computed cell
117  //
118  // \param i row index
119  // \param N number of rows (column size)
120  // \param j column index
121  // \param M number of columns (row size)
122  // \param score computed score
123  // \param dir direction flow
124  template <typename score_type>
126  void new_cell(
127  const uint32 i,
128  const uint32 N,
129  const uint32 j,
130  const uint32 M,
131  const score_type score,
132  const DirectionVector dir,
133  const DirectionVector edir,
134  const DirectionVector fdir) {}
135 };
136 
137 //
138 // A helper checkpointed-scoring context class which allows to perform scoring in multiple
139 // passes, saving & restoring a checkpoint each time.
140 //
141 template <uint32 BAND_LEN, AlignmentType TYPE, typename algorithm_tag, typename checkpoint_type>
143 {
144  // constructor
145  //
146  // \param checkpoints input checkpoints array
148  GotohCheckpointedScoringContext(checkpoint_type checkpoint) :
149  m_checkpoint( checkpoint ) {}
150 
151  // initialize the j-th column of the DP matrix
152  //
153  // \param j column index
154  // \param N column size
155  // \param column column values
156  // \param scoring scoring scheme
157  // \param zero zero value
158  // \param infimum infimum value
159  template <typename column_type, typename scoring_type, typename score_type>
161  void init(
162  const uint32 j,
163  const uint32 N,
165  const scoring_type& scoring,
166  const score_type zero,
167  const score_type infimum)
168  {
169  if (j == 0)
170  {
171  for (uint32 i = 0; i < N; ++i)
172  {
173  column[i].x = equal<algorithm_tag,PatternBlockingTag>() ?
174  TYPE == GLOBAL ? scoring.text_gap_open() + scoring.text_gap_extension() * i : zero :
175  TYPE != LOCAL ? scoring.text_gap_open() + scoring.text_gap_extension() * i : zero;
176  column[i].y = TYPE == LOCAL ? zero : infimum;
177  }
178  }
179  else
180  {
181  for (uint32 i = 0; i < N; ++i)
182  {
183  column[i].x = m_checkpoint[i].x;
184  column[i].y = m_checkpoint[i].y;
185  }
186  }
187  }
188 
189  // do something with the previous column
190  //
191  // \param j column index
192  // \param N column size
193  // \param column column values
194  template <typename column_type>
197  const uint32 j,
198  const uint32 N,
199  const column_type column) {}
200 
201  // do something with the last column
202  //
203  // \param j column index
204  // \param N column size
205  // \param column column values
206  template <typename column_type>
209  const uint32 j,
210  const uint32 M,
211  const uint32 N,
212  const column_type column)
213  {
214  for (uint32 i = 0; i < N; ++i)
215  {
216  m_checkpoint[i].x = column[i].x;
217  m_checkpoint[i].y = column[i].y;
218  }
219  }
220 
221  // do something with the newly computed cell
222  //
223  // \param i row index
224  // \param N number of rows (column size)
225  // \param j column index
226  // \param M number of columns (row size)
227  // \param score computed score
228  // \param dir direction flow
229  template <typename score_type>
231  void new_cell(
232  const uint32 i,
233  const uint32 N,
234  const uint32 j,
235  const uint32 M,
236  const score_type score,
237  const DirectionVector dir,
238  const DirectionVector edir,
239  const DirectionVector fdir) {}
240 
241  checkpoint_type m_checkpoint;
242 };
243 
244 //
245 // A helper scoring context class, instantiated to keep track of checkpoints
246 //
247 template <uint32 BAND_LEN, AlignmentType TYPE, uint32 CHECKPOINTS, typename checkpoint_type>
249 {
250  // constructor
251  //
252  // \param checkpoints input checkpoints array
255  m_checkpoints( checkpoints ) {}
256 
257  // initialize the j-th column of the DP matrix
258  //
259  // \param j column index
260  // \param N column size
261  // \param column column values
262  // \param scoring scoring scheme
263  // \param zero zero value
264  // \param infimum infimum value
265  template <typename column_type, typename scoring_type, typename score_type>
267  void init(
268  const uint32 j,
269  const uint32 N,
271  const scoring_type& scoring,
272  const score_type zero,
273  const score_type infimum)
274  {
275  for (uint32 i = 0; i < N; ++i)
276  {
277  column[i].x = TYPE == GLOBAL ? scoring.text_gap_open() + scoring.text_gap_extension() * i : zero;
278  column[i].y = TYPE == LOCAL ? zero : infimum;
279  }
280  }
281 
282  // do something with the previous column
283  //
284  // \param j column index
285  // \param N column size
286  // \param column column values
287  template <typename column_type>
290  const uint32 j,
291  const uint32 N,
292  const column_type column)
293  {
294  typedef typename std::iterator_traits<column_type>::value_type vector_type;
295  typedef typename vector_traits<vector_type>::value_type value_type;
296 
297  // save checkpoint
298  if ((j & (CHECKPOINTS-1)) == 0u)
299  {
300  const uint32 checkpoint_id = j / CHECKPOINTS;
301 
302  for (uint32 i = 0; i < N; ++i)
303  m_checkpoints[ checkpoint_id*N + i ] = make_vector<value_type>( column[i].x, column[i].y );
304  }
305  }
306 
307  // do something with the last column
308  //
309  // \param j column index
310  // \param N column size
311  // \param column column values
312  template <typename column_type>
315  const uint32 j,
316  const uint32 M,
317  const uint32 N,
318  const column_type column) {}
319 
320  // do something with the newly computed cell
321  //
322  // \param i row index
323  // \param N number of rows (column size)
324  // \param j column index
325  // \param M number of columns (row size)
326  // \param score computed score
327  // \param dir direction flow
328  template <typename score_type>
330  void new_cell(
331  const uint32 i,
332  const uint32 N,
333  const uint32 j,
334  const uint32 M,
335  const score_type score,
336  const DirectionVector dir,
337  const DirectionVector edir,
338  const DirectionVector fdir) {}
339 
340  checkpoint_type m_checkpoints;
341 };
342 
343 //
344 // A helper scoring context class, instantiated to keep track of the direction vectors
345 // of a DP submatrix between given checkpoints
346 //
347 template <uint32 BAND_LEN, AlignmentType TYPE, uint32 CHECKPOINTS, typename checkpoint_type, typename submatrix_type>
349 {
350  // constructor
351  //
352  // \param checkpoints input checkpoints array
353  // \param checkpoint_id id of the checkpoint defining the first column of the submatrix
354  // \param submatrix submatrix output storage
357  const checkpoint_type checkpoints,
358  const uint32 checkpoint_id,
359  const submatrix_type submatrix) :
360  m_checkpoints( checkpoints ),
361  m_checkpoint_id( checkpoint_id ),
362  m_submatrix( submatrix ) {}
363 
364  // initialize the j-th column of the DP matrix
365  //
366  // \param j column index
367  // \param N column size
368  // \param column column values
369  // \param scoring scoring scheme
370  // \param zero zero value
371  // \param infimum infimum value
372  template <typename column_type, typename scoring_type, typename score_type>
374  void init(
375  const uint32 j,
376  const uint32 N,
378  const scoring_type& scoring,
379  const score_type zero,
380  const score_type infimum)
381  {
382  typedef typename std::iterator_traits<column_type>::value_type value_type;
383 
384  // restore the checkpoint
385  for (uint32 i = 0; i < N; ++i)
386  {
387  const value_type f = m_checkpoints[ m_checkpoint_id*N + i ];
388  column[i].x = f.x;
389  column[i].y = f.y;
390  }
391  }
392 
393  // do something with the previous column
394  //
395  // \param j column index
396  // \param N column size
397  // \param column column values
398  template <typename column_type>
401  const uint32 j,
402  const uint32 N,
403  const column_type column) {}
404 
405  // do something with the last column
406  //
407  // \param j column index
408  // \param N column size
409  // \param column column values
410  template <typename column_type>
413  const uint32 j,
414  const uint32 M,
415  const uint32 N,
416  const column_type column) {}
417 
418  // do something with the newly computed cell
419  //
420  // \param i row index
421  // \param N number of rows (column size)
422  // \param j column index
423  // \param M number of columns (row size)
424  // \param score computed score
425  // \param dir direction flow
426  template <typename score_type>
428  void new_cell(
429  const uint32 i,
430  const uint32 N,
431  const uint32 j,
432  const uint32 M,
433  const score_type score,
434  const DirectionVector hdir,
435  const DirectionVector edir,
436  const DirectionVector fdir)
437  {
438  // save the direction vector
439  const uint32 offset = m_checkpoint_id * CHECKPOINTS;
440 
441  const uint8 cdir =
442  (TYPE == LOCAL ? (score == 0 ? SINK : hdir) : hdir) | edir | fdir;
443 
444  m_submatrix[ i * CHECKPOINTS + (j - offset) ] = cdir;
445  }
446 
447  checkpoint_type m_checkpoints;
449  submatrix_type m_submatrix;
450 };
451 
452 template <uint32 BAND_LEN, AlignmentType TYPE, typename algorithm_tag, typename symbol_type>
454 
455 //
456 // A template struct used to possibly specialize the implementation of the Gotoh-based alignment based on
457 // the template parameters.
458 //
459 template <uint32 BAND_LEN, AlignmentType TYPE, typename symbol_type>
461 {
462  // update a DP row
463  //
464  template <
465  bool CHECK_M,
466  typename context_type,
467  typename query_cache,
468  typename score_type,
469  typename temp_iterator,
470  typename sink_type,
471  typename scoring_type>
473  static void update_row(
474  context_type& context,
475  const uint32 block,
476  const uint32 M,
477  const uint32 i,
478  const uint32 N,
479  const symbol_type r_i,
480  const query_cache q_cache,
481  temp_iterator temp,
482  score_type& temp_i,
483  score_type* H_band,
484  score_type* F_band,
485  sink_type& sink,
486  const score_type min_score,
487  score_type& max_score,
488  const score_type G_o,
489  const score_type G_e,
490  const score_type zero,
491  const scoring_type scoring)
492  {
493  typedef typename std::iterator_traits<temp_iterator>::value_type temp_cell_type;
494  typedef typename vector_traits<temp_cell_type>::value_type temp_scalar_type;
495 
496  //
497  // NOTE:
498  // It might look as if we were going to make lots of out-of-bounds accesses here,
499  // as we loop across BAND_LEN cells irrespectively of whether the band straddles
500  // the end of the pattern.
501  // However, these accesses don't cause any page faults as they refer to properly
502  // sized temporary arrays (that in practice are placed in registers), and don't
503  // affect the results as they contribute to unused portions of the DP matrices.
504  // The reporting of the scores is properly guarded.
505  //
506 
507  // set the 0-th coefficient in the band to be equal to the (i-1)-th row of the left column (diagonal term)
508  score_type H_diag = temp_i;
509 
510  H_band[0] = temp_i = temp[i].x;
511  score_type E = temp[i].y;
512 
513  #pragma unroll
514  for (uint32 j = 1; j <= BAND_LEN; ++j)
515  {
516  // update F
517  const score_type ftop = F_band[j] + G_e;
518  const score_type htop = H_band[j] + G_o;
519  F_band[j] = nvbio::max( ftop, htop );
520  const DirectionVector fdir = ftop > htop ? DELETION_EXT : SUBSTITUTION;
521 
522  // update E
523  const score_type eleft = E + G_e;
524  const score_type hleft = H_band[j-1] + G_o;
525  E = nvbio::max( eleft, hleft );
526  const DirectionVector edir = eleft > hleft ? INSERTION_EXT : SUBSTITUTION;
527 
528  const int2 info_j = q_cache[ j-1 ];
529  //const int4 info_j = q_cache[ j-1 ];
530  const symbol_type q_j = symbol_type(info_j.x);
531  const score_type qq_j = score_type(info_j.y);
532  //const score_type m_j = score_type(info_j.z);
533  //const score_type S_ij = (r_i == q_j) ? m_j : scoring.mismatch( r_i, q_j, qq_j );
534  const score_type S_ij = scoring.substitution( i, block + j, r_i, q_j, qq_j );
535  const score_type diagonal = H_diag + S_ij;
536  const score_type top = F_band[j];
537  const score_type left = E;
538  score_type hi = nvbio::max3( left, top, diagonal );
539  hi = (TYPE == LOCAL)? nvbio::max( hi, zero ) : hi; // clamp to zero
540  H_diag = H_band[j];
541  H_band[j] = hi;
542 
543  if ((CHECK_M == false) || (block + j <= M))
544  {
545  context.new_cell(
546  i, N,
547  block + j - 1, M,
548  hi,
549  top > left ?
550  (top > diagonal ? DELETION : SUBSTITUTION) :
551  (left > diagonal ? INSERTION : SUBSTITUTION),
552  edir,
553  fdir );
554  }
555  }
556 
557  // save the last entry of the band
558  temp[i] = make_vector<temp_scalar_type>( H_band[ BAND_LEN ], E );
559 
560  NVBIO_CUDA_ASSERT( H_band[ BAND_LEN ] >= Field_traits<temp_scalar_type>::min() );
561  NVBIO_CUDA_ASSERT( H_band[ BAND_LEN ] <= Field_traits<temp_scalar_type>::max() );
564 
565  max_score = nvbio::max( max_score, H_band[ BAND_LEN ] );
566 
567  if (TYPE == LOCAL)
568  {
569  if (CHECK_M)
570  {
571  // during local alignment we save the best score across all bands
572  for (uint32 j = 1; j <= BAND_LEN; ++j)
573  {
574  if (block + j <= M)
575  sink.report( H_band[j], make_uint2( i+1, block+j ) );
576  }
577  }
578  else
579  {
580  // during local alignment we save the best score across all bands
581  for (uint32 j = 1; j <= BAND_LEN; ++j)
582  sink.report( H_band[j], make_uint2( i+1, block+j ) );
583  }
584  }
585  else if (CHECK_M)
586  {
587  if (TYPE == SEMI_GLOBAL)
588  {
589  // during semi-global alignment we save the best score across the last column H[*][M], at each row
590  save_boundary<BAND_LEN>( block, M, H_band, i, sink );
591  }
592  }
593  }
594 
595  //
596  // Calculate the alignment score between a string and a reference, using the Smith-Waterman algorithm,
597  // using a templated column storage.
598  //
599  // This function is templated over:
600  // - a context that is passed the computed DP matrix values, and can be
601  // used to specialize its behavior.
602  // - a sink that is used to report successful alignments
603  //
604  // Furthermore, the function can be called on a window of the pattern, assuming that the context
605  // will provide the proper initialization for the first column of the corresponding DP matrix window.
606  //
607  // \param context template context class, used to specialize the behavior of the aligner
608  // \param query input pattern (horizontal string)
609  // \param quals input pattern qualities (horizontal string)
610  // \param ref input text (vertical string)
611  // \param scoring scoring scheme
612  // \param min_score minimum output score
613  // \param sink alignment sink
614  // \param window_begin beginning of pattern window
615  // \param window_end end of pattern window
616  //
617  // \return false if early-exited, true otherwise
618  //
619  template <
620  typename context_type,
621  typename query_type,
622  typename qual_type,
623  typename ref_type,
624  typename scoring_type,
625  typename sink_type,
626  typename column_type>
628  static
629  bool run(
630  const scoring_type& scoring,
631  context_type& context,
632  query_type query,
633  qual_type quals,
634  ref_type ref,
635  const int32 min_score,
636  sink_type& sink,
637  const uint32 window_begin,
638  const uint32 window_end,
639  column_type temp)
640  {
641  //
642  // This function breaks the DP matrix in vertical stripes of BAND_LEN cells,
643  // so as to keep the current reduced row (a band of coefficients) in registers
644  // throughout the entire computation.
645  // Within each stripe, the matrix is updated top-to-bottom, and the right-most
646  // border of the stripe is saved to a local memory array so as to allow resuming
647  // its values when processing the next stripe.
648  //
649  const uint32 M = query.length();
650  const uint32 N = ref.length();
651 
652  typedef int32 score_type;
653  score_type H_band[BAND_LEN+1];
654  score_type F_band[BAND_LEN+1];
655 
656  typedef typename std::iterator_traits<column_type>::value_type temp_cell_type;
657  typedef typename vector_traits<temp_cell_type>::value_type temp_scalar_type;
658 
659  const score_type G_o = scoring.pattern_gap_open();
660  const score_type G_e = scoring.pattern_gap_extension();
661  const score_type zero = score_type(0);
662  const score_type infimum = Field_traits<temp_scalar_type>::min() - nvbio::min( G_o, G_e );
663 
664  int2 q_cache[ BAND_LEN ];
665  //int4 q_cache[ BAND_LEN ];
666 
667  // initialize the first column
668  context.init( window_begin, N, temp, scoring, zero, infimum );
669 
670  const uint32 end_block = (window_end == M) ?
671  nvbio::max( BAND_LEN, BAND_LEN * ((M + BAND_LEN-1) / BAND_LEN) ) :
672  window_end + BAND_LEN;
673 
674  // loop across the short edge of the DP matrix (i.e. the columns)
675  for (uint32 block = window_begin; block + BAND_LEN < end_block; block += BAND_LEN)
676  {
677  // save the previous column
678  context.previous_column( block, N, temp );
679 
680  // load a block of entries from each query
681  #pragma unroll
682  for (uint32 t = 0; t < BAND_LEN; ++t)
683  {
684  const symbol_type q = query[ block + t ];
685  const uint8 qq = quals[ block + t ];
686 
687  q_cache[ t ] = make_int2( q, qq );
688  //q_cache[ t ] = make_int4( q, qq, scoring.match(qq), 0 );
689  }
690 
691  // initialize the first band
692  #pragma unroll
693  for (uint32 j = 0; j <= BAND_LEN; ++j)
694  {
695  H_band[j] = (TYPE != LOCAL) ? (block + j > 0 ? G_o + G_e * (block + j - 1u) : zero) : zero;
696  F_band[j] = infimum;
697  }
698 
699  score_type max_score = Field_traits<score_type>::min();
700 
701  score_type temp_i = H_band[0];
702 
703  //typedef typename string_traits<ref_type>::forward_iterator forward_ref_iterator;
704 
705  #if defined(NVBIO_SW_VECTOR_LOADING)
706  // check whether we should use vectorized loads
708  {
709  //
710  // loop across the long edge of the DP matrix (i.e. the rows)
711  //
712  const uint32 REF_VECTOR_WIDTH = vectorized_string<ref_type>::VECTOR_WIDTH;
713 
714  const uint2 vec_range = vectorized_string_range( ref );
715 
716  for (uint32 i = 0; i < vec_range.x; ++i)
717  {
718  // load the new character from the reference
719  const uint8 r_i = ref[i];
720 
721  update_row<false>(
722  context,
723  block, M,
724  i, N,
725  r_i,
726  q_cache,
727  temp,
728  temp_i,
729  H_band,
730  F_band,
731  sink,
732  min_score,
733  max_score,
734  G_o,G_e,
735  zero,
736  scoring );
737  }
738  for (uint32 i = vec_range.x; i < vec_range.y; i += REF_VECTOR_WIDTH)
739  {
740  uint8 r[REF_VECTOR_WIDTH];
741 
742  // load REF_VECTOR_WIDTH new characters from the reference
743  vectorized_string_load( ref, i, r );
744 
745  for (uint32 j = 0; j < REF_VECTOR_WIDTH; ++j)
746  {
747  // load the new character from the reference
748  const uint8 r_i = r[j];
749 
750  update_row<false>(
751  context,
752  block, M,
753  i + j, N,
754  r_i,
755  q_cache,
756  temp,
757  temp_i,
758  H_band,
759  F_band,
760  sink,
761  min_score,
762  max_score,
763  G_o,G_e,
764  zero,
765  scoring );
766  }
767  }
768 
769  for (uint32 i = vec_range.y; i < N; ++i)
770  {
771  // load the new character from the reference
772  const uint8 r_i = ref[i];
773 
774  update_row<false>(
775  context,
776  block, M,
777  i, N,
778  r_i,
779  q_cache,
780  temp,
781  temp_i,
782  H_band,
783  F_band,
784  sink,
785  min_score,
786  max_score,
787  G_o,G_e,
788  zero,
789  scoring );
790  }
791  }
792  else
793  #endif
794  {
795  //
796  // loop across the long edge of the DP matrix (i.e. the rows)
797  //
798 
799  //forward_ref_iterator ref_it( ref.begin() );
800 
801  for (uint32 i = 0; i < N; ++i)
802  {
803  // load the new character from the reference
804  const uint8 r_i = ref[i];
805  //const uint8 r_i = *ref_it; ++ref_it;
806 
807  update_row<false>(
808  context,
809  block, M,
810  i, N,
811  r_i,
812  q_cache,
813  temp,
814  temp_i,
815  H_band,
816  F_band,
817  sink,
818  min_score,
819  max_score,
820  G_o,G_e,
821  zero,
822  scoring );
823  }
824  }
825 
826  // we are now (M - block - BAND_LEN) columns from the last one: check whether
827  // we could theoretically reach the minimum score
828  const score_type missing_cols = score_type(M - block - BAND_LEN);
829  if (max_score + missing_cols * scoring.match(255) < score_type( min_score ))
830  return false;
831  }
832 
833  // process the very last stripe
834  if (window_end == M)
835  {
836  const uint32 block = end_block - BAND_LEN;
837 
838  // save the previous column
839  context.previous_column( block, N, temp );
840 
841  // load a block of entries from each query
842  const uint32 block_end = nvbio::min( block + BAND_LEN, M );
843  #pragma unroll
844  for (uint32 t = 0; t < BAND_LEN; ++t)
845  {
846  if (block + t < block_end)
847  {
848  const symbol_type q = query[ block + t ];
849  const uint8 qq = quals[ block + t ];
850 
851  q_cache[ t ] = make_int2( q, qq );
852  //q_cache[ t ] = make_int4( q, qq, scoring.match(qq), 0 );
853  }
854  }
855 
856  // initialize the first band
857  #pragma unroll
858  for (uint32 j = 0; j <= BAND_LEN; ++j)
859  {
860  H_band[j] = (TYPE != LOCAL) ? (block + j > 0 ? G_o + G_e * (block + j - 1u) : zero) : zero;
861  F_band[j] = infimum;
862  }
863 
864  score_type max_score = Field_traits<score_type>::min();
865 
866  score_type temp_i = H_band[0];
867 
868  // loop across the long edge of the DP matrix (i.e. the rows)
869  for (uint32 i = 0; i < N; ++i)
870  {
871  // load the new character from the reference
872  const uint8 r_i = ref[i];
873 
874  update_row<true>(
875  context,
876  block, M,
877  i, N,
878  r_i,
879  q_cache,
880  temp,
881  temp_i,
882  H_band,
883  F_band,
884  sink,
885  min_score,
886  max_score,
887  G_o,G_e,
888  zero,
889  scoring );
890  }
891  }
892 
893  // save the last column
894  context.last_column( window_end, M, N, temp );
895 
896  if (TYPE == GLOBAL)
897  save_Mth<BAND_LEN>( M, H_band, N-1, sink );
898 
899  return true;
900  }
901 
902  //
903  // Calculate the alignment score between a string and a reference, using the Smith-Waterman algorithm,
904  // using local memory storage for the boundary columns.
905  //
906  // This function is templated over:
907  // 1. a context that is passed the computed DP matrix values, and can be
908  // used to specialize its behavior.
909  // 2. a sink that is used to report successful alignments
910  //
911  // Furthermore, the function can be called on a window of the pattern, assuming that the context
912  // will provide the proper initialization for the first column of the corresponding DP matrix window.
913  //
914  // \param context template context class, used to specialize the behavior of the aligner
915  // \param query input pattern (horizontal string)
916  // \param quals input pattern qualities (horizontal string)
917  // \param ref input text (vertical string)
918  // \param scoring scoring scheme
919  // \param min_score minimum output score
920  // \param sink alignment sink
921  // \param window_begin beginning of pattern window
922  // \param window_end end of pattern window
923  //
924  // \return false if early-exited, true otherwise
925  //
926  template <
927  uint32 MAX_REF_LEN,
928  typename context_type,
929  typename query_type,
930  typename qual_type,
931  typename ref_type,
932  typename scoring_type,
933  typename sink_type>
935  static
936  bool run(
937  const scoring_type& scoring,
938  context_type& context,
939  query_type query,
940  qual_type quals,
941  ref_type ref,
942  const int32 min_score,
943  sink_type& sink,
944  const uint32 window_begin,
945  const uint32 window_end)
946  {
947  // instantiated a local memory array
948  short2 temp[MAX_REF_LEN];
949  short2* temp_ptr = temp;
950 
951  return run(
952  context,
953  query,
954  quals,
955  ref,
956  scoring,
957  min_score,
958  sink,
959  window_begin,
960  window_end,
961  temp_ptr );
962  }
963 };
964 
965 //
966 // A template struct used to possibly specialize the implementation of the Gotoh-based alignment based on
967 // the template parameters.
968 //
969 template <uint32 BAND_LEN, AlignmentType TYPE, typename symbol_type>
971 {
972  // update a DP row
973  //
974  template <
975  bool CHECK_N,
976  typename context_type,
977  typename ref_cache,
978  typename score_type,
979  typename temp_iterator,
980  typename sink_type,
981  typename scoring_type>
983  static void update_row(
984  context_type& context,
985  const uint32 block,
986  const uint32 N,
987  const uint32 i,
988  const uint32 M,
989  const symbol_type q_i,
990  const uint8 qq_i,
991  //const score_type m_i,
992  const ref_cache r_cache,
993  temp_iterator temp,
994  score_type& temp_i,
995  score_type* H_band,
996  score_type* F_band,
997  sink_type& sink,
998  const score_type min_score,
999  score_type& max_score,
1000  const score_type G_o,
1001  const score_type G_e,
1002  const score_type zero,
1003  const scoring_type scoring)
1004  {
1005  typedef typename std::iterator_traits<temp_iterator>::value_type temp_cell_type;
1006  typedef typename vector_traits<temp_cell_type>::value_type temp_scalar_type;
1007 
1008  //
1009  // NOTE:
1010  // It might look as if we were going to make lots of out-of-bounds accesses here,
1011  // as we loop across BAND_LEN cells irrespectively of whether the band straddles
1012  // the end of the pattern.
1013  // However, these accesses don't cause any page faults as they refer to properly
1014  // sized temporary arrays (that in practice are placed in registers), and don't
1015  // affect the results as they contribute to unused portions of the DP matrices.
1016  // The reporting of the scores is properly guarded.
1017  //
1018 
1019  // set the 0-th coefficient in the band to be equal to the (i-1)-th row of the left column (diagonal term)
1020  score_type H_diag = temp_i;
1021 
1022  H_band[0] = temp_i = temp[i].x;
1023  score_type E = temp[i].y;
1024 
1025  #pragma unroll
1026  for (uint32 j = 1; j <= BAND_LEN; ++j)
1027  {
1028  // update F
1029  const score_type ftop = F_band[j] + G_e;
1030  const score_type htop = H_band[j] + G_o;
1031  F_band[j] = nvbio::max( ftop, htop );
1032  const DirectionVector fdir = ftop > htop ? INSERTION_EXT : SUBSTITUTION;
1033 
1034  // update E
1035  const score_type eleft = E + G_e;
1036  const score_type hleft = H_band[j-1] + G_o;
1037  E = nvbio::max( eleft, hleft );
1038  const DirectionVector edir = eleft > hleft ? DELETION_EXT : SUBSTITUTION;
1039 
1040  const symbol_type r_j = r_cache[ j-1 ];
1041  //const score_type S_ij = (r_j == q_i) ? m_i : scoring.mismatch( r_j, q_i, qq_i );
1042  const score_type S_ij = scoring.substitution( block + j, i, r_j, q_i, qq_i );
1043  const score_type diagonal = H_diag + S_ij;
1044  const score_type top = F_band[j];
1045  const score_type left = E;
1046  score_type hi = nvbio::max3( left, top, diagonal );
1047  hi = (TYPE == LOCAL)? nvbio::max( hi, zero ) : hi; // clamp to zero
1048  H_diag = H_band[j];
1049  H_band[j] = hi;
1050 
1051  if ((CHECK_N == false) || (block + j <= N))
1052  {
1053  context.new_cell(
1054  i, M,
1055  block + j - 1, N,
1056  hi,
1057  top > left ?
1058  (top > diagonal ? INSERTION : SUBSTITUTION) :
1059  (left > diagonal ? DELETION : SUBSTITUTION),
1060  edir,
1061  fdir );
1062  }
1063  }
1064 
1065  // save the last entry of the band
1066  temp[i] = make_vector<temp_scalar_type>( H_band[ BAND_LEN ], E );
1067 
1068  NVBIO_CUDA_ASSERT( H_band[ BAND_LEN ] >= Field_traits<temp_scalar_type>::min() );
1069  NVBIO_CUDA_ASSERT( H_band[ BAND_LEN ] <= Field_traits<temp_scalar_type>::max() );
1072 
1073  max_score = nvbio::max( max_score, H_band[ BAND_LEN ] );
1074 
1075  if (TYPE == LOCAL)
1076  {
1077  if (CHECK_N)
1078  {
1079  // during local alignment we save the best score across all bands
1080  for (uint32 j = 1; j <= BAND_LEN; ++j)
1081  {
1082  if (block + j <= N)
1083  sink.report( H_band[j], make_uint2( block+j, i+1 ) );
1084  }
1085  }
1086  else
1087  {
1088  // during local alignment we save the best score across all bands
1089  for (uint32 j = 1; j <= BAND_LEN; ++j)
1090  sink.report( H_band[j], make_uint2( block+j, i+1 ) );
1091  }
1092  }
1093  }
1094 
1095  //
1096  // Calculate the alignment score between a string and a reference, using the Smith-Waterman algorithm,
1097  // using a templated column storage.
1098  //
1099  // This function is templated over:
1100  // - a context that is passed the computed DP matrix values, and can be
1101  // used to specialize its behavior.
1102  // - a sink that is used to report successful alignments
1103  //
1104  // Furthermore, the function can be called on a window of the pattern, assuming that the context
1105  // will provide the proper initialization for the first column of the corresponding DP matrix window.
1106  //
1107  // \param context template context class, used to specialize the behavior of the aligner
1108  // \param query input pattern (horizontal string)
1109  // \param quals input pattern qualities (horizontal string)
1110  // \param ref input text (vertical string)
1111  // \param scoring scoring scheme
1112  // \param min_score minimum output score
1113  // \param sink alignment sink
1114  // \param window_begin beginning of pattern window
1115  // \param window_end end of pattern window
1116  //
1117  // \return false if early-exited, true otherwise
1118  //
1119  template <
1120  typename context_type,
1121  typename query_type,
1122  typename qual_type,
1123  typename ref_type,
1124  typename scoring_type,
1125  typename sink_type,
1126  typename column_type>
1128  static
1129  bool run(
1130  const scoring_type& scoring,
1131  context_type& context,
1132  query_type query,
1133  qual_type quals,
1134  ref_type ref,
1135  const int32 min_score,
1136  sink_type& sink,
1137  const uint32 window_begin,
1138  const uint32 window_end,
1139  column_type temp)
1140  {
1141  //
1142  // This function breaks the DP matrix in vertical stripes of BAND_LEN cells,
1143  // so as to keep the current reduced row (a band of coefficients) in registers
1144  // throughout the entire computation.
1145  // Within each stripe, the matrix is updated top-to-bottom, and the right-most
1146  // border of the stripe is saved to a local memory array so as to allow resuming
1147  // its values when processing the next stripe.
1148  //
1149  const uint32 M = query.length();
1150  const uint32 N = ref.length();
1151 
1152  typedef int32 score_type;
1153  score_type H_band[BAND_LEN+1];
1154  score_type F_band[BAND_LEN+1];
1155 
1156  typedef typename std::iterator_traits<column_type>::value_type temp_cell_type;
1157  typedef typename vector_traits<temp_cell_type>::value_type temp_scalar_type;
1158 
1159  const score_type G_o = scoring.pattern_gap_open();
1160  const score_type G_e = scoring.pattern_gap_extension();
1161  const score_type zero = score_type(0);
1162  const score_type infimum = Field_traits<temp_scalar_type>::min() - nvbio::min( G_o, G_e );
1163 
1164  uint8 r_cache[ BAND_LEN ];
1165 
1166  // initialize the first column
1167  context.init( window_begin, M, temp, scoring, zero, infimum );
1168 
1169  const uint32 end_block = (window_end == N) ?
1170  nvbio::max( BAND_LEN, BAND_LEN * ((N + BAND_LEN-1) / BAND_LEN) ) :
1171  window_end + BAND_LEN;
1172 
1173  // loop across the short edge of the DP matrix (i.e. the columns)
1174  for (uint32 block = window_begin; block + BAND_LEN < end_block; block += BAND_LEN)
1175  {
1176  // save the previous column
1177  context.previous_column( block, M, temp );
1178 
1179  // load a block of entries from the reference
1180  #pragma unroll
1181  for (uint32 t = 0; t < BAND_LEN; ++t)
1182  r_cache[ t ] = ref[ block + t ];
1183 
1184  // initialize the first band
1185  #pragma unroll
1186  for (uint32 j = 0; j <= BAND_LEN; ++j)
1187  {
1188  H_band[j] = (TYPE == GLOBAL) ? (block + j > 0 ? G_o + G_e * (block + j - 1u) : zero) : zero;
1189  F_band[j] = infimum;
1190  }
1191 
1192  score_type max_score = Field_traits<score_type>::min();
1193 
1194  score_type temp_i = H_band[0];
1195 
1196  // loop across the short edge of the DP matrix (i.e. the rows)
1197  for (uint32 i = 0; i < M; ++i)
1198  {
1199  // load the new character from the query
1200  const uint8 q_i = query[i];
1201  const uint8 qq_i = quals[i];
1202 
1203  //const int32 m_i = scoring.match(qq_i);
1204 
1205  update_row<false>(
1206  context,
1207  block, N,
1208  i, M,
1209  q_i,
1210  qq_i,
1211  //m_i,
1212  r_cache,
1213  temp,
1214  temp_i,
1215  H_band,
1216  F_band,
1217  sink,
1218  min_score,
1219  max_score,
1220  G_o,G_e,
1221  zero,
1222  scoring );
1223  }
1224  if (TYPE == SEMI_GLOBAL)
1225  {
1226  // during semi-global alignment we save the best score across the last row
1227  for (uint32 j = 1; j <= BAND_LEN; ++j)
1228  sink.report( H_band[j], make_uint2( block+j, M ) );
1229  }
1230 
1231  // we are now (N - block - BAND_LEN) columns from the last one: check whether
1232  // we could theoretically reach the minimum score
1233  const score_type missing_cols = score_type(N - block - BAND_LEN);
1234  if (max_score + missing_cols * scoring.match(255) < score_type( min_score ))
1235  return false;
1236  }
1237 
1238  // process the very last stripe
1239  if (window_end == N)
1240  {
1241  const uint32 block = end_block - BAND_LEN;
1242 
1243  // save the previous column
1244  context.previous_column( block, M, temp );
1245 
1246  // load a block of entries from each query
1247  const uint32 block_end = nvbio::min( block + BAND_LEN, N );
1248  #pragma unroll
1249  for (uint32 t = 0; t < BAND_LEN; ++t)
1250  {
1251  if (block + t < block_end)
1252  r_cache[ t ] = ref[ block + t ];
1253  }
1254 
1255  // initialize the first band
1256  #pragma unroll
1257  for (uint32 j = 0; j <= BAND_LEN; ++j)
1258  {
1259  H_band[j] = (TYPE == GLOBAL) ? (block + j > 0 ? G_o + G_e * (block + j - 1u) : zero) : zero;
1260  F_band[j] = infimum;
1261  }
1262 
1263  score_type max_score = Field_traits<score_type>::min();
1264 
1265  score_type temp_i = H_band[0];
1266 
1267  #if defined(NVBIO_SW_VECTOR_LOADING)
1268  // check whether we should use vectorized loads
1270  {
1271  //
1272  // loop across the long edge of the DP matrix (i.e. the rows)
1273  //
1274  const uint32 QUERY_VECTOR_WIDTH = vectorized_string<query_type>::VECTOR_WIDTH;
1275 
1276  const uint2 vec_range = vectorized_string_range( query );
1277 
1278  for (uint32 i = 0; i < vec_range.x; ++i)
1279  {
1280  // load the new character from the query
1281  const uint8 q_i = query[i];
1282  const uint8 qq_i = quals[i];
1283 
1284  update_row<true>(
1285  context,
1286  block, N,
1287  i, M,
1288  q_i,
1289  qq_i,
1290  r_cache,
1291  temp,
1292  temp_i,
1293  H_band,
1294  F_band,
1295  sink,
1296  min_score,
1297  max_score,
1298  G_o,G_e,
1299  zero,
1300  scoring );
1301  }
1302  for (uint32 i = vec_range.x; i < vec_range.y; i += QUERY_VECTOR_WIDTH)
1303  {
1304  uint8 q[QUERY_VECTOR_WIDTH];
1305  uint8 qq[QUERY_VECTOR_WIDTH];
1306 
1307  // load QUERY_VECTOR_WIDTH new characters from the query
1308  vectorized_string_load( query, i, q );
1309 
1310  for (uint32 j = 0; j < QUERY_VECTOR_WIDTH; ++j)
1311  qq[j] = quals[i + j];
1312 
1313  for (uint32 j = 0; j < QUERY_VECTOR_WIDTH; ++j)
1314  {
1315  // load the new character from the reference
1316  const uint8 q_i = q[j];
1317  const uint8 qq_i = qq[j];
1318 
1319  update_row<true>(
1320  context,
1321  block, N,
1322  i + j, M,
1323  q_i,
1324  qq_i,
1325  r_cache,
1326  temp,
1327  temp_i,
1328  H_band,
1329  F_band,
1330  sink,
1331  min_score,
1332  max_score,
1333  G_o,G_e,
1334  zero,
1335  scoring );
1336  }
1337  }
1338  for (uint32 i = vec_range.y; i < M; ++i)
1339  {
1340  // load the new character from the query
1341  const uint8 q_i = query[i];
1342  const uint8 qq_i = quals[i];
1343 
1344  update_row<true>(
1345  context,
1346  block, N,
1347  i, M,
1348  q_i,
1349  qq_i,
1350  r_cache,
1351  temp,
1352  temp_i,
1353  H_band,
1354  F_band,
1355  sink,
1356  min_score,
1357  max_score,
1358  G_o,G_e,
1359  zero,
1360  scoring );
1361  }
1362  }
1363  else
1364  #endif
1365  {
1366  //typedef typename string_traits<query_type>::forward_iterator forward_query_iterator;
1367  //typedef typename string_traits<qual_type>::forward_iterator forward_qual_iterator;
1368 
1369  //forward_query_iterator query_it( query.begin() );
1370  //forward_qual_iterator quals_it( quals.begin() );
1371 
1372  //
1373  // loop across the short edge of the DP matrix (i.e. the query)
1374  //
1375  for (uint32 i = 0; i < M; ++i)
1376  {
1377  // load the new character from the query
1378  const uint8 q_i = query[i];
1379  const uint8 qq_i = quals[i];
1380  //const uint8 q_i = *query_it; ++query_it;
1381  //const uint8 qq_i = *quals_it; ++quals_it;
1382 
1383  update_row<true>(
1384  context,
1385  block, N,
1386  i, M,
1387  q_i,
1388  qq_i,
1389  r_cache,
1390  temp,
1391  temp_i,
1392  H_band,
1393  F_band,
1394  sink,
1395  min_score,
1396  max_score,
1397  G_o,G_e,
1398  zero,
1399  scoring );
1400  }
1401  }
1402 
1403  if (TYPE == SEMI_GLOBAL)
1404  {
1405  // during semi-global alignment we save the best score across the last row
1406  for (uint32 j = 1; j <= BAND_LEN; ++j)
1407  {
1408  if (block + j <= N)
1409  sink.report( H_band[j], make_uint2( block+j, M ) );
1410  }
1411  }
1412  else if (TYPE == GLOBAL)
1413  {
1414  // during global alignment we save the best score at cell [N][M]
1415  for (uint32 j = 1; j <= BAND_LEN; ++j)
1416  {
1417  if (block + j == N)
1418  sink.report( H_band[j], make_uint2( block+j, M ) );
1419  }
1420  }
1421  }
1422 
1423  // save the last column
1424  context.last_column( window_end, N, M, temp );
1425  return true;
1426  }
1427 
1428  //
1429  // Calculate the alignment score between a string and a reference, using the Smith-Waterman algorithm,
1430  // using local memory storage for the boundary columns.
1431  //
1432  // This function is templated over:
1433  // 1. a context that is passed the computed DP matrix values, and can be
1434  // used to specialize its behavior.
1435  // 2. a sink that is used to report successful alignments
1436  //
1437  // Furthermore, the function can be called on a window of the pattern, assuming that the context
1438  // will provide the proper initialization for the first column of the corresponding DP matrix window.
1439  //
1440  // \param context template context class, used to specialize the behavior of the aligner
1441  // \param query input pattern (horizontal string)
1442  // \param quals input pattern qualities (horizontal string)
1443  // \param ref input text (vertical string)
1444  // \param scoring scoring scheme
1445  // \param min_score minimum output score
1446  // \param sink alignment sink
1447  // \param window_begin beginning of pattern window
1448  // \param window_end end of pattern window
1449  //
1450  // \return false if early-exited, true otherwise
1451  //
1452  template <
1453  uint32 MAX_PATTERN_LEN,
1454  typename context_type,
1455  typename string_type,
1456  typename qual_type,
1457  typename ref_type,
1458  typename scoring_type,
1459  typename sink_type>
1461  static
1462  bool run(
1463  const scoring_type& scoring,
1464  context_type& context,
1465  string_type query,
1466  qual_type quals,
1467  ref_type ref,
1468  const int32 min_score,
1469  sink_type& sink,
1470  const uint32 window_begin,
1471  const uint32 window_end)
1472  {
1473  // instantiate a local memory array
1474  short2 temp[MAX_PATTERN_LEN];
1475  short2* temp_ptr = temp;
1476 
1477  return run(
1478  context,
1479  query,
1480  quals,
1481  ref,
1482  scoring,
1483  min_score,
1484  sink,
1485  window_begin,
1486  window_end,
1487  temp_ptr );
1488  }
1489 };
1490 
1491 template <AlignmentType TYPE, uint32 DIM, typename symbol_type>
1493 {
1494  static const uint32 BAND_LEN = 8u / DIM;
1495 };
1496 
1497 template <AlignmentType TYPE, uint32 DIM>
1499 {
1500 #if __CUDA_ARCH__ >= 300
1501  static const uint32 BAND_LEN = 8u;
1502 #else
1503  static const uint32 BAND_LEN = 1u;
1504 #endif
1505 };
1506 
1507 //
1508 // Calculate the alignment score between a pattern and a text, using the Gotoh algorithm.
1509 //
1510 // \param aligner scoring scheme
1511 // \param pattern pattern string (horizontal
1512 // \param quals pattern qualities
1513 // \param text text string (vertical)
1514 // \param min_score minimum score
1515 //
1516 template <
1518  typename scoring_type,
1519  typename algorithm_tag,
1520  typename pattern_string,
1521  typename qual_string,
1522  typename text_string,
1523  typename column_type>
1525  GotohAligner<TYPE,scoring_type,algorithm_tag>,
1526  pattern_string,
1527  qual_string,
1528  text_string,
1529  column_type>
1530 {
1532 
1546  template <typename sink_type>
1548  static bool dispatch(
1549  const aligner_type aligner,
1550  const pattern_string pattern,
1551  const qual_string quals,
1552  const text_string text,
1553  const int32 min_score,
1554  sink_type& sink,
1556  {
1557  typedef typename pattern_string::value_type symbol_type;
1558 
1560 
1562 
1563  const uint32 length = equal<algorithm_tag,PatternBlockingTag>() ? pattern.length() : text.length();
1564 
1565  return gotoh_alignment_score_dispatch<BAND_LEN,TYPE,algorithm_tag,symbol_type>::run( aligner.scheme, context, pattern, quals, text, min_score, sink, 0, length, column );
1566  }
1567 
1583  template <
1584  typename sink_type,
1585  typename checkpoint_type>
1587  static bool dispatch(
1588  const aligner_type aligner,
1589  const pattern_string pattern,
1590  const qual_string quals,
1591  const text_string text,
1592  const int32 min_score,
1593  const uint32 window_begin,
1594  const uint32 window_end,
1595  sink_type& sink,
1596  checkpoint_type checkpoint,
1598  {
1599  typedef typename pattern_string::value_type symbol_type;
1600 
1602 
1604 
1605  return gotoh_alignment_score_dispatch<BAND_LEN,TYPE,algorithm_tag,symbol_type>::run( aligner.scheme, context, pattern, quals, text, min_score, sink, window_begin, window_end, column );
1606  }
1607 
1621  template <typename sink_type>
1623  static bool dispatch(
1624  const aligner_type aligner,
1625  const pattern_string pattern,
1626  const qual_string quals,
1627  const text_string text,
1628  const int32 min_score,
1629  const uint32 window_begin,
1630  const uint32 window_end,
1631  sink_type& sink,
1633  {
1634  typedef typename pattern_string::value_type symbol_type;
1635 
1637 
1639 
1640  return gotoh_alignment_score_dispatch<BAND_LEN,TYPE,algorithm_tag,symbol_type>::run( aligner.scheme, context, pattern, quals, text, min_score, sink, window_begin, window_end, column );
1641  }
1642 };
1643 
1644 //
1645 // Calculate the alignment score between a pattern and a text, using the Gotoh algorithm.
1646 //
1647 // \tparam CHECKPOINTS number of columns between each checkpoint
1648 //
1649 // \param aligner scoring scheme
1650 // \param pattern pattern string (horizontal
1651 // \param quals pattern qualities
1652 // \param text text string (vertical)
1653 // \param min_score minimum score
1654 //
1655 template <
1656  uint32 CHECKPOINTS,
1658  typename scoring_type,
1659  typename pattern_string,
1660  typename qual_string,
1661  typename text_string,
1662  typename column_type>
1664  CHECKPOINTS,
1665  GotohAligner<TYPE,scoring_type>,
1666  pattern_string,
1667  qual_string,
1668  text_string,
1669  column_type>
1670 {
1672 
1673  //
1674  // Calculate a set of checkpoints of the DP matrix for the alignment between a pattern
1675  // and a text, using the Gotoh-Smith-Waterman algorithm.
1676  //
1677  // \tparam checkpoint_type a class to represent the collection of checkpoints,
1678  // represented as a linear array storing each checkpointed
1679  // band contiguously.
1680  // The class has to provide the const indexing operator[].
1681  //
1682  template <
1683  typename sink_type,
1684  typename checkpoint_type>
1686  static
1688  const aligner_type aligner,
1689  const pattern_string pattern,
1690  const qual_string quals,
1691  const text_string text,
1692  const int32 min_score,
1693  sink_type& sink,
1694  checkpoint_type checkpoints,
1696  {
1697  typedef typename pattern_string::value_type symbol_type;
1698 
1700 
1702 
1703  gotoh_alignment_score_dispatch<BAND_LEN,TYPE,PatternBlockingTag,symbol_type>::run( aligner.scheme, context, pattern, quals, text, min_score, sink, 0, pattern.length(), column );
1704  }
1705 
1706  //
1707  // Compute the banded Dynamic Programming submatrix between two given checkpoints,
1708  // storing its flow at each cell.
1709  // The function returns the submatrix width.
1710  //
1711  // \tparam BAND_LEN size of the DP band
1712  //
1713  // \tparam checkpoint_type a class to represent the collection of checkpoints,
1714  // represented as a linear array storing each checkpointed
1715  // band contiguously.
1716  // The class has to provide the const indexing operator[].
1717  //
1718  // \tparam submatrix_type a class to store the flow H, E and F submatrix, represented
1719  // as a linear array of size (BAND_LEN*CHECKPOINTS).
1720  // The class has to provide the non-const indexing operator[].
1721  // Note that the H submatrix entries can assume only 3 values,
1722  // while the E and F only 2 - hence the aggregate needs 4 bits
1723  // per cell.
1724  //
1725  // \param checkpoints the set of checkpointed rows
1726  // \param checkpoint_id the starting checkpoint used as the beginning of the submatrix
1727  // \param submatrix the output submatrix
1728  //
1729  // \return the submatrix width
1730  //
1731  template <
1732  typename checkpoint_type,
1733  typename submatrix_type>
1735  static
1737  const aligner_type aligner,
1738  const pattern_string pattern,
1739  const qual_string quals,
1740  const text_string text,
1741  const int32 min_score,
1742  checkpoint_type checkpoints,
1743  const uint32 checkpoint_id,
1744  submatrix_type submatrix,
1746  {
1747  typedef typename pattern_string::value_type symbol_type;
1748 
1750 
1752  context( checkpoints, checkpoint_id, submatrix );
1753 
1754  const uint32 window_begin = checkpoint_id * CHECKPOINTS;
1755  const uint32 window_end = nvbio::min( window_begin + CHECKPOINTS, uint32(pattern.length()) );
1756 
1757  NullSink null_sink;
1758  gotoh_alignment_score_dispatch<BAND_LEN,TYPE,PatternBlockingTag,symbol_type>::run( aligner.scheme, context, pattern, quals, text, min_score, null_sink, window_begin, window_end, column );
1759 
1760  return window_end - window_begin;
1761  }
1762 };
1763 
1764 //
1765 // Given the Dynamic Programming submatrix between two checkpoints,
1766 // backtrace from a given destination cell, using Gotoh's algorithm.
1767 // The function returns the resulting source cell.
1768 //
1769 // \tparam BAND_LEN size of the DP band
1770 //
1771 // \tparam CHECKPOINTS number of DP rows between each checkpoint
1772 //
1773 // \tparam checkpoint_type a class to represent the collection of checkpoints,
1774 // represented as a linear array storing each checkpointed
1775 // band contiguously.
1776 // The class has to provide the const indexing operator[].
1777 //
1778 // \tparam submatrix_type a class to store the flow submatrix, represented
1779 // as a linear array of size (BAND_LEN*CHECKPOINTS).
1780 // The class has to provide the const indexing operator[].
1781 // Note that the submatrix entries can assume only 3 values,
1782 // and could hence be packed in 2 bits.
1783 //
1784 // \tparam backtracer_type a class to store the resulting list of backtracking operations.
1785 // A model of \ref Backtracer.
1786 //
1787 // \param checkpoints precalculated checkpoints
1788 // \param checkpoint_id index of the first checkpoint defining the DP submatrix,
1789 // storing all bands between checkpoint_id and checkpoint_id+1.
1790 // \param submatrix precalculated flow submatrix
1791 // \param submatrix_height submatrix width
1792 // \param submatrix_height submatrix height
1793 // \param sink in/out sink of the DP solution
1794 // \param output backtracking output handler
1795 //
1796 // \return true if the alignment source has been found, false otherwise
1797 //
1798 template <
1799  uint32 CHECKPOINTS,
1801  typename scoring_type,
1802  typename checkpoint_type,
1803  typename submatrix_type,
1804  typename backtracer_type>
1807  const GotohAligner<TYPE,scoring_type> aligner,
1808  checkpoint_type checkpoints,
1809  const uint32 checkpoint_id,
1810  submatrix_type submatrix,
1811  const uint32 submatrix_width,
1812  const uint32 submatrix_height,
1813  uint8& state,
1814  uint2& sink,
1815  backtracer_type& backtracer)
1816 {
1817  //
1818  // Backtrace from the second checkpoint to the first looking up the flow submatrix.
1819  //
1820  int32 current_row = sink.x;
1821  int32 current_col = sink.y - checkpoint_id*CHECKPOINTS - 1u;
1822 
1823  NVBIO_CUDA_DEBUG_ASSERT( current_row > 0 &&
1824  current_row <= submatrix_height, "sw::alignment_backtrack(): sink (%u,%u) -> local x coordinate %d not in [0,%d[\n", sink.x, sink.y, current_row, submatrix_height );
1825  NVBIO_CUDA_DEBUG_ASSERT( current_col >= 0, "sw::alignment_backtrack(): sink (%u,%u) -> local y coordinate %d not in [0,%u[ (checkpt %u)\n", sink.x, sink.y, current_col, submatrix_width, checkpoint_id );
1826  NVBIO_CUDA_DEBUG_ASSERT( current_col < submatrix_width, "sw::alignment_backtrack(): sink (%u,%u) -> local y coordinate %d not in [0,%u[ (checkpt %u)\n", sink.x, sink.y, current_col, submatrix_width, checkpoint_id );
1827 
1828  while (current_row > 0 &&
1829  current_col >= 0)
1830  {
1831  const uint32 submatrix_cell = (current_row-1u) * CHECKPOINTS + current_col;
1832  const uint8 op = submatrix[ submatrix_cell ];
1833  const uint8 h_op = op & HMASK;
1834 
1835  if (TYPE == LOCAL)
1836  {
1837  if (state == HSTATE && h_op == SINK)
1838  {
1839  sink.x = current_row;
1840  sink.y = current_col + checkpoint_id*CHECKPOINTS + 1u;
1841  return true;
1842  }
1843  }
1844 
1845  if (state == ESTATE)
1846  {
1847  if ((op & INSERTION_EXT) == 0u) state = HSTATE;
1848  --current_col; backtracer.push( INSERTION );
1849  }
1850  else if (state == FSTATE)
1851  {
1852  if ((op & DELETION_EXT) == 0u) state = HSTATE;
1853  --current_row; backtracer.push( DELETION );
1854  }
1855  else
1856  {
1857  if (h_op == INSERTION)
1858  state = ESTATE;
1859  else if (h_op == DELETION)
1860  state = FSTATE; // NOTE: commenting this line reverts backtracking to plain SW behavior
1861  else
1862  {
1863  --current_col;
1864  --current_row;
1865  backtracer.push( SUBSTITUTION );
1866  }
1867  }
1868  }
1869  sink.x = current_row;
1870  sink.y = current_col + checkpoint_id*CHECKPOINTS + 1u;
1871  return current_row ? false : true; // if current_row == 0 we reached the end of the alignment along the text
1872 }
1873 
1874 } // namespace priv
1875 
1876 } // namespace aln
1877 } // namespace nvbio