NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sw_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
31 #include <nvbio/alignment/sink.h>
32 #include <nvbio/alignment/utils.h>
34 #include <nvbio/basic/iterator.h>
35 
36 
37 namespace nvbio {
38 namespace aln {
39 
40 namespace priv
41 {
42 
45 
54 template <uint32 BAND_LEN, AlignmentType TYPE, typename algorithm_tag = PatternBlockingTag>
56 {
57 
65  template <typename column_type, typename scoring_type, typename score_type>
67  void init(
68  const uint32 j,
69  const uint32 N,
71  const scoring_type& scoring,
72  const score_type zero)
73  {
74  if (j == 0) // ensure this context can be used for multi-pass scoring
75  {
76  for (uint32 i = 0; i < N; ++i)
77  column[i] = equal<algorithm_tag,PatternBlockingTag>() ?
78  TYPE == GLOBAL ? scoring.deletion() * (i+1) : zero :
79  TYPE != LOCAL ? scoring.insertion() * (i+1) : zero;
80  }
81  }
82 
88  template <typename column_type>
91  const uint32 j,
92  const uint32 N,
93  const column_type column) {}
94 
100  template <typename column_type>
103  const uint32 j,
104  const uint32 M,
105  const uint32 N,
106  const column_type column) {}
107 
116  template <typename score_type>
118  void new_cell(
119  const uint32 i,
120  const uint32 N,
121  const uint32 j,
122  const uint32 M,
123  const score_type score,
124  const DirectionVector dir) {}
125 };
126 
131 template <uint32 BAND_LEN, AlignmentType TYPE, typename algorithm_tag, typename checkpoint_type = PatternBlockingTag>
133 {
134  // constructor
135  //
136  // \param checkpoints input checkpoints array
138  SWCheckpointedScoringContext(checkpoint_type checkpoint) :
139  m_checkpoint( checkpoint ) {}
140 
148  template <typename column_type, typename scoring_type, typename score_type>
150  void init(
151  const uint32 j,
152  const uint32 N,
154  const scoring_type& scoring,
155  const score_type zero)
156  {
157  if (j == 0)
158  {
159  for (uint32 i = 0; i < N; ++i)
160  column[i] = equal<algorithm_tag,PatternBlockingTag>() ?
161  TYPE == GLOBAL ? scoring.deletion() * (i+1) : zero :
162  TYPE != LOCAL ? scoring.insertion() * (i+1) : zero;
163  }
164  else
165  {
166  for (uint32 i = 0; i < N; ++i)
167  column[i] = m_checkpoint[i];
168  }
169  }
170 
176  template <typename column_type>
179  const uint32 j,
180  const uint32 N,
181  const column_type column) {}
182 
188  template <typename column_type>
191  const uint32 j,
192  const uint32 M,
193  const uint32 N,
194  const column_type column)
195  {
196  for (uint32 i = 0; i < N; ++i)
197  {
198  m_checkpoint[i].x = column[i].x;
199  m_checkpoint[i].y = column[i].y;
200  }
201  }
202 
211  template <typename score_type>
213  void new_cell(
214  const uint32 i,
215  const uint32 N,
216  const uint32 j,
217  const uint32 M,
218  const score_type score,
219  const DirectionVector dir) {}
220 
221  checkpoint_type m_checkpoint;
222 };
223 
227 template <uint32 BAND_LEN, AlignmentType TYPE, uint32 CHECKPOINTS, typename checkpoint_type>
229 {
231  SWCheckpointContext(checkpoint_type checkpoints) :
232  m_checkpoints( checkpoints ) {}
233 
241  template <typename column_type, typename scoring_type, typename score_type>
243  void init(
244  const uint32 j,
245  const uint32 N,
247  const scoring_type& scoring,
248  const score_type zero)
249  {
250  for (uint32 i = 0; i < N; ++i)
251  column[i] = TYPE == GLOBAL ? scoring.deletion() * (i+1) : zero;
252  }
253 
259  template <typename column_type>
262  const uint32 j,
263  const uint32 N,
264  const column_type column)
265  {
266  // save checkpoint
267  if ((j & (CHECKPOINTS-1)) == 0u)
268  {
269  const uint32 checkpoint_id = j / CHECKPOINTS;
270 
271  for (uint32 i = 0; i < N; ++i)
272  m_checkpoints[ checkpoint_id*N + i ] = column[i];
273  }
274  }
275 
281  template <typename column_type>
284  const uint32 j,
285  const uint32 M,
286  const uint32 N,
287  const column_type column) {}
288 
297  template <typename score_type>
299  void new_cell(
300  const uint32 i,
301  const uint32 N,
302  const uint32 j,
303  const uint32 M,
304  const score_type score,
305  const DirectionVector dir) {}
306 
307  checkpoint_type m_checkpoints;
308 };
309 
314 template <uint32 BAND_LEN, AlignmentType TYPE, uint32 CHECKPOINTS, typename checkpoint_type, typename submatrix_type>
316 {
319  const checkpoint_type checkpoints,
320  const uint32 checkpoint_id,
321  const submatrix_type submatrix) :
322  m_checkpoints( checkpoints ),
323  m_checkpoint_id( checkpoint_id ),
324  m_submatrix( submatrix ) {}
325 
333  template <typename column_type, typename scoring_type, typename score_type>
335  void init(
336  const uint32 j,
337  const uint32 N,
339  const scoring_type& scoring,
340  const score_type zero)
341  {
342  // restore the checkpoint
343  for (uint32 i = 0; i < N; ++i)
344  column[i] = m_checkpoints[ m_checkpoint_id*N + i ];
345  }
346 
352  template <typename column_type>
355  const uint32 j,
356  const uint32 N,
357  const column_type column) {}
358 
364  template <typename column_type>
367  const uint32 j,
368  const uint32 M,
369  const uint32 N,
370  const column_type column) {}
371 
372  // do something with the newly computed cell
373  //
374  // \param i row index
375  // \param N number of rows (column size)
376  // \param j column index
377  // \param M number of columns (row size)
378  // \param score computed score
379  // \param dir direction flow
380  template <typename score_type>
382  void new_cell(
383  const uint32 i,
384  const uint32 N,
385  const uint32 j,
386  const uint32 M,
387  const score_type score,
388  const DirectionVector dir)
389  {
390  // save the direction vector
391  const uint32 offset = m_checkpoint_id * CHECKPOINTS;
392 
393  if (TYPE == LOCAL)
394  m_submatrix[ i * CHECKPOINTS + (j - offset) ] = score ? dir : SINK;
395  else
396  m_submatrix[ i * CHECKPOINTS + (j - offset) ] = dir;
397  }
398 
399  checkpoint_type m_checkpoints;
401  submatrix_type m_submatrix;
402 };
403 
404 // -------------------------- Basic Smith-Waterman functions ---------------------------- //
405 
406 template <uint32 BAND_LEN, AlignmentType TYPE, typename algorithm_tag, typename symbol_type>
408 
416 template <uint32 BAND_LEN, AlignmentType TYPE, typename symbol_type>
418 {
419  template <
420  bool CHECK_M,
421  typename context_type,
422  typename query_cache,
423  typename score_type,
424  typename temp_iterator,
425  typename sink_type,
426  typename scoring_type>
428  static void update_row(
429  context_type& context,
430  const uint32 block,
431  const uint32 M,
432  const uint32 i,
433  const uint32 N,
434  const symbol_type r_i,
435  const query_cache q_cache,
436  temp_iterator temp,
437  score_type& temp_i,
438  score_type* band,
439  sink_type& sink,
440  score_type& max_score,
441  const score_type G,
442  const score_type I,
443  const score_type zero,
444  const scoring_type scoring)
445  {
446  // set the 0-th coefficient in the band to be equal to the (i-1)-th row of the left column (diagonal term)
447  score_type prev = temp_i;
448 
449  // set the 0-th coefficient in the band to be equal to the i-th row of the left column (left term)
450  band[0] = temp_i = temp[i];
451 
452 #if 0
453  // do top-diagonal cell updates
454  #pragma unroll
455  for (uint32 j = 1; j <= BAND_LEN; ++j)
456  {
457  const symbol_type q_j = q_cache[ j-1 ].x;
458  const uint8 qq_j = q_cache[ j-1 ].y;
459  const score_type S_ij = (r_i == q_j) ? scoring.match(qq_j) : scoring.mismatch( r_i, q_j, qq_j );
460  const score_type diagonal = prev + S_ij;
461  const score_type top = band[j] + G;
462  prev = band[j];
463  band[j] = nvbio::max( top, diagonal );
464  }
465 
466  // do left cell updates
467  #pragma unroll
468  for (uint32 j = 1; j <= BAND_LEN; ++j)
469  {
470  const score_type left = band[j-1] + I;
471  score_type hi = nvbio::max( band[j], left );
472  hi = TYPE == LOCAL ? nvbio::max( hi, zero ) : hi; // clamp to zero
473  band[j] = hi;
474  }
475 #else
476  // update all cells
477  #pragma unroll
478  for (uint32 j = 1; j <= BAND_LEN; ++j)
479  {
480  const symbol_type q_j = q_cache[ j-1 ].x;
481  const uint8 qq_j = q_cache[ j-1 ].y;
482  const score_type S_ij = (r_i == q_j) ? scoring.match(qq_j) : scoring.mismatch( r_i, q_j, qq_j );
483  const score_type diagonal = prev + S_ij;
484  const score_type top = band[j] + G;
485  const score_type left = band[j-1] + I;
486  score_type hi = nvbio::max3( top, left, diagonal );
487  hi = TYPE == LOCAL ? nvbio::max( hi, zero ) : hi; // clamp to zero
488  prev = band[j];
489  band[j] = hi;
490 
491  if ((CHECK_M == false) || (block + j <= M))
492  {
493  context.new_cell(
494  i, N,
495  block + j - 1, M,
496  hi,
497  top > left ?
498  (top > diagonal ? DELETION : SUBSTITUTION) :
499  (left > diagonal ? INSERTION : SUBSTITUTION) );
500  }
501  }
502 #endif
503 
504  // save the last entry of the band
505  temp[i] = band[ BAND_LEN ];
506 
507  max_score = nvbio::max( max_score, band[ BAND_LEN ] );
508 
509  if (TYPE == LOCAL)
510  {
511  if (CHECK_M)
512  {
513  // during local alignment we save the best score across all bands
514  for (uint32 j = 1; j <= BAND_LEN; ++j)
515  {
516  if (block + j <= M)
517  sink.report( band[j], make_uint2( i+1, block+j ) );
518  }
519  }
520  else
521  {
522  // during local alignment we save the best score across all bands
523  for (uint32 j = 1; j <= BAND_LEN; ++j)
524  sink.report( band[j], make_uint2( i+1, block+j ) );
525  }
526  }
527  else if (CHECK_M)
528  {
529  if (TYPE == SEMI_GLOBAL)
530  {
531  // during semi-global alignment we save the best score across the last column H[*][M], at each row
532  save_boundary<BAND_LEN>( block, M, band, i, sink );
533  }
534  }
535  }
536 
609  template <
610  typename context_type,
611  typename string_type,
612  typename qual_type,
613  typename ref_type,
614  typename scoring_type,
615  typename sink_type,
616  typename column_type>
618  static
619  bool run(
620  const scoring_type& scoring,
621  context_type& context,
622  string_type query,
623  qual_type quals,
624  ref_type ref,
625  const int32 min_score,
626  sink_type& sink,
627  const uint32 window_begin,
628  const uint32 window_end,
629  column_type temp)
630  {
631  const uint32 M = query.length();
632  const uint32 N = ref.length();
633 
634  typedef int32 score_type;
635  score_type band[BAND_LEN+1];
636 
637  const score_type G = scoring.deletion();
638  const score_type I = scoring.insertion();
639  const score_type zero = score_type(0);
640 
641  // we are going to process the matrix in stripes, where each partial row within each stripe is processed
642  // in registers: hence, we need to save the right-boundary of each stripe (a full matrix column) in some
643  // temporary storage.
644 
645  // fill row 0 with zeros, this will never be updated
646  uchar2 q_cache[ BAND_LEN ];
647 
648  // initialize the first column
649  context.init( window_begin, N, temp, scoring, zero );
650 
651  const uint32 end_block = (window_end == M) ?
652  nvbio::max( BAND_LEN, BAND_LEN * ((M + BAND_LEN-1) / BAND_LEN) ) :
653  window_end + BAND_LEN;
654 
655  // loop across the short edge of the DP matrix (i.e. the columns)
656  for (uint32 block = window_begin; block + BAND_LEN < end_block; block += BAND_LEN)
657  {
658  // save the previous column
659  context.previous_column( block, N, temp );
660 
661  // initialize the first band (corresponding to the 0-th row of the DP matrix)
662  #pragma unroll
663  for (uint32 j = 0; j <= BAND_LEN; ++j)
664  band[j] = (TYPE != LOCAL) ? I * (block + j) : zero;
665 
666  // load a block of entries from each query
667  #pragma unroll
668  for (uint32 t = 0; t < BAND_LEN; ++t)
669  q_cache[ t ] = make_uchar2( query[ block + t ], quals[ block + t ] );
670 
671  score_type max_score = Field_traits<score_type>::min();
672 
673  score_type temp_i = band[0];
674 
675  // loop across the long edge of the DP matrix (i.e. the rows)
676  for (uint32 i = 0; i < N; ++i)
677  {
678  // load the new character from the reference
679  const uint8 r_i = ref[i];
680 
681  update_row<false>(
682  context,
683  block, M,
684  i, N,
685  r_i,
686  q_cache,
687  temp,
688  temp_i,
689  band,
690  sink,
691  max_score,
692  G,I,
693  zero,
694  scoring );
695  }
696 
697  // we are now (M - block - BAND_LEN) columns from the last one: check whether
698  // we could theoretically reach the minimum score
699  const score_type missing_cols = score_type(M - block - BAND_LEN);
700  if (max_score + missing_cols * scoring.match(255) < score_type( min_score ))
701  return false;
702  }
703 
704  if (window_end == M)
705  {
706  const uint32 block = end_block - BAND_LEN;
707 
708  // save the previous column
709  context.previous_column( block, N, temp );
710 
711  // initialize the first band (corresponding to the 0-th row of the DP matrix)
712  for (uint32 j = 0; j <= BAND_LEN; ++j)
713  band[j] = (TYPE != LOCAL) ? I * (block + j) : zero;
714 
715  // load a block of entries from each query
716  const uint32 block_end = nvbio::min( block + BAND_LEN, M );
717  #pragma unroll
718  for (uint32 t = 0; t < BAND_LEN; ++t)
719  {
720  if (block + t < block_end)
721  q_cache[ t ] = make_uchar2( query[ block + t ], quals[ block + t ] );
722  }
723 
724  score_type max_score = Field_traits<score_type>::min();
725 
726  score_type temp_i = band[0];
727 
728  // loop across the long edge of the DP matrix (i.e. the rows)
729  for (uint32 i = 0; i < N; ++i)
730  {
731  // load the new character from the reference
732  const uint8 r_i = ref[i];
733 
734  update_row<true>(
735  context,
736  block, M,
737  i, N,
738  r_i,
739  q_cache,
740  temp,
741  temp_i,
742  band,
743  sink,
744  max_score,
745  G,I,
746  zero,
747  scoring );
748  }
749  }
750 
751  // save the last column
752  context.last_column( window_end, M, N, temp );
753 
754  if (TYPE == GLOBAL)
755  save_Mth<BAND_LEN>( M, band, N-1, sink );
756 
757  return true;
758  }
759 
834  template <
835  uint32 MAX_REF_LEN,
836  typename context_type,
837  typename string_type,
838  typename qual_type,
839  typename ref_type,
840  typename scoring_type,
841  typename sink_type>
843  static
844  bool run(
845  const scoring_type& scoring,
846  context_type& context,
847  string_type query,
848  qual_type quals,
849  ref_type ref,
850  const int32 min_score,
851  sink_type& sink,
852  const uint32 window_begin,
853  const uint32 window_end)
854  {
855  // instantiated a local memory array
856  int16 temp[MAX_REF_LEN];
857  int16* temp_ptr = temp;
858 
859  return run(
860  scoring,
861  context,
862  query,
863  quals,
864  ref,
865  min_score,
866  sink,
867  window_begin,
868  window_end,
869  temp_ptr );
870  }
871 };
872 
880 template <uint32 BAND_LEN, AlignmentType TYPE, typename symbol_type>
882 {
883  template <
884  bool CHECK_N,
885  typename context_type,
886  typename ref_cache,
887  typename score_type,
888  typename temp_iterator,
889  typename sink_type,
890  typename scoring_type>
892  static void update_row(
893  context_type& context,
894  const uint32 block,
895  const uint32 N,
896  const uint32 i,
897  const uint32 M,
898  const symbol_type q_i,
899  const uint8 qq_i,
900  const score_type m_i,
901  const ref_cache r_cache,
902  temp_iterator temp,
903  score_type& temp_i,
904  score_type* band,
905  sink_type& sink,
906  score_type& max_score,
907  const score_type G,
908  const score_type I,
909  const score_type zero,
910  const scoring_type scoring)
911  {
912  // set the 0-th coefficient in the band to be equal to the (i-1)-th row of the left column (diagonal term)
913  score_type prev = temp_i;
914 
915  // set the 0-th coefficient in the band to be equal to the i-th row of the left column (left term)
916  band[0] = temp_i = temp[i];
917 
918  // update all cells
919  #pragma unroll
920  for (uint32 j = 1; j <= BAND_LEN; ++j)
921  {
922  const symbol_type r_j = r_cache[ j-1 ];
923  const score_type S_ij = (r_j == q_i) ? m_i : scoring.mismatch( r_j, q_i, qq_i );
924  const score_type diagonal = prev + S_ij;
925  const score_type top = band[j] + I;
926  const score_type left = band[j-1] + G;
927  score_type hi = nvbio::max3( top, left, diagonal );
928  hi = TYPE == LOCAL ? nvbio::max( hi, zero ) : hi; // clamp to zero
929  prev = band[j];
930  band[j] = hi;
931 
932  if ((CHECK_N == false) || (block + j <= N))
933  {
934  context.new_cell(
935  i, M,
936  block + j - 1, N,
937  hi,
938  top > left ?
939  (top > diagonal ? INSERTION : SUBSTITUTION) :
940  (left > diagonal ? DELETION : SUBSTITUTION) );
941  }
942  }
943 
944  // save the last entry of the band
945  temp[i] = band[ BAND_LEN ];
946 
947  max_score = nvbio::max( max_score, band[ BAND_LEN ] );
948 
949  if (TYPE == LOCAL)
950  {
951  if (CHECK_N)
952  {
953  // during local alignment we save the best score across all bands
954  for (uint32 j = 1; j <= BAND_LEN; ++j)
955  {
956  if (block + j <= N)
957  sink.report( band[j], make_uint2( block+j, i+1 ) );
958  }
959  }
960  else
961  {
962  // during local alignment we save the best score across all bands
963  for (uint32 j = 1; j <= BAND_LEN; ++j)
964  sink.report( band[j], make_uint2( block+j, i+1 ) );
965  }
966  }
967  }
968 
1041  template <
1042  typename context_type,
1043  typename string_type,
1044  typename qual_type,
1045  typename ref_type,
1046  typename scoring_type,
1047  typename sink_type,
1048  typename column_type>
1050  static
1051  bool run(
1052  const scoring_type& scoring,
1053  context_type& context,
1054  string_type query,
1055  qual_type quals,
1056  ref_type ref,
1057  const int32 min_score,
1058  sink_type& sink,
1059  const uint32 window_begin,
1060  const uint32 window_end,
1061  column_type temp)
1062  {
1063  const uint32 M = query.length();
1064  const uint32 N = ref.length();
1065 
1066  typedef int32 score_type;
1067  score_type band[BAND_LEN+1];
1068 
1069  const score_type G = scoring.deletion();
1070  const score_type I = scoring.insertion();
1071  const score_type zero = score_type(0);
1072 
1073  // we are going to process the matrix in stripes, where each partial row within each stripe is processed
1074  // in registers: hence, we need to save the right-boundary of each stripe (a full matrix column) in some
1075  // temporary storage.
1076 
1077  uint8 r_cache[ BAND_LEN ];
1078 
1079  // initialize the first column
1080  context.init( window_begin, M, temp, scoring, zero );
1081 
1082  const uint32 end_block = (window_end == N) ?
1083  nvbio::max( BAND_LEN, BAND_LEN * ((N + BAND_LEN-1) / BAND_LEN) ) :
1084  window_end + BAND_LEN;
1085 
1086  // loop across the short edge of the DP matrix (i.e. the columns)
1087  for (uint32 block = window_begin; block + BAND_LEN < end_block; block += BAND_LEN)
1088  {
1089  // save the previous column
1090  context.previous_column( block, M, temp );
1091 
1092  // initialize the first band (corresponding to the 0-th row of the DP matrix)
1093  #pragma unroll
1094  for (uint32 j = 0; j <= BAND_LEN; ++j)
1095  band[j] = (TYPE == GLOBAL) ? G * (block + j) : zero;
1096 
1097  // load a block of entries from the reference
1098  #pragma unroll
1099  for (uint32 t = 0; t < BAND_LEN; ++t)
1100  r_cache[ t ] = ref[ block + t ];
1101 
1102  score_type max_score = Field_traits<score_type>::min();
1103 
1104  score_type temp_i = band[0];
1105 
1106  // loop across the long edge of the DP matrix (i.e. the rows)
1107  for (uint32 i = 0; i < M; ++i)
1108  {
1109  // load the new character from the query
1110  const uint8 q_i = query[i];
1111  const uint8 qq_i = quals[i];
1112 
1113  const int32 m_i = scoring.match(qq_i);
1114 
1115  update_row<false>(
1116  context,
1117  block, N,
1118  i, M,
1119  q_i,
1120  qq_i,
1121  m_i,
1122  r_cache,
1123  temp,
1124  temp_i,
1125  band,
1126  sink,
1127  max_score,
1128  G,I,
1129  zero,
1130  scoring );
1131  }
1132  if (TYPE == SEMI_GLOBAL)
1133  {
1134  // during semi-global alignment we save the best score across the last row
1135  for (uint32 j = 1; j <= BAND_LEN; ++j)
1136  sink.report( band[j], make_uint2( block+j, M ) );
1137  }
1138  }
1139 
1140  if (window_end == N)
1141  {
1142  const uint32 block = end_block - BAND_LEN;
1143 
1144  // save the previous column
1145  context.previous_column( block, M, temp );
1146 
1147  // initialize the first band (corresponding to the 0-th row of the DP matrix)
1148  #pragma unroll
1149  for (uint32 j = 0; j <= BAND_LEN; ++j)
1150  band[j] = (TYPE == GLOBAL) ? G * (block + j) : zero;
1151 
1152  // load a block of entries from each query
1153  const uint32 block_end = nvbio::min( block + BAND_LEN, N );
1154  #pragma unroll
1155  for (uint32 t = 0; t < BAND_LEN; ++t)
1156  {
1157  if (block + t < block_end)
1158  r_cache[ t ] = ref[ block + t ];
1159  }
1160 
1161  score_type max_score = Field_traits<score_type>::min();
1162 
1163  score_type temp_i = band[0];
1164 
1165  // loop across the long edge of the DP matrix (i.e. the rows)
1166  for (uint32 i = 0; i < M; ++i)
1167  {
1168  // load the new character from the reference
1169  const uint8 q_i = query[i];
1170  const uint8 qq_i = quals[i];
1171 
1172  const int32 m_i = scoring.match(qq_i);
1173 
1174  update_row<true>(
1175  context,
1176  block, N,
1177  i, M,
1178  q_i,
1179  qq_i,
1180  m_i,
1181  r_cache,
1182  temp,
1183  temp_i,
1184  band,
1185  sink,
1186  max_score,
1187  G,I,
1188  zero,
1189  scoring );
1190  }
1191 
1192  if (TYPE == SEMI_GLOBAL)
1193  {
1194  // during semi-global alignment we save the best score across the last row
1195  for (uint32 j = 1; j <= BAND_LEN; ++j)
1196  {
1197  if (block + j <= N)
1198  sink.report( band[j], make_uint2( block+j, M ) );
1199  }
1200  }
1201  else if (TYPE == GLOBAL)
1202  {
1203  // during global alignment we save the best score at cell [N][M]
1204  for (uint32 j = 1; j <= BAND_LEN; ++j)
1205  {
1206  if (block + j == N)
1207  sink.report( band[j], make_uint2( block+j, M ) );
1208  }
1209  }
1210  }
1211 
1212  // save the last column
1213  context.last_column( window_end, N, M, temp );
1214  return true;
1215  }
1216 
1291  template <
1292  uint32 MAX_PATTERN_LEN,
1293  typename context_type,
1294  typename string_type,
1295  typename qual_type,
1296  typename ref_type,
1297  typename scoring_type,
1298  typename sink_type>
1300  static
1301  bool run(
1302  const scoring_type& scoring,
1303  context_type& context,
1304  string_type query,
1305  qual_type quals,
1306  ref_type ref,
1307  const int32 min_score,
1308  sink_type& sink,
1309  const uint32 window_begin,
1310  const uint32 window_end)
1311  {
1312  // instantiated a local memory array
1313  int16 temp[MAX_PATTERN_LEN];
1314  int16* temp_ptr = temp;
1315 
1316  return run(
1317  scoring,
1318  context,
1319  query,
1320  quals,
1321  ref,
1322  min_score,
1323  sink,
1324  window_begin,
1325  window_end,
1326  temp_ptr );
1327  }
1328 };
1329 
1330 template <AlignmentType TYPE, uint32 DIM, typename symbol_type>
1332 {
1333  static const uint32 BAND_LEN = 16u / DIM;
1334 };
1335 
1336 template <AlignmentType TYPE, uint32 DIM>
1338 {
1339 #if __CUDA_ARCH__ >= 300
1340  static const uint32 BAND_LEN = 8u;
1341 #else
1342  static const uint32 BAND_LEN = 1u;
1343 #endif
1344 };
1345 
1355 template <
1357  typename scoring_type,
1358  typename algorithm_tag,
1359  typename pattern_string,
1360  typename qual_string,
1361  typename text_string,
1362  typename column_type>
1364  SmithWatermanAligner<TYPE,scoring_type,algorithm_tag>,
1365  pattern_string,
1366  qual_string,
1367  text_string,
1368  column_type>
1369 {
1371 
1384  template <typename sink_type>
1386  static bool dispatch(
1387  const aligner_type aligner,
1388  const pattern_string pattern,
1389  const qual_string quals,
1390  const text_string text,
1391  const int32 min_score,
1392  sink_type& sink,
1394  {
1395  typedef typename pattern_string::value_type symbol_type;
1396 
1398 
1400 
1401  const uint32 length = equal<algorithm_tag,PatternBlockingTag>() ? pattern.length() : text.length();
1402 
1403  return sw_alignment_score_dispatch<BAND_LEN,TYPE,algorithm_tag,symbol_type>::run( aligner.scheme, context, pattern, quals, text, min_score, sink, 0, length, column );
1404  }
1405 
1421  template <
1422  typename sink_type,
1423  typename checkpoint_type>
1425  static bool dispatch(
1426  const aligner_type aligner,
1427  const pattern_string pattern,
1428  const qual_string quals,
1429  const text_string text,
1430  const int32 min_score,
1431  const uint32 window_begin,
1432  const uint32 window_end,
1433  sink_type& sink,
1434  checkpoint_type checkpoint,
1436  {
1437  typedef typename pattern_string::value_type symbol_type;
1438 
1440 
1442 
1443  return sw_alignment_score_dispatch<BAND_LEN,TYPE,algorithm_tag,symbol_type>::run( aligner.scheme, context, pattern, quals, text, min_score, sink, window_begin, window_end, column );
1444  }
1445 
1459  template <typename sink_type>
1461  static bool dispatch(
1462  const aligner_type aligner,
1463  const pattern_string pattern,
1464  const qual_string quals,
1465  const text_string text,
1466  const int32 min_score,
1467  const uint32 window_begin,
1468  const uint32 window_end,
1469  sink_type& sink,
1471  {
1472  typedef typename pattern_string::value_type symbol_type;
1473 
1475 
1477 
1478  return sw_alignment_score_dispatch<BAND_LEN,TYPE,algorithm_tag,symbol_type>::run( aligner.scheme, context, pattern, quals, text, min_score, sink, window_begin, window_end, column );
1479  }
1480 };
1481 
1492 template <
1493  uint32 CHECKPOINTS,
1495  typename scoring_type,
1496  typename pattern_string,
1497  typename qual_string,
1498  typename text_string,
1499  typename column_type>
1501  CHECKPOINTS,
1502  SmithWatermanAligner<TYPE,scoring_type>,
1503  pattern_string,
1504  qual_string,
1505  text_string,
1506  column_type>
1507 {
1509 
1529  template <
1530  typename sink_type,
1531  typename checkpoint_type>
1533  static
1535  const aligner_type aligner,
1536  const pattern_string pattern,
1537  const qual_string quals,
1538  const text_string text,
1539  const int32 min_score,
1540  sink_type& sink,
1541  checkpoint_type checkpoints,
1543  {
1544  typedef typename pattern_string::value_type symbol_type;
1545 
1547 
1549 
1550  sw_alignment_score_dispatch<BAND_LEN,TYPE,PatternBlockingTag,symbol_type>::run( aligner.scheme, context, pattern, quals, text, min_score, sink, 0, pattern.length(), column );
1551  }
1552 
1577  template <
1578  typename checkpoint_type,
1579  typename submatrix_type>
1581  static
1583  const aligner_type aligner,
1584  const pattern_string pattern,
1585  const qual_string quals,
1586  const text_string text,
1587  const int32 min_score,
1588  checkpoint_type checkpoints,
1589  const uint32 checkpoint_id,
1590  submatrix_type submatrix,
1592  {
1593  typedef typename pattern_string::value_type symbol_type;
1594 
1596 
1598  context( checkpoints, checkpoint_id, submatrix );
1599 
1600  const uint32 window_begin = checkpoint_id * CHECKPOINTS;
1601  const uint32 window_end = nvbio::min( window_begin + CHECKPOINTS, uint32(pattern.length()) );
1602 
1603  NullSink null_sink;
1604  sw_alignment_score_dispatch<BAND_LEN,TYPE,PatternBlockingTag,symbol_type>::run( aligner.scheme, context, pattern, quals, text, min_score, null_sink, window_begin, window_end, column );
1605 
1606  return window_end - window_begin;
1607  }
1608 };
1609 
1645 template <
1646  uint32 CHECKPOINTS,
1648  typename scoring_type,
1649  typename checkpoint_type,
1650  typename submatrix_type,
1651  typename output_type>
1655  checkpoint_type checkpoints,
1656  const uint32 checkpoint_id,
1657  submatrix_type submatrix,
1658  const uint32 submatrix_width,
1659  const uint32 submatrix_height,
1660  uint8& state,
1661  uint2& sink,
1662  output_type& output)
1663 {
1664  //
1665  // Backtrack from the second checkpoint to the first looking up the flow submatrix.
1666  //
1667  int32 current_row = sink.x;
1668  int32 current_col = sink.y - checkpoint_id*CHECKPOINTS - 1u;
1669 
1670  NVBIO_CUDA_DEBUG_ASSERT( current_row > 0 &&
1671  current_row <= submatrix_height, "sw::alignment_backtrack(): sink (%u,%u) -> local x coordinate %d not in [0,%d[\n", sink.x, sink.y, current_row, submatrix_height );
1672  NVBIO_CUDA_DEBUG_ASSERT( current_col >= 0, "sw::alignment_backtrack(): sink (%u,%u) -> local y coordinate %d not in [0,%u[ (checkpt %u)\n", sink.x, sink.y, current_col, submatrix_width, checkpoint_id );
1673  NVBIO_CUDA_DEBUG_ASSERT( current_col < submatrix_width, "sw::alignment_backtrack(): sink (%u,%u) -> local y coordinate %d not in [0,%u[ (checkpt %u)\n", sink.x, sink.y, current_col, submatrix_width, checkpoint_id );
1674 
1675  while (current_row > 0 &&
1676  current_col >= 0)
1677  {
1678  const uint32 submatrix_cell = (current_row-1u) * CHECKPOINTS + current_col;
1679  const uint8 op = submatrix[ submatrix_cell ];
1680 
1681  if (TYPE == LOCAL)
1682  {
1683  if (op == SINK)
1684  {
1685  sink.x = current_row;
1686  sink.y = current_col + checkpoint_id*CHECKPOINTS + 1u;
1687  return true;
1688  }
1689  }
1690 
1691  if (op != DELETION)
1692  --current_col; // move to the previous column
1693 
1694  if (op != INSERTION)
1695  --current_row; // move to the previous row
1696 
1697  output.push( op );
1698  }
1699  sink.x = current_row;
1700  sink.y = current_col + checkpoint_id*CHECKPOINTS + 1u;
1701  return current_row ? false : true; // if current_row == 0 we reached the end of the alignment along the text
1702 }
1703 
1705 
1706 } // namespace priv
1707 
1708 } // namespace aln
1709 } // namespace nvbio