54 template <u
int32 BAND_LEN, AlignmentType TYPE,
typename algorithm_tag = PatternBlockingTag>
65 template <
typename column_type,
typename scoring_type,
typename score_type>
71 const scoring_type& scoring,
72 const score_type zero)
76 for (
uint32 i = 0; i < N; ++i)
77 column[i] = equal<algorithm_tag,PatternBlockingTag>() ?
78 TYPE ==
GLOBAL ? scoring.deletion() * (i+1) : zero :
79 TYPE !=
LOCAL ? scoring.insertion() * (i+1) : zero;
88 template <
typename column_type>
100 template <
typename column_type>
116 template <
typename score_type>
123 const score_type score,
131 template <u
int32 BAND_LEN, AlignmentType TYPE,
typename algorithm_tag,
typename checkpo
int_type = PatternBlockingTag>
148 template <
typename column_type,
typename scoring_type,
typename score_type>
154 const scoring_type& scoring,
155 const score_type zero)
159 for (
uint32 i = 0; i < N; ++i)
160 column[i] = equal<algorithm_tag,PatternBlockingTag>() ?
161 TYPE ==
GLOBAL ? scoring.deletion() * (i+1) : zero :
162 TYPE !=
LOCAL ? scoring.insertion() * (i+1) : zero;
166 for (
uint32 i = 0; i < N; ++i)
176 template <
typename column_type>
188 template <
typename column_type>
196 for (
uint32 i = 0; i < N; ++i)
211 template <
typename score_type>
218 const score_type score,
227 template <u
int32 BAND_LEN, AlignmentType TYPE, u
int32 CHECKPOINTS,
typename checkpo
int_type>
241 template <
typename column_type,
typename scoring_type,
typename score_type>
247 const scoring_type& scoring,
248 const score_type zero)
250 for (
uint32 i = 0; i < N; ++i)
251 column[i] =
TYPE ==
GLOBAL ? scoring.deletion() * (i+1) : zero;
259 template <
typename column_type>
267 if ((j & (CHECKPOINTS-1)) == 0u)
269 const uint32 checkpoint_id = j / CHECKPOINTS;
271 for (
uint32 i = 0; i < N; ++i)
281 template <
typename column_type>
297 template <
typename score_type>
304 const score_type score,
314 template <u
int32 BAND_LEN, AlignmentType TYPE, u
int32 CHECKPOINTS,
typename checkpo
int_type,
typename submatrix_type>
320 const uint32 checkpoint_id,
321 const submatrix_type submatrix) :
333 template <
typename column_type,
typename scoring_type,
typename score_type>
339 const scoring_type& scoring,
340 const score_type zero)
343 for (
uint32 i = 0; i < N; ++i)
352 template <
typename column_type>
364 template <
typename column_type>
380 template <
typename score_type>
387 const score_type score,
396 m_submatrix[ i * CHECKPOINTS + (j - offset) ] = dir;
406 template <u
int32 BAND_LEN, AlignmentType TYPE,
typename algorithm_tag,
typename symbol_type>
416 template <u
int32 BAND_LEN, AlignmentType TYPE,
typename symbol_type>
421 typename context_type,
422 typename query_cache,
424 typename temp_iterator,
426 typename scoring_type>
429 context_type& context,
434 const symbol_type r_i,
435 const query_cache q_cache,
440 score_type& max_score,
443 const score_type zero,
444 const scoring_type scoring)
447 score_type prev = temp_i;
450 band[0] = temp_i = temp[i];
455 for (
uint32 j = 1; j <= BAND_LEN; ++j)
457 const symbol_type q_j = q_cache[ j-1 ].x;
458 const uint8 qq_j = q_cache[ j-1 ].y;
459 const score_type S_ij = (r_i == q_j) ? scoring.match(qq_j) : scoring.mismatch( r_i, q_j, qq_j );
460 const score_type diagonal = prev + S_ij;
461 const score_type top = band[j] +
G;
468 for (
uint32 j = 1; j <= BAND_LEN; ++j)
470 const score_type left = band[j-1] +
I;
478 for (
uint32 j = 1; j <= BAND_LEN; ++j)
480 const symbol_type q_j = q_cache[ j-1 ].x;
481 const uint8 qq_j = q_cache[ j-1 ].y;
482 const score_type S_ij = (r_i == q_j) ? scoring.match(qq_j) : scoring.mismatch( r_i, q_j, qq_j );
483 const score_type diagonal = prev + S_ij;
484 const score_type top = band[j] +
G;
485 const score_type left = band[j-1] +
I;
486 score_type hi =
nvbio::max3( top, left, diagonal );
491 if ((CHECK_M ==
false) || (block + j <= M))
499 (left > diagonal ?
INSERTION : SUBSTITUTION) );
505 temp[i] = band[ BAND_LEN ];
507 max_score =
nvbio::max( max_score, band[ BAND_LEN ] );
514 for (
uint32 j = 1; j <= BAND_LEN; ++j)
517 sink.report( band[j], make_uint2( i+1, block+j ) );
523 for (
uint32 j = 1; j <= BAND_LEN; ++j)
524 sink.report( band[j], make_uint2( i+1, block+j ) );
532 save_boundary<BAND_LEN>( block, M, band, i, sink );
610 typename context_type,
611 typename string_type,
614 typename scoring_type,
620 const scoring_type& scoring,
621 context_type& context,
625 const int32 min_score,
627 const uint32 window_begin,
631 const uint32 M = query.length();
632 const uint32 N = ref.length();
634 typedef int32 score_type;
635 score_type band[BAND_LEN+1];
637 const score_type
G = scoring.deletion();
638 const score_type
I = scoring.insertion();
639 const score_type zero = score_type(0);
646 uchar2 q_cache[ BAND_LEN ];
649 context.init( window_begin, N, temp, scoring, zero );
651 const uint32 end_block = (window_end == M) ?
652 nvbio::max( BAND_LEN, BAND_LEN * ((M + BAND_LEN-1) / BAND_LEN) ) :
653 window_end + BAND_LEN;
656 for (
uint32 block = window_begin; block + BAND_LEN < end_block; block += BAND_LEN)
659 context.previous_column( block, N, temp );
663 for (
uint32 j = 0; j <= BAND_LEN; ++j)
664 band[j] = (
TYPE !=
LOCAL) ? I * (block + j) : zero;
668 for (
uint32 t = 0; t < BAND_LEN; ++t)
669 q_cache[ t ] = make_uchar2( query[ block + t ], quals[ block + t ] );
673 score_type temp_i = band[0];
676 for (
uint32 i = 0; i < N; ++i)
679 const uint8 r_i = ref[i];
699 const score_type missing_cols = score_type(M - block - BAND_LEN);
700 if (max_score + missing_cols * scoring.match(255) < score_type( min_score ))
706 const uint32 block = end_block - BAND_LEN;
709 context.previous_column( block, N, temp );
712 for (
uint32 j = 0; j <= BAND_LEN; ++j)
713 band[j] = (
TYPE !=
LOCAL) ? I * (block + j) : zero;
718 for (
uint32 t = 0; t < BAND_LEN; ++t)
720 if (block + t < block_end)
721 q_cache[ t ] = make_uchar2( query[ block + t ], quals[ block + t ] );
726 score_type temp_i = band[0];
729 for (
uint32 i = 0; i < N; ++i)
732 const uint8 r_i = ref[i];
752 context.last_column( window_end, M, N, temp );
755 save_Mth<BAND_LEN>( M, band, N-1, sink );
836 typename context_type,
837 typename string_type,
840 typename scoring_type,
845 const scoring_type& scoring,
846 context_type& context,
850 const int32 min_score,
852 const uint32 window_begin,
856 int16 temp[MAX_REF_LEN];
857 int16* temp_ptr = temp;
880 template <u
int32 BAND_LEN, AlignmentType TYPE,
typename symbol_type>
885 typename context_type,
888 typename temp_iterator,
890 typename scoring_type>
893 context_type& context,
898 const symbol_type q_i,
900 const score_type m_i,
901 const ref_cache r_cache,
906 score_type& max_score,
909 const score_type zero,
910 const scoring_type scoring)
913 score_type prev = temp_i;
916 band[0] = temp_i = temp[i];
920 for (
uint32 j = 1; j <= BAND_LEN; ++j)
922 const symbol_type r_j = r_cache[ j-1 ];
923 const score_type S_ij = (r_j == q_i) ? m_i : scoring.mismatch( r_j, q_i, qq_i );
924 const score_type diagonal = prev + S_ij;
925 const score_type top = band[j] +
I;
926 const score_type left = band[j-1] +
G;
927 score_type hi =
nvbio::max3( top, left, diagonal );
932 if ((CHECK_N ==
false) || (block + j <= N))
940 (left > diagonal ?
DELETION : SUBSTITUTION) );
945 temp[i] = band[ BAND_LEN ];
947 max_score =
nvbio::max( max_score, band[ BAND_LEN ] );
954 for (
uint32 j = 1; j <= BAND_LEN; ++j)
957 sink.report( band[j], make_uint2( block+j, i+1 ) );
963 for (
uint32 j = 1; j <= BAND_LEN; ++j)
964 sink.report( band[j], make_uint2( block+j, i+1 ) );
1042 typename context_type,
1043 typename string_type,
1046 typename scoring_type,
1052 const scoring_type& scoring,
1053 context_type& context,
1057 const int32 min_score,
1059 const uint32 window_begin,
1063 const uint32 M = query.length();
1064 const uint32 N = ref.length();
1066 typedef int32 score_type;
1067 score_type band[BAND_LEN+1];
1069 const score_type
G = scoring.deletion();
1070 const score_type
I = scoring.insertion();
1071 const score_type zero = score_type(0);
1077 uint8 r_cache[ BAND_LEN ];
1080 context.init( window_begin, M, temp, scoring, zero );
1082 const uint32 end_block = (window_end == N) ?
1083 nvbio::max( BAND_LEN, BAND_LEN * ((N + BAND_LEN-1) / BAND_LEN) ) :
1084 window_end + BAND_LEN;
1087 for (
uint32 block = window_begin; block + BAND_LEN < end_block; block += BAND_LEN)
1090 context.previous_column( block, M, temp );
1094 for (
uint32 j = 0; j <= BAND_LEN; ++j)
1095 band[j] = (
TYPE ==
GLOBAL) ? G * (block + j) : zero;
1099 for (
uint32 t = 0; t < BAND_LEN; ++t)
1100 r_cache[ t ] = ref[ block + t ];
1104 score_type temp_i = band[0];
1107 for (
uint32 i = 0; i < M; ++i)
1110 const uint8 q_i = query[i];
1111 const uint8 qq_i = quals[i];
1113 const int32 m_i = scoring.match(qq_i);
1135 for (
uint32 j = 1; j <= BAND_LEN; ++j)
1136 sink.report( band[j], make_uint2( block+j, M ) );
1140 if (window_end == N)
1142 const uint32 block = end_block - BAND_LEN;
1145 context.previous_column( block, M, temp );
1149 for (
uint32 j = 0; j <= BAND_LEN; ++j)
1150 band[j] = (
TYPE ==
GLOBAL) ? G * (block + j) : zero;
1155 for (
uint32 t = 0; t < BAND_LEN; ++t)
1157 if (block + t < block_end)
1158 r_cache[ t ] = ref[ block + t ];
1163 score_type temp_i = band[0];
1166 for (
uint32 i = 0; i < M; ++i)
1169 const uint8 q_i = query[i];
1170 const uint8 qq_i = quals[i];
1172 const int32 m_i = scoring.match(qq_i);
1195 for (
uint32 j = 1; j <= BAND_LEN; ++j)
1198 sink.report( band[j], make_uint2( block+j, M ) );
1204 for (
uint32 j = 1; j <= BAND_LEN; ++j)
1207 sink.report( band[j], make_uint2( block+j, M ) );
1213 context.last_column( window_end, N, M, temp );
1293 typename context_type,
1294 typename string_type,
1297 typename scoring_type,
1302 const scoring_type& scoring,
1303 context_type& context,
1307 const int32 min_score,
1309 const uint32 window_begin,
1313 int16 temp[MAX_PATTERN_LEN];
1314 int16* temp_ptr = temp;
1330 template <AlignmentType TYPE, u
int32 DIM,
typename symbol_type>
1336 template <AlignmentType TYPE, u
int32 DIM>
1339 #if __CUDA_ARCH__ >= 300
1357 typename scoring_type,
1358 typename algorithm_tag,
1359 typename pattern_string,
1360 typename qual_string,
1361 typename text_string,
1384 template <
typename sink_type>
1388 const pattern_string pattern,
1389 const qual_string quals,
1390 const text_string
text,
1391 const int32 min_score,
1395 typedef typename pattern_string::value_type symbol_type;
1401 const uint32 length = equal<algorithm_tag,PatternBlockingTag>() ? pattern.length() : text.length();
1403 return sw_alignment_score_dispatch<BAND_LEN,TYPE,algorithm_tag,symbol_type>::run( aligner.
scheme, context, pattern, quals, text, min_score, sink, 0, length, column );
1423 typename checkpoint_type>
1427 const pattern_string pattern,
1428 const qual_string quals,
1429 const text_string
text,
1430 const int32 min_score,
1431 const uint32 window_begin,
1434 checkpoint_type checkpoint,
1437 typedef typename pattern_string::value_type symbol_type;
1443 return sw_alignment_score_dispatch<BAND_LEN,TYPE,algorithm_tag,symbol_type>::run( aligner.
scheme, context, pattern, quals, text, min_score, sink, window_begin, window_end, column );
1459 template <
typename sink_type>
1463 const pattern_string pattern,
1464 const qual_string quals,
1465 const text_string
text,
1466 const int32 min_score,
1467 const uint32 window_begin,
1472 typedef typename pattern_string::value_type symbol_type;
1478 return sw_alignment_score_dispatch<BAND_LEN,TYPE,algorithm_tag,symbol_type>::run( aligner.
scheme, context, pattern, quals, text, min_score, sink, window_begin, window_end, column );
1495 typename scoring_type,
1496 typename pattern_string,
1497 typename qual_string,
1498 typename text_string,
1531 typename checkpoint_type>
1536 const pattern_string pattern,
1537 const qual_string quals,
1538 const text_string
text,
1539 const int32 min_score,
1544 typedef typename pattern_string::value_type symbol_type;
1550 sw_alignment_score_dispatch<BAND_LEN,TYPE,PatternBlockingTag,symbol_type>::run( aligner.
scheme, context, pattern, quals, text, min_score, sink, 0, pattern.length(),
column );
1578 typename checkpoint_type,
1579 typename submatrix_type>
1584 const pattern_string pattern,
1585 const qual_string quals,
1586 const text_string
text,
1587 const int32 min_score,
1589 const uint32 checkpoint_id,
1590 submatrix_type submatrix,
1593 typedef typename pattern_string::value_type symbol_type;
1598 context( checkpoints, checkpoint_id, submatrix );
1600 const uint32 window_begin = checkpoint_id * CHECKPOINTS;
1604 sw_alignment_score_dispatch<BAND_LEN,TYPE,PatternBlockingTag,symbol_type>::run( aligner.
scheme, context, pattern, quals, text, min_score, null_sink, window_begin, window_end, column );
1606 return window_end - window_begin;
1648 typename scoring_type,
1649 typename checkpoint_type,
1650 typename submatrix_type,
1651 typename output_type>
1656 const uint32 checkpoint_id,
1657 submatrix_type submatrix,
1658 const uint32 submatrix_width,
1659 const uint32 submatrix_height,
1662 output_type& output)
1667 int32 current_row = sink.x;
1668 int32 current_col = sink.y - checkpoint_id*CHECKPOINTS - 1u;
1671 current_row <= submatrix_height,
"sw::alignment_backtrack(): sink (%u,%u) -> local x coordinate %d not in [0,%d[\n", sink.x, sink.y, current_row, submatrix_height );
1672 NVBIO_CUDA_DEBUG_ASSERT( current_col >= 0,
"sw::alignment_backtrack(): sink (%u,%u) -> local y coordinate %d not in [0,%u[ (checkpt %u)\n", sink.x, sink.y, current_col, submatrix_width, checkpoint_id );
1673 NVBIO_CUDA_DEBUG_ASSERT( current_col < submatrix_width,
"sw::alignment_backtrack(): sink (%u,%u) -> local y coordinate %d not in [0,%u[ (checkpt %u)\n", sink.x, sink.y, current_col, submatrix_width, checkpoint_id );
1675 while (current_row > 0 &&
1678 const uint32 submatrix_cell = (current_row-1u) * CHECKPOINTS + current_col;
1679 const uint8 op = submatrix[ submatrix_cell ];
1685 sink.x = current_row;
1686 sink.y = current_col + checkpoint_id*CHECKPOINTS + 1u;
1699 sink.x = current_row;
1700 sink.y = current_col + checkpoint_id*CHECKPOINTS + 1u;
1701 return current_row ?
false :
true;