41 #define NVBIO_SW_VECTOR_LOADING
58 template <u
int32 BAND_LEN, AlignmentType TYPE,
typename algorithm_tag>
69 template <
typename column_type,
typename scoring_type,
typename score_type>
75 const scoring_type& scoring,
76 const score_type zero,
77 const score_type infimum)
81 for (
uint32 i = 0; i < N; ++i)
83 column[i].x = equal<algorithm_tag,PatternBlockingTag>() ?
84 TYPE ==
GLOBAL ? scoring.text_gap_open() + scoring.text_gap_extension() * i : zero :
85 TYPE !=
LOCAL ? scoring.text_gap_open() + scoring.text_gap_extension() * i : zero;
86 column[i].y =
TYPE ==
LOCAL ? zero : infimum;
96 template <
typename column_type>
108 template <
typename column_type>
124 template <
typename score_type>
131 const score_type score,
141 template <u
int32 BAND_LEN, AlignmentType TYPE,
typename algorithm_tag,
typename checkpo
int_type>
159 template <
typename column_type,
typename scoring_type,
typename score_type>
165 const scoring_type& scoring,
166 const score_type zero,
167 const score_type infimum)
171 for (
uint32 i = 0; i < N; ++i)
173 column[i].x = equal<algorithm_tag,PatternBlockingTag>() ?
174 TYPE ==
GLOBAL ? scoring.text_gap_open() + scoring.text_gap_extension() * i : zero :
175 TYPE !=
LOCAL ? scoring.text_gap_open() + scoring.text_gap_extension() * i : zero;
176 column[i].y =
TYPE ==
LOCAL ? zero : infimum;
181 for (
uint32 i = 0; i < N; ++i)
194 template <
typename column_type>
206 template <
typename column_type>
214 for (
uint32 i = 0; i < N; ++i)
229 template <
typename score_type>
236 const score_type score,
247 template <u
int32 BAND_LEN, AlignmentType TYPE, u
int32 CHECKPOINTS,
typename checkpo
int_type>
265 template <
typename column_type,
typename scoring_type,
typename score_type>
271 const scoring_type& scoring,
272 const score_type zero,
273 const score_type infimum)
275 for (
uint32 i = 0; i < N; ++i)
277 column[i].x =
TYPE ==
GLOBAL ? scoring.text_gap_open() + scoring.text_gap_extension() * i : zero;
278 column[i].y =
TYPE ==
LOCAL ? zero : infimum;
287 template <
typename column_type>
294 typedef typename std::iterator_traits<column_type>::value_type
vector_type;
298 if ((j & (CHECKPOINTS-1)) == 0u)
300 const uint32 checkpoint_id = j / CHECKPOINTS;
302 for (
uint32 i = 0; i < N; ++i)
303 m_checkpoints[ checkpoint_id*N + i ] = make_vector<value_type>( column[i].x, column[i].y );
312 template <
typename column_type>
328 template <
typename score_type>
335 const score_type score,
347 template <u
int32 BAND_LEN, AlignmentType TYPE, u
int32 CHECKPOINTS,
typename checkpo
int_type,
typename submatrix_type>
358 const uint32 checkpoint_id,
359 const submatrix_type submatrix) :
372 template <
typename column_type,
typename scoring_type,
typename score_type>
378 const scoring_type& scoring,
379 const score_type zero,
380 const score_type infimum)
382 typedef typename std::iterator_traits<column_type>::value_type value_type;
385 for (
uint32 i = 0; i < N; ++i)
398 template <
typename column_type>
410 template <
typename column_type>
426 template <
typename score_type>
433 const score_type score,
442 (
TYPE ==
LOCAL ? (score == 0 ?
SINK : hdir) : hdir) | edir | fdir;
444 m_submatrix[ i * CHECKPOINTS + (j - offset) ] = cdir;
452 template <u
int32 BAND_LEN, AlignmentType TYPE,
typename algorithm_tag,
typename symbol_type>
459 template <u
int32 BAND_LEN, AlignmentType TYPE,
typename symbol_type>
466 typename context_type,
467 typename query_cache,
469 typename temp_iterator,
471 typename scoring_type>
474 context_type& context,
479 const symbol_type r_i,
480 const query_cache q_cache,
486 const score_type min_score,
487 score_type& max_score,
488 const score_type G_o,
489 const score_type G_e,
490 const score_type zero,
491 const scoring_type scoring)
493 typedef typename std::iterator_traits<temp_iterator>::value_type temp_cell_type;
508 score_type H_diag = temp_i;
510 H_band[0] = temp_i = temp[i].x;
511 score_type E = temp[i].y;
514 for (
uint32 j = 1; j <= BAND_LEN; ++j)
517 const score_type ftop = F_band[j] + G_e;
518 const score_type htop = H_band[j] + G_o;
523 const score_type eleft = E + G_e;
524 const score_type hleft = H_band[j-1] + G_o;
528 const int2 info_j = q_cache[ j-1 ];
530 const symbol_type q_j = symbol_type(info_j.x);
531 const score_type qq_j = score_type(info_j.y);
534 const score_type S_ij = scoring.substitution( i, block + j, r_i, q_j, qq_j );
535 const score_type diagonal = H_diag + S_ij;
536 const score_type top = F_band[j];
537 const score_type left = E;
538 score_type hi =
nvbio::max3( left, top, diagonal );
543 if ((CHECK_M ==
false) || (block + j <= M))
551 (left > diagonal ?
INSERTION : SUBSTITUTION),
558 temp[i] = make_vector<temp_scalar_type>( H_band[ BAND_LEN ], E );
565 max_score =
nvbio::max( max_score, H_band[ BAND_LEN ] );
572 for (
uint32 j = 1; j <= BAND_LEN; ++j)
575 sink.report( H_band[j], make_uint2( i+1, block+j ) );
581 for (
uint32 j = 1; j <= BAND_LEN; ++j)
582 sink.report( H_band[j], make_uint2( i+1, block+j ) );
590 save_boundary<BAND_LEN>( block, M, H_band, i, sink );
620 typename context_type,
624 typename scoring_type,
630 const scoring_type& scoring,
631 context_type& context,
635 const int32 min_score,
637 const uint32 window_begin,
649 const uint32 M = query.length();
650 const uint32 N = ref.length();
652 typedef int32 score_type;
653 score_type H_band[BAND_LEN+1];
654 score_type F_band[BAND_LEN+1];
656 typedef typename std::iterator_traits<column_type>::value_type temp_cell_type;
659 const score_type G_o = scoring.pattern_gap_open();
660 const score_type G_e = scoring.pattern_gap_extension();
661 const score_type zero = score_type(0);
664 int2 q_cache[ BAND_LEN ];
668 context.init( window_begin, N, temp, scoring, zero, infimum );
670 const uint32 end_block = (window_end == M) ?
671 nvbio::max( BAND_LEN, BAND_LEN * ((M + BAND_LEN-1) / BAND_LEN) ) :
672 window_end + BAND_LEN;
675 for (
uint32 block = window_begin; block + BAND_LEN < end_block; block += BAND_LEN)
678 context.previous_column( block, N, temp );
682 for (
uint32 t = 0; t < BAND_LEN; ++t)
684 const symbol_type q = query[ block + t ];
685 const uint8 qq = quals[ block + t ];
687 q_cache[ t ] = make_int2( q, qq );
693 for (
uint32 j = 0; j <= BAND_LEN; ++j)
695 H_band[j] = (
TYPE !=
LOCAL) ? (block + j > 0 ? G_o + G_e * (block + j - 1u) : zero) : zero;
701 score_type temp_i = H_band[0];
705 #if defined(NVBIO_SW_VECTOR_LOADING)
716 for (
uint32 i = 0; i < vec_range.x; ++i)
719 const uint8 r_i = ref[i];
738 for (
uint32 i = vec_range.x; i < vec_range.y; i += REF_VECTOR_WIDTH)
740 uint8 r[REF_VECTOR_WIDTH];
745 for (
uint32 j = 0; j < REF_VECTOR_WIDTH; ++j)
748 const uint8 r_i = r[j];
769 for (
uint32 i = vec_range.y; i < N; ++i)
772 const uint8 r_i = ref[i];
801 for (
uint32 i = 0; i < N; ++i)
804 const uint8 r_i = ref[i];
828 const score_type missing_cols = score_type(M - block - BAND_LEN);
829 if (max_score + missing_cols * scoring.match(255) < score_type( min_score ))
836 const uint32 block = end_block - BAND_LEN;
839 context.previous_column( block, N, temp );
844 for (
uint32 t = 0; t < BAND_LEN; ++t)
846 if (block + t < block_end)
848 const symbol_type q = query[ block + t ];
849 const uint8 qq = quals[ block + t ];
851 q_cache[ t ] = make_int2( q, qq );
858 for (
uint32 j = 0; j <= BAND_LEN; ++j)
860 H_band[j] = (
TYPE !=
LOCAL) ? (block + j > 0 ? G_o + G_e * (block + j - 1u) : zero) : zero;
866 score_type temp_i = H_band[0];
869 for (
uint32 i = 0; i < N; ++i)
872 const uint8 r_i = ref[i];
894 context.last_column( window_end, M, N, temp );
897 save_Mth<BAND_LEN>( M, H_band, N-1, sink );
928 typename context_type,
932 typename scoring_type,
937 const scoring_type& scoring,
938 context_type& context,
942 const int32 min_score,
944 const uint32 window_begin,
948 short2 temp[MAX_REF_LEN];
949 short2* temp_ptr = temp;
969 template <u
int32 BAND_LEN, AlignmentType TYPE,
typename symbol_type>
976 typename context_type,
979 typename temp_iterator,
981 typename scoring_type>
984 context_type& context,
989 const symbol_type q_i,
992 const ref_cache r_cache,
998 const score_type min_score,
999 score_type& max_score,
1000 const score_type G_o,
1001 const score_type G_e,
1002 const score_type zero,
1003 const scoring_type scoring)
1005 typedef typename std::iterator_traits<temp_iterator>::value_type temp_cell_type;
1020 score_type H_diag = temp_i;
1022 H_band[0] = temp_i = temp[i].x;
1023 score_type E = temp[i].y;
1026 for (
uint32 j = 1; j <= BAND_LEN; ++j)
1029 const score_type ftop = F_band[j] + G_e;
1030 const score_type htop = H_band[j] + G_o;
1035 const score_type eleft = E + G_e;
1036 const score_type hleft = H_band[j-1] + G_o;
1040 const symbol_type r_j = r_cache[ j-1 ];
1042 const score_type S_ij = scoring.substitution( block + j, i, r_j, q_i, qq_i );
1043 const score_type diagonal = H_diag + S_ij;
1044 const score_type top = F_band[j];
1045 const score_type left = E;
1046 score_type hi =
nvbio::max3( left, top, diagonal );
1051 if ((CHECK_N ==
false) || (block + j <= N))
1059 (left > diagonal ?
DELETION : SUBSTITUTION),
1066 temp[i] = make_vector<temp_scalar_type>( H_band[ BAND_LEN ], E );
1073 max_score =
nvbio::max( max_score, H_band[ BAND_LEN ] );
1080 for (
uint32 j = 1; j <= BAND_LEN; ++j)
1083 sink.report( H_band[j], make_uint2( block+j, i+1 ) );
1089 for (
uint32 j = 1; j <= BAND_LEN; ++j)
1090 sink.report( H_band[j], make_uint2( block+j, i+1 ) );
1120 typename context_type,
1121 typename query_type,
1124 typename scoring_type,
1130 const scoring_type& scoring,
1131 context_type& context,
1135 const int32 min_score,
1137 const uint32 window_begin,
1149 const uint32 M = query.length();
1150 const uint32 N = ref.length();
1152 typedef int32 score_type;
1153 score_type H_band[BAND_LEN+1];
1154 score_type F_band[BAND_LEN+1];
1156 typedef typename std::iterator_traits<column_type>::value_type temp_cell_type;
1159 const score_type G_o = scoring.pattern_gap_open();
1160 const score_type G_e = scoring.pattern_gap_extension();
1161 const score_type zero = score_type(0);
1164 uint8 r_cache[ BAND_LEN ];
1167 context.init( window_begin, M, temp, scoring, zero, infimum );
1169 const uint32 end_block = (window_end == N) ?
1170 nvbio::max( BAND_LEN, BAND_LEN * ((N + BAND_LEN-1) / BAND_LEN) ) :
1171 window_end + BAND_LEN;
1174 for (
uint32 block = window_begin; block + BAND_LEN < end_block; block += BAND_LEN)
1177 context.previous_column( block, M, temp );
1181 for (
uint32 t = 0; t < BAND_LEN; ++t)
1182 r_cache[ t ] = ref[ block + t ];
1186 for (
uint32 j = 0; j <= BAND_LEN; ++j)
1188 H_band[j] = (
TYPE ==
GLOBAL) ? (block + j > 0 ? G_o + G_e * (block + j - 1u) : zero) : zero;
1189 F_band[j] = infimum;
1194 score_type temp_i = H_band[0];
1197 for (
uint32 i = 0; i < M; ++i)
1200 const uint8 q_i = query[i];
1201 const uint8 qq_i = quals[i];
1227 for (
uint32 j = 1; j <= BAND_LEN; ++j)
1228 sink.report( H_band[j], make_uint2( block+j, M ) );
1233 const score_type missing_cols = score_type(N - block - BAND_LEN);
1234 if (max_score + missing_cols * scoring.match(255) < score_type( min_score ))
1239 if (window_end == N)
1241 const uint32 block = end_block - BAND_LEN;
1244 context.previous_column( block, M, temp );
1249 for (
uint32 t = 0; t < BAND_LEN; ++t)
1251 if (block + t < block_end)
1252 r_cache[ t ] = ref[ block + t ];
1257 for (
uint32 j = 0; j <= BAND_LEN; ++j)
1259 H_band[j] = (
TYPE ==
GLOBAL) ? (block + j > 0 ? G_o + G_e * (block + j - 1u) : zero) : zero;
1260 F_band[j] = infimum;
1265 score_type temp_i = H_band[0];
1267 #if defined(NVBIO_SW_VECTOR_LOADING)
1278 for (
uint32 i = 0; i < vec_range.x; ++i)
1281 const uint8 q_i = query[i];
1282 const uint8 qq_i = quals[i];
1302 for (
uint32 i = vec_range.x; i < vec_range.y; i += QUERY_VECTOR_WIDTH)
1304 uint8 q[QUERY_VECTOR_WIDTH];
1305 uint8 qq[QUERY_VECTOR_WIDTH];
1310 for (
uint32 j = 0; j < QUERY_VECTOR_WIDTH; ++j)
1311 qq[j] = quals[i + j];
1313 for (
uint32 j = 0; j < QUERY_VECTOR_WIDTH; ++j)
1316 const uint8 q_i = q[j];
1317 const uint8 qq_i = qq[j];
1338 for (
uint32 i = vec_range.y; i < M; ++i)
1341 const uint8 q_i = query[i];
1342 const uint8 qq_i = quals[i];
1375 for (
uint32 i = 0; i < M; ++i)
1378 const uint8 q_i = query[i];
1379 const uint8 qq_i = quals[i];
1406 for (
uint32 j = 1; j <= BAND_LEN; ++j)
1409 sink.report( H_band[j], make_uint2( block+j, M ) );
1415 for (
uint32 j = 1; j <= BAND_LEN; ++j)
1418 sink.report( H_band[j], make_uint2( block+j, M ) );
1424 context.last_column( window_end, N, M, temp );
1454 typename context_type,
1455 typename string_type,
1458 typename scoring_type,
1463 const scoring_type& scoring,
1464 context_type& context,
1468 const int32 min_score,
1470 const uint32 window_begin,
1474 short2 temp[MAX_PATTERN_LEN];
1475 short2* temp_ptr = temp;
1491 template <AlignmentType TYPE, u
int32 DIM,
typename symbol_type>
1497 template <AlignmentType TYPE, u
int32 DIM>
1500 #if __CUDA_ARCH__ >= 300
1518 typename scoring_type,
1519 typename algorithm_tag,
1520 typename pattern_string,
1521 typename qual_string,
1522 typename text_string,
1546 template <
typename sink_type>
1550 const pattern_string pattern,
1551 const qual_string quals,
1552 const text_string
text,
1553 const int32 min_score,
1557 typedef typename pattern_string::value_type symbol_type;
1563 const uint32 length = equal<algorithm_tag,PatternBlockingTag>() ? pattern.length() : text.length();
1565 return gotoh_alignment_score_dispatch<BAND_LEN,TYPE,algorithm_tag,symbol_type>::run( aligner.
scheme, context, pattern, quals, text, min_score, sink, 0, length, column );
1585 typename checkpoint_type>
1589 const pattern_string pattern,
1590 const qual_string quals,
1591 const text_string
text,
1592 const int32 min_score,
1593 const uint32 window_begin,
1596 checkpoint_type checkpoint,
1599 typedef typename pattern_string::value_type symbol_type;
1605 return gotoh_alignment_score_dispatch<BAND_LEN,TYPE,algorithm_tag,symbol_type>::run( aligner.
scheme, context, pattern, quals, text, min_score, sink, window_begin, window_end, column );
1621 template <
typename sink_type>
1625 const pattern_string pattern,
1626 const qual_string quals,
1627 const text_string
text,
1628 const int32 min_score,
1629 const uint32 window_begin,
1634 typedef typename pattern_string::value_type symbol_type;
1640 return gotoh_alignment_score_dispatch<BAND_LEN,TYPE,algorithm_tag,symbol_type>::run( aligner.
scheme, context, pattern, quals, text, min_score, sink, window_begin, window_end, column );
1658 typename scoring_type,
1659 typename pattern_string,
1660 typename qual_string,
1661 typename text_string,
1684 typename checkpoint_type>
1689 const pattern_string pattern,
1690 const qual_string quals,
1691 const text_string
text,
1692 const int32 min_score,
1697 typedef typename pattern_string::value_type symbol_type;
1703 gotoh_alignment_score_dispatch<BAND_LEN,TYPE,PatternBlockingTag,symbol_type>::run( aligner.
scheme, context, pattern, quals, text, min_score, sink, 0, pattern.length(),
column );
1732 typename checkpoint_type,
1733 typename submatrix_type>
1738 const pattern_string pattern,
1739 const qual_string quals,
1740 const text_string
text,
1741 const int32 min_score,
1743 const uint32 checkpoint_id,
1744 submatrix_type submatrix,
1747 typedef typename pattern_string::value_type symbol_type;
1752 context( checkpoints, checkpoint_id, submatrix );
1754 const uint32 window_begin = checkpoint_id * CHECKPOINTS;
1758 gotoh_alignment_score_dispatch<BAND_LEN,TYPE,PatternBlockingTag,symbol_type>::run( aligner.
scheme, context, pattern, quals, text, min_score, null_sink, window_begin, window_end, column );
1760 return window_end - window_begin;
1801 typename scoring_type,
1802 typename checkpoint_type,
1803 typename submatrix_type,
1804 typename backtracer_type>
1809 const uint32 checkpoint_id,
1810 submatrix_type submatrix,
1811 const uint32 submatrix_width,
1812 const uint32 submatrix_height,
1815 backtracer_type& backtracer)
1820 int32 current_row = sink.x;
1821 int32 current_col = sink.y - checkpoint_id*CHECKPOINTS - 1u;
1824 current_row <= submatrix_height,
"sw::alignment_backtrack(): sink (%u,%u) -> local x coordinate %d not in [0,%d[\n", sink.x, sink.y, current_row, submatrix_height );
1825 NVBIO_CUDA_DEBUG_ASSERT( current_col >= 0,
"sw::alignment_backtrack(): sink (%u,%u) -> local y coordinate %d not in [0,%u[ (checkpt %u)\n", sink.x, sink.y, current_col, submatrix_width, checkpoint_id );
1826 NVBIO_CUDA_DEBUG_ASSERT( current_col < submatrix_width,
"sw::alignment_backtrack(): sink (%u,%u) -> local y coordinate %d not in [0,%u[ (checkpt %u)\n", sink.x, sink.y, current_col, submatrix_width, checkpoint_id );
1828 while (current_row > 0 &&
1831 const uint32 submatrix_cell = (current_row-1u) * CHECKPOINTS + current_col;
1832 const uint8 op = submatrix[ submatrix_cell ];
1839 sink.x = current_row;
1840 sink.y = current_col + checkpoint_id*CHECKPOINTS + 1u;
1848 --current_col; backtracer.push(
INSERTION );
1850 else if (state ==
FSTATE)
1853 --current_row; backtracer.push(
DELETION );
1869 sink.x = current_row;
1870 sink.y = current_col + checkpoint_id*CHECKPOINTS + 1u;
1871 return current_row ?
false :
true;