48 template <
typename stream_type,
typename column_type>
52 typedef typename stream_type::aligner_type aligner_type;
53 typedef typename stream_type::context_type context_type;
54 typedef typename stream_type::strings_type strings_type;
58 if (stream.init_context( work_id, &context ) ==
false)
61 stream.output( work_id, &context );
66 const uint32 len = equal<typename aligner_type::algorithm_tag,PatternBlockingTag>() ?
67 stream.pattern_length( work_id, &context ) :
68 stream.text_length( work_id, &context );
72 stream.load_strings( work_id, 0, len, &context, &strings );
85 stream.output( work_id, &context );
88 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS, u
int32 COLUMN_SIZE,
typename stream_type,
typename cell_type>
95 if (tid >= stream.size())
104 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS,
typename stream_type,
typename cell_type>
111 if (tid >= stream.size())
121 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS,
typename stream_type,
typename cell_type>
124 persistent_batched_alignment_score_kernel(stream_type stream, cell_type* columns,
const uint32 stride)
140 if (work_id < stream_end)
145 template <u
int32 BLOCKDIM,
typename stream_type,
typename cell_type>
149 typedef typename stream_type::aligner_type aligner_type;
150 typedef typename stream_type::context_type context_type;
151 typedef typename stream_type::strings_type strings_type;
154 context_type context;
155 if (stream.init_context( work_id, &context ) ==
false)
160 stream.output( work_id, &context );
166 const uint32 len = equal<typename aligner_type::algorithm_tag,PatternBlockingTag>() ?
167 stream.pattern_length( work_id, &context ) :
168 stream.text_length( work_id, &context );
171 strings_type strings;
172 stream.load_strings( work_id, 0, len, &context, &strings );
180 const int32 score = warp::alignment_score<BLOCKDIM>(
191 context.sink.report( score, sink );
194 stream.output( work_id, &context );
198 template <u
int32 BLOCKDIM,
typename stream_type,
typename cell_type>
203 if (wid >= stream.size())
209 template <u
int32 BLOCKDIM,
typename stream_type,
typename cell_type>
215 const uint32 stream_end = stream.size();
219 warp_batched_alignment_score<BLOCKDIM>( stream, columns, stride,
work_id, wid );
236 template <
typename stream_type>
248 const uint32 column_size = equal<typename aligner_type::algorithm_tag,PatternBlockingTag>() ?
252 return align<4>( column_size );
270 template <
typename stream_type>
273 return column_storage( max_pattern_len, max_text_len ) * MAX_THREADS;
278 template <
typename stream_type>
281 return column_storage( max_pattern_len, max_text_len ) * MAX_THREADS;
286 template <
typename stream_type>
289 const uint32 column_size = equal<typename aligner_type::algorithm_tag,PatternBlockingTag>() ?
290 uint32( stream.max_pattern_length() ) :
291 uint32( stream.max_text_length() );
294 stream.max_pattern_length(),
295 stream.max_text_length(),
302 #pragma omp parallel for
309 const uint32 thread_id = 0;
317 cell_type* column = columns + thread_id * column_size;
329 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS,
typename stream_type>
339 const uint32 column_size = equal<typename aligner_type::algorithm_tag,PatternBlockingTag>() ?
343 return align<4>( column_size );
361 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS,
typename stream_type>
364 return column_storage( max_pattern_len, max_text_len ) * 1024;
369 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS,
typename stream_type>
372 return align<32>( column_storage( max_pattern_len, max_text_len ) *
stream_size );
377 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS,
typename stream_type>
380 const uint32 column_size = equal<typename aligner_type::algorithm_tag,PatternBlockingTag>() ?
381 uint32( stream.max_text_length() ) :
382 uint32( stream.max_pattern_length() );
385 if (column_size <= 1024)
389 lmem_batched_alignment_score_kernel<BLOCKDIM,MINBLOCKS,1024> <<<n_blocks,
BLOCKDIM>>>(
397 const uint64 min_temp_size = min_temp_storage(
398 stream.max_pattern_length(),
399 stream.max_text_length(),
405 temp_size =
nvbio::max( min_temp_size, temp_size );
406 temp_vec.resize( temp_size );
411 const uint32 queue_capacity =
uint32( temp_size / column_storage( stream.max_pattern_length(), stream.max_text_length() ) );
413 if (queue_capacity >= stream.size())
417 batched_alignment_score_kernel<BLOCKDIM,MINBLOCKS> <<<n_blocks,
BLOCKDIM>>>(
429 persistent_batched_alignment_score_kernel<BLOCKDIM,MINBLOCKS> <<<n_blocks,
BLOCKDIM>>>(
442 template <
typename stream_type>
465 template <
typename stream_type>
468 return max_text_len *
sizeof(
cell_type) * 1024;
473 template <
typename stream_type>
476 return max_text_len *
sizeof(
cell_type) * stream_size;
481 template <
typename stream_type>
485 stream.max_pattern_length(),
486 stream.max_text_length(),
492 temp_size =
nvbio::max( min_temp_size, temp_size );
493 temp_vec.resize( temp_size );
500 const uint32 queue_capacity = align_down<WARP_SIZE>(
uint32( temp_size / (align<WARP_SIZE>( stream.max_text_length() ) *
sizeof(
cell_type)) ) );
503 if (queue_capacity >= stream.size())
505 const uint32 n_warps = stream.size();
506 const uint32 n_blocks = (n_warps + BLOCKWARPS-1) / BLOCKWARPS;
508 warp_batched_alignment_score_kernel<BLOCKDIM> <<<n_blocks,
BLOCKDIM>>>(
511 align<WARP_SIZE>( stream.size() ) );
520 warp_persistent_batched_alignment_score_kernel<BLOCKDIM> <<<n_blocks,
BLOCKDIM>>>(
532 template <
typename stream_type>
544 const uint32 column_size = equal<typename aligner_type::algorithm_tag,PatternBlockingTag>() ?
548 return align<4>( column_size );
555 return column_storage( max_pattern_len, max_text_len ) * 1024;
562 return column_storage( max_pattern_len, max_text_len ) *
stream_size;
570 stream.max_pattern_length(),
571 stream.max_text_length(),
577 temp_size =
nvbio::max( min_temp_size, temp_size );
578 temp_vec.resize( temp_size );
583 const uint32 max_pattern_len = stream.max_pattern_length();
584 const uint32 max_text_len = stream.max_text_length();
585 const uint32 queue_capacity =
uint32( temp_size / column_storage( max_pattern_len, max_text_len ) );
587 m_work_queue.set_capacity( queue_capacity );
597 m_work_queue.consume( score_stream );
612 template <u
int32 CHECKPOINTS,
typename stream_type,
typename cell_type>
616 typedef typename stream_type::aligner_type aligner_type;
617 typedef typename stream_type::context_type context_type;
618 typedef typename stream_type::strings_type strings_type;
621 context_type context;
622 if (stream.init_context( work_id, &context ) ==
false)
625 stream.output( work_id, &context );
630 const uint32 pattern_len = stream.pattern_length( work_id, &context );
633 strings_type strings;
634 stream.load_strings( work_id, 0, pattern_len, &context, &strings );
638 checkpoint_type checkpoint = checkpoint_type( checkpoints + thread_id, stride );
642 submatrix_storage_type submatrix_storage = submatrix_storage_type( submatrices + thread_id, stride );
648 column_type column =
column_type( columns + thread_id, stride );
651 context.alignment = alignment_traceback<CHECKPOINTS>(
663 stream.output( work_id, &context );
666 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS, u
int32 CHECKPOINTS,
typename stream_type,
typename cell_type>
673 if (tid >= stream.size())
679 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS, u
int32 CHECKPOINTS,
typename stream_type,
typename cell_type>
687 const uint32 stream_end = stream.size();
690 for (
uint32 stream_begin = 0; stream_begin <
stream_end; stream_begin += grid_threads)
694 if (work_id < stream_end)
706 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS, u
int32 CHECKPOINTS,
typename stream_type>
716 const uint32 column_size = equal<typename aligner_type::algorithm_tag,PatternBlockingTag>() ?
720 return align<4>( column_size );
727 if (equal<typename aligner_type::algorithm_tag,PatternBlockingTag>())
728 return align<4>(
uint32( max_text_len * ((max_pattern_len + CHECKPOINTS-1) / CHECKPOINTS) *
sizeof(
cell_type) ) );
730 return align<4>(
uint32( max_pattern_len * ((max_text_len + CHECKPOINTS-1) / CHECKPOINTS) *
sizeof(
cell_type) ) );
737 if (equal<typename aligner_type::algorithm_tag,PatternBlockingTag>())
739 typedef typename stream_type::aligner_type
aligner_type;
742 return ((max_text_len * CHECKPOINTS + ELEMENTS_PER_WORD-1) / ELEMENTS_PER_WORD) *
sizeof(
uint32);
746 typedef typename stream_type::aligner_type
aligner_type;
749 return ((max_pattern_len * CHECKPOINTS + ELEMENTS_PER_WORD-1) / ELEMENTS_PER_WORD) *
sizeof(
uint32);
757 return column_storage( max_pattern_len, max_text_len ) +
758 checkpoint_storage( max_pattern_len, max_text_len ) +
759 submatrix_storage( max_pattern_len, max_text_len );
777 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS, u
int32 CHECKPOINTS,
typename stream_type>
780 return element_storage( max_pattern_len, max_text_len ) * 1024;
785 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS, u
int32 CHECKPOINTS,
typename stream_type>
788 return element_storage( max_pattern_len, max_text_len ) *
stream_size;
793 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS, u
int32 CHECKPOINTS,
typename stream_type>
796 const uint64 min_temp_size = min_temp_storage(
797 stream.max_pattern_length(),
798 stream.max_text_length(),
804 temp_size =
nvbio::max( min_temp_size, temp_size );
805 temp_vec.resize( temp_size );
810 const uint32 max_pattern_len = stream.max_pattern_length();
811 const uint32 max_text_len = stream.max_text_length();
812 const uint32 queue_capacity =
uint32( temp_size / element_storage( max_pattern_len, max_text_len ) );
814 const uint64 column_size = column_storage( max_pattern_len, max_text_len );
815 const uint64 checkpoints_size = checkpoint_storage( max_pattern_len, max_text_len );
817 if (queue_capacity >= stream.size())
823 uint32* submatrices = (
uint32*) (temp + (checkpoints_size + column_size) * stream.size());
825 batched_alignment_traceback_kernel<BLOCKDIM,MINBLOCKS,CHECKPOINTS> <<<n_blocks,
BLOCKDIM>>>(
841 uint32* submatrices = (
uint32*) (temp + (checkpoints_size + column_size) * queue_capacity);
843 persistent_batched_alignment_traceback_kernel<BLOCKDIM,MINBLOCKS,CHECKPOINTS> <<<n_blocks,
BLOCKDIM>>>(
858 typename t_aligner_type,
859 typename pattern_set_type,
860 typename qualities_set_type,
861 typename text_set_type,
862 typename sink_iterator>
874 typedef typename std::iterator_traits<sink_iterator>::value_type
sink_type;
898 const pattern_set_type _patterns,
899 const qualities_set_type _quals,
900 const text_set_type _texts,
901 sink_iterator _sinks,
902 const uint32 _max_pattern_length,
903 const uint32 _max_text_length) :
954 const uint32 window_begin,
990 typename aligner_type,
991 typename pattern_set_type,
992 typename text_set_type,
993 typename sink_iterator,
994 typename scheduler_type>
996 const aligner_type aligner,
997 const pattern_set_type patterns,
998 const text_set_type texts,
1000 const scheduler_type scheduler,
1001 const uint32 max_pattern_length,
1002 const uint32 max_text_length)
1021 batch.enact( stream );
1028 typename aligner_type,
1029 typename pattern_set_type,
1030 typename qualities_set_type,
1031 typename text_set_type,
1032 typename sink_iterator,
1033 typename scheduler_type>
1035 const aligner_type aligner,
1036 const pattern_set_type patterns,
1037 const qualities_set_type quals,
1038 const text_set_type texts,
1039 sink_iterator sinks,
1040 const scheduler_type scheduler,
1041 const uint32 max_pattern_length,
1042 const uint32 max_text_length)
1061 batch.enact( stream );
1069 typename aligner_type,
1070 typename pattern_set_type,
1071 typename text_set_type,
1072 typename sink_iterator,
1073 typename scheduler_type>
1075 const aligner_type aligner,
1076 const pattern_set_type patterns,
1077 const text_set_type texts,
1078 sink_iterator sinks,
1079 const scheduler_type scheduler,
1080 const uint32 max_pattern_length,
1081 const uint32 max_text_length)
1100 batch.enact( stream );