// Fragment of a per-work-item banded scoring routine, templated on BAND_LEN and
// stream_type. The signature and several body lines are missing from this
// listing; the notes below describe only the statements that are visible.
// NOTE(review): presumably this is batched_banded_alignment_score, which the
// kernel and host loop fragments call with <BAND_LEN>( stream, tid ) -- confirm.
43 template <u
int32 BAND_LEN,
typename stream_type>
// Local aliases for the aligner, context and strings types exported by the stream.
47 typedef typename stream_type::aligner_type aligner_type;
48 typedef typename stream_type::context_type context_type;
49 typedef typename stream_type::strings_type strings_type;
// Initialize the context for this work_id and test for a true result.
// NOTE(review): presumably the scoring statements below sit inside this branch;
// the braces are not visible here -- confirm.
53 if (stream.init_context( work_id, &context ) ==
true)
// Length passed to load_strings(): the text length when the aligner's
// algorithm_tag equals TextBlockingTag, otherwise the pattern length.
56 const uint32 len = equal<typename aligner_type::algorithm_tag,TextBlockingTag>() ?
57 stream.text_length( work_id, &context ) :
58 stream.pattern_length( work_id, &context );
// Ask the stream to fill `strings` for this work item, passing 0 and len.
62 stream.load_strings( work_id, 0, len, &context, &strings );
// Invoke the banded scorer with the compile-time BAND_LEN (arguments not shown).
65 banded_alignment_score<BAND_LEN>(
// Pass the work item's context to the stream's output handler.
75 stream.output( work_id, &context );
// Kernel fragment: each thread computes a global index and forwards it, with
// the stream, to batched_banded_alignment_score<BAND_LEN>.
// BLOCKDIM and MINBLOCKS are template parameters that are not used in the
// lines visible here.
// NOTE(review): they presumably feed a launch-bounds qualifier on a missing line -- confirm.
78 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS, u
int32 BAND_LEN,
typename stream_type>
// The stream is received by value as a const object.
81 batched_banded_alignment_score_kernel(
const stream_type
stream)
// Global thread index: block index times block size plus the index within the block.
83 const uint32 tid = blockIdx.x * blockDim.x + threadIdx.x;
// Bounds test against the number of work items in the stream.
// NOTE(review): the guarded statement is missing; presumably an early return -- confirm.
84 if (tid >= stream.size())
// Process the work item whose id equals this thread's global index.
87 batched_banded_alignment_score<BAND_LEN>(
stream, tid );
// NOTE(review): the declaration introduced by this template header is not
// present in this listing.
97 template <u
int32 BAND_LEN,
typename stream_type>
// Host-side batch driver fragment (its signature is not visible here): it
// calls batched_banded_alignment_score<BAND_LEN> once per index in
// [0, stream.size()).
120 template <u
int32 BAND_LEN,
typename stream_type>
// The loop below is annotated for OpenMP parallel execution. The index is a
// signed int and stream.size() is cast to int for the comparison.
// NOTE(review): presumably the signed index is there to satisfy the OpenMP
// loop form supported by the target compilers -- confirm.
124 #pragma omp parallel for
126 for (
int tid = 0; tid < int( stream.size() ); ++tid)
127 batched_banded_alignment_score<BAND_LEN>( stream, tid );
// NOTE(review): the declaration introduced by this template header is not
// present in this listing.
135 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS, u
int32 BAND_LEN,
typename stream_type>
// Device-side batch driver fragment (its signature is not visible here).
156 template <u
int32 BLOCKDIM, u
int32 MINBLOCKS, u
int32 BAND_LEN,
typename stream_type>
// Launch the scoring kernel with n_blocks blocks of BLOCKDIM threads, passing
// the stream by value. The computation of n_blocks is on a line missing from
// this listing.
161 batched_banded_alignment_score_kernel<BLOCKDIM,MINBLOCKS,BAND_LEN> <<<n_blocks,
BLOCKDIM>>>(
stream );
// Fragments of several routines that size temporary storage and drive a work
// queue. The function signatures are missing from this listing, so the
// grouping suggested below is inferred from the visible calls -- TODO confirm.
170 template <u
int32 BAND_LEN,
typename stream_type>
// Per-item column storage: column_size passed through align<4>.
// NOTE(review): how column_size is computed is not visible here.
184 return align<4>( column_size );
// Minimum temporary storage: 1024 times the column storage for the maximum
// pattern/text lengths.
// NOTE(review): presumably room for 1024 concurrent work items -- confirm.
191 return column_storage( max_pattern_len, max_text_len ) * 1024;
// Maximum temporary storage: the column storage multiplied by stream_size.
198 return column_storage( max_pattern_len, max_text_len ) *
stream_size;
// Arguments of a call whose callee is on a missing line: the stream's maximum
// pattern and text lengths.
206 stream.max_pattern_length(),
207 stream.max_text_length(),
// Byte vector declared locally.
// NOTE(review): presumably it backs the temporary storage when the caller
// supplies none -- confirm.
210 thrust::device_vector<uint8> temp_dvec;
// Raise temp_size to at least min_temp_size, then resize the vector to it.
213 temp_size =
nvbio::max( min_temp_size, temp_size );
214 temp_dvec.resize( temp_size );
// Queue capacity: how many per-item column buffers fit in temp_size (integer
// division, narrowed to uint32).
219 const uint32 max_pattern_len = stream.max_pattern_length();
220 const uint32 max_text_len = stream.max_text_length();
221 const uint32 queue_capacity =
uint32( temp_size / column_storage( max_pattern_len, max_text_len ) );
223 m_work_queue.set_capacity( queue_capacity );
// Hand score_stream to the work queue for processing; its construction is on
// lines missing from this listing.
233 m_work_queue.consume( score_stream );
// Fragment of a per-work-item banded traceback routine, templated on BAND_LEN,
// CHECKPOINTS, stream_type and cell_type. The signature and several body lines
// are missing; the notes describe only the visible statements.
248 template <u
int32 BAND_LEN, u
int32 CHECKPOINTS,
typename stream_type,
typename cell_type>
// Local aliases for the aligner, context and strings types exported by the stream.
252 typedef typename stream_type::aligner_type aligner_type;
253 typedef typename stream_type::context_type context_type;
254 typedef typename stream_type::strings_type strings_type;
// Initialize the context for this work_id and test for a false result; the
// next visible statement calls stream.output().
// NOTE(review): presumably that call and an early return form the failure
// branch; the braces are not visible here -- confirm.
257 context_type context;
258 if (stream.init_context( work_id, &context ) ==
false)
261 stream.output( work_id, &context );
// Length passed to load_strings(): the pattern length when the aligner's
// algorithm_tag equals PatternBlockingTag, otherwise the text length.
266 const uint32 len = equal<typename aligner_type::algorithm_tag,PatternBlockingTag>() ?
267 stream.pattern_length( work_id, &context ) :
268 stream.text_length( work_id, &context );
// Ask the stream to fill `strings` for this work item, passing 0 and len.
271 strings_type strings;
272 stream.load_strings( work_id, 0, len, &context, &strings );
// Checkpoint and submatrix accessors, each built from a base pointer offset by
// thread_id together with `stride`.
// NOTE(review): presumably an interleaved layout with one slot per thread;
// checkpoint_type and submatrix_storage_type are defined on missing lines -- confirm.
276 checkpoint_type checkpoint = checkpoint_type( checkpoints + thread_id, stride );
280 submatrix_storage_type submatrix_storage = submatrix_storage_type( submatrices + thread_id, stride );
// Store the traceback result in the context (call arguments not shown).
285 context.alignment = banded_alignment_traceback<BAND_LEN, CHECKPOINTS>(
// Pass the work item's context to the stream's output handler.
296 stream.output( work_id, &context );
// Kernel fragment templated on BLOCKDIM, BAND_LEN, CHECKPOINTS, stream_type and
// cell_type. Only the bounds test is visible.
// NOTE(review): presumably this is batched_banded_alignment_traceback_kernel,
// which is launched with <BLOCKDIM,BAND_LEN,CHECKPOINTS> further down -- confirm.
299 template <u
int32 BLOCKDIM, u
int32 BAND_LEN, u
int32 CHECKPOINTS,
typename stream_type,
typename cell_type>
// Bounds test against the number of work items in the stream.
// NOTE(review): the guarded statement is missing; presumably an early return -- confirm.
304 if (tid >= stream.size())
// Kernel fragment with the same template parameters as the traceback kernel
// fragment. Only the outer loop and a bounds test are visible.
// NOTE(review): presumably this is
// persistent_banded_batched_alignment_traceback_kernel, launched further down -- confirm.
310 template <u
int32 BLOCKDIM, u
int32 BAND_LEN, u
int32 CHECKPOINTS,
typename stream_type,
typename cell_type>
// Walk the stream from 0 up to stream_end, advancing by grid_threads per
// iteration. The definitions of stream_end and grid_threads are on lines
// missing from this listing.
319 for (
uint32 stream_begin = 0; stream_begin <
stream_end; stream_begin += grid_threads)
// Only act when work_id is below stream_end.
// NOTE(review): work_id is computed on a missing line, presumably
// stream_begin plus this thread's index -- confirm.
323 if (work_id < stream_end)
// Fragments of storage-sizing helpers for the traceback path. The function
// signatures are missing from this listing; which return statement belongs to
// which helper is inferred from the calls in the last fragment -- TODO confirm.
335 template <u
int32 BAND_LEN, u
int32 CHECKPOINTS,
typename stream_type>
// Bytes for checkpoints: BAND_LEN cells for each group of CHECKPOINTS pattern
// positions (count rounded up), times sizeof(cell_type), passed through align<4>.
347 return align<4>(
uint32( BAND_LEN * ((max_pattern_len + CHECKPOINTS-1) / CHECKPOINTS) *
sizeof(
cell_type) ) );
354 typedef typename stream_type::aligner_type
aligner_type;
// Bytes for a submatrix of BAND_LEN * CHECKPOINTS elements packed
// ELEMENTS_PER_WORD to a 32-bit word (word count rounded up).
// NOTE(review): ELEMENTS_PER_WORD is defined on a missing line.
357 return ((BAND_LEN * CHECKPOINTS + ELEMENTS_PER_WORD-1) / ELEMENTS_PER_WORD) *
sizeof(
uint32);
// Sum of the checkpoint storage and the submatrix storage.
364 return checkpoint_storage( max_pattern_len, max_text_len ) +
365 submatrix_storage( max_pattern_len, max_text_len );
// Fragment (signature not visible): returns 1024 times the element storage for
// the given maximum pattern/text lengths.
// NOTE(review): presumably the minimum temporary storage, sized for 1024
// concurrent work items -- confirm.
383 template <u
int32 BAND_LEN, u
int32 CHECKPOINTS,
typename stream_type>
386 return element_storage( max_pattern_len, max_text_len ) * 1024;
// Fragment (signature not visible): returns the element storage for the given
// maximum pattern/text lengths multiplied by stream_size.
// NOTE(review): presumably the maximum useful temporary storage, one element
// per work item -- confirm.
391 template <u
int32 BAND_LEN, u
int32 CHECKPOINTS,
typename stream_type>
394 return element_storage( max_pattern_len, max_text_len ) *
stream_size;
// Fragment of the traceback batch driver (signature not visible): sizes the
// temporary storage, derives a queue capacity from it, and launches one of two
// traceback kernels.
399 template <u
int32 BAND_LEN, u
int32 CHECKPOINTS,
typename stream_type>
// Arguments of a call whose callee is on a missing line: the stream's maximum
// pattern and text lengths.
403 stream.max_pattern_length(),
404 stream.max_text_length(),
// Byte vector declared locally, resized to min_temp_size, with temp_size then
// set to the same value.
// NOTE(review): presumably a fallback taken only when the caller-supplied
// buffer is absent or too small; the enclosing condition is not visible -- confirm.
407 thrust::device_vector<uint8> temp_dvec;
410 temp_dvec.resize( min_temp_size );
412 temp_size = min_temp_size;
// Queue capacity: how many per-item element buffers fit in temp_size (integer
// division, narrowed to uint32).
416 const uint32 max_pattern_len = stream.max_pattern_length();
417 const uint32 max_text_len = stream.max_text_length();
418 const uint32 queue_capacity =
uint32( temp_size / element_storage( max_pattern_len, max_text_len ) );
// Checkpoint storage size for the maximum lengths, held as a 64-bit value.
420 const uint64 checkpoints_size = checkpoint_storage( max_pattern_len, max_text_len );
// Test whether the capacity covers every work item in the stream.
// NOTE(review): presumably the first launch below is the branch taken when it
// does and the persistent kernel is the else branch; the braces are not
// visible here -- confirm.
422 if (queue_capacity >= stream.size())
// Launch with n_blocks blocks of BLOCKDIM threads (arguments not shown).
429 batched_banded_alignment_traceback_kernel<BLOCKDIM,BAND_LEN,CHECKPOINTS> <<<n_blocks,
BLOCKDIM>>>(
// Launch the persistent variant with n_blocks blocks of BLOCKDIM threads
// (arguments not shown).
445 persistent_banded_batched_alignment_traceback_kernel<BLOCKDIM,BAND_LEN,CHECKPOINTS> <<<n_blocks,
BLOCKDIM>>>(