NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
traceback_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
31 
32 namespace nvbio {
33 namespace bowtie2 {
34 namespace cuda {
35 
36 namespace detail {
37 
40 
43 
46 
52 template <uint32 ALN_IDX, typename AlignerType, typename PipelineType>
53 struct BestTracebackStream : public AlignmentStreamBase<TRACEBACK_STREAM,AlignerType,PipelineType>
54 {
56  typedef typename base_type::context_type context_type;
58 
70  const MateType _mate_type,
71  const uint32 _count,
72  const uint32* _idx,
73  io::Alignment* _best_data,
74  const uint32 _best_stride,
75  const uint32 _band_len,
76  const PipelineType _pipeline,
77  const AlignerType _aligner,
78  const ParamsPOD _params) :
79  base_type( _pipeline, _aligner, _params ), m_mate_type( _mate_type ), m_count(_count), m_idx(_idx), m_best_data(_best_data), m_best_stride( _best_stride ), m_band_len( _band_len ) {}
80 
85 
90 
94  MateType mate_type() const { return m_mate_type; }
95 
99  uint32 size() const { return m_count; }
100 
105  const uint32 i,
106  context_type* context) const
107  {
108  context->idx = m_idx ? m_idx[ i ] : i;
109 
110  const io::Alignment& alignment = m_best_data[ context->idx + ALN_IDX * m_best_stride ];
111  if (alignment.is_aligned() == false)
112  return false;
113 
114  context->min_score = alignment.score();
115 
116  // setup the read info
117  context->mate = alignment.mate();
118  context->read_rc = alignment.is_rc();
119  context->read_id = context->idx;
120  context->read_range = base_type::m_pipeline.get_reads( context->mate ).get_range( context->read_id );
121 
122  const uint32 read_len = context->read_range.y - context->read_range.x;
123 
124  // setup the genome range
125  const bool paired_opposite_alignment = (m_mate_type == OppositeMate && alignment.is_concordant());
126  const uint32 g_pos = paired_opposite_alignment ?
127  alignment.alignment() :
128  alignment.alignment() > m_band_len/2 ? alignment.alignment() - m_band_len/2 : 0u;
129  const uint32 g_len = paired_opposite_alignment ?
130  alignment.sink() :
131  m_band_len + read_len;
132 
133  context->genome_begin = g_pos;
134  context->genome_end = nvbio::min( context->genome_begin + g_len, base_type::m_pipeline.genome_length );
135  return true;
136  }
137 
141  void output(
142  const uint32 i,
143  const context_type* context)
144  {
145  const uint32 cigar_len = context->backtracer.size;
146  if (cigar_len == 0)
147  return;
148 
149  // alloc the output cigar
150  io::Cigar* cigar = base_type::m_pipeline.cigar.alloc( context->read_id, cigar_len );
151  NVBIO_CUDA_ASSERT_IF( base_type::m_params.debug.asserts, cigar != NULL, "%s backtrack(): unable to allocate CIGAR!\n read[%u], mate[%u], cigar: %u!\n", mate_string( m_mate_type ), context->read_id, context->mate, cigar_len );
152  if (cigar)
153  {
154  // copy the local cigar to the output one
155  for (uint32 i = 0; i < cigar_len; ++i)
156  cigar[i] = context->cigar[i];
157  }
158 
159  // write the cigar coords
160  base_type::m_pipeline.cigar_coords[ context->read_id ] = make_uint2( context->alignment.source.x + (context->alignment.source.y << 16), context->backtracer.size );
161  NVBIO_CUDA_DEBUG_PRINT_IF( base_type::m_params.debug.show_traceback( context->read_id ), "BestTracebackStream<%u>:\n mate[%u], cigar-coords[%u:%u,%u]\n", ALN_IDX, context->mate, context->alignment.source.x, context->alignment.source.y, context->backtracer.size );
162 
163  NVBIO_CUDA_DEBUG_CHECK_IF( base_type::m_params.debug.show_traceback( context->read_id ), context->alignment.sink.x != uint32(-1) && context->alignment.sink.y != uint32(-1), "\nerror:\n %s backtrack(): failed to re-align!\n read[%u], rc[%u], mate[%u] (expected score: %d)\n", mate_string( m_mate_type ), context->read_id, context->read_rc, context->mate, context->min_score );
164  NVBIO_CUDA_DEBUG_CHECK_IF( base_type::m_params.debug.show_traceback( context->read_id ), context->alignment.score == context->min_score, "\nerror:\n %s backtrack(): score %d different from previously calculated value %d!\n read[%u], rc[%u], mate[%u]\n", mate_string( m_mate_type ), context->alignment.score, context->min_score, context->read_id, context->read_rc, context->mate );
165  NVBIO_CUDA_ASSERT_IF( base_type::m_params.debug.asserts, context->alignment.sink.x != uint32(-1) && context->alignment.sink.y != uint32(-1), "%s backtrack(): failed to re-align!\n read[%u], rc[%u], mate[%u] (expected score: %d)\n", mate_string( m_mate_type ), context->read_id, context->read_rc, context->mate, context->min_score );
166  NVBIO_CUDA_ASSERT_IF( base_type::m_params.debug.asserts, context->alignment.score == context->min_score, "%s backtrack(): score %d different from previously calculated value %d!\n read[%u], rc[%u], mate[%u]\n", mate_string( m_mate_type ), context->alignment.score, context->min_score, context->read_id, context->read_rc, context->mate );
167 
169  read_cigar_length( cigar, context->backtracer.size ) == context->read_range.y - context->read_range.x,
170  "%s backtrack(): CIGAR length != read length\n read[%u]\n cigar len: %u\n read len: %u\n source: (%u, %u)\n sink: (%u, %u)\n", mate_string( m_mate_type ),
171  context->read_id, read_cigar_length( cigar, context->backtracer.size ), context->read_range.y - context->read_range.x, context->alignment.source.x, context->alignment.source.y, context->alignment.sink.x, context->alignment.sink.y );
172  }
173 
177  void finish(
178  const uint32 i,
179  const context_type* context,
180  const uint32 ed,
181  const int32 score)
182  {
183  // rewrite the alignment
184  io::Alignment& aln = m_best_data[ context->read_id + ALN_IDX * m_best_stride ];
185  aln.m_align = context->genome_begin;
186  aln.m_ed = ed;
187  aln.m_score_sgn = score < 0 ? 1 : 0;
188  aln.m_score = score < 0 ? -score : score;
189 
190  NVBIO_CUDA_DEBUG_PRINT_IF( base_type::m_params.debug.show_traceback( context->read_id ), "BestTracebackStream<%u>:\n mate[%u], ed[%u], score[%d], pos[%u], rc[%u]\n", ALN_IDX, aln.mate(), aln.ed(), aln.score(), aln.alignment(), uint32( aln.is_rc() ));
191  }
192 
195  const uint32* m_idx;
199 };
200 
204 template <uint32 ALN_IDX, typename aligner_type, typename pipeline_type>
206  const MateType mate_type,
207  const uint32 count,
208  const uint32* idx,
209  io::Alignment* best_data,
210  const uint32 best_stride,
211  const uint32 band_len,
212  const pipeline_type& pipeline,
213  const aligner_type aligner,
214  const ParamsPOD params)
215 {
216  const uint32 static_band_len =
217  (band_len < 4) ? 3u :
218  (band_len < 8) ? 7u :
219  (band_len < 16) ? 15u :
220  31u;
221 
223 
224  stream_type stream(
225  mate_type,
226  count,
227  idx,
228  best_data,
229  best_stride,
230  static_band_len,
231  pipeline,
232  aligner,
233  params );
234 
235  NVBIO_VAR_UNUSED const uint32 CHECKPOINTS = BANDED_DP_CHECKPOINTS;
236 
237  if (band_len < 4)
238  {
240 
241  batch.enact( stream, pipeline.dp_buffer_size, pipeline.dp_buffer );
242  }
243  else if (band_len < 8)
244  {
246 
247  batch.enact( stream, pipeline.dp_buffer_size, pipeline.dp_buffer );
248  }
249  else if (band_len < 16)
250  {
252 
253  batch.enact( stream, pipeline.dp_buffer_size, pipeline.dp_buffer );
254  }
255  else
256  {
258 
259  batch.enact( stream, pipeline.dp_buffer_size, pipeline.dp_buffer );
260  }
261 }
262 
266 template <uint32 ALN_IDX, typename aligner_type, typename pipeline_type>
268  const MateType mate_type,
269  const uint32 count,
270  const uint32* idx,
271  io::Alignment* best_data,
272  const uint32 best_stride,
273  const pipeline_type& pipeline,
274  const aligner_type aligner,
275  const ParamsPOD params)
276 {
278 
279  stream_type stream(
280  mate_type,
281  count,
282  idx,
283  best_data,
284  best_stride,
285  0u, // band-len
286  pipeline,
287  aligner,
288  params );
289 
290  typedef aln::DeviceThreadBlockScheduler<128,9> scheduler_type;
291 
293 
294  batch.enact( stream, pipeline.dp_buffer_size, pipeline.dp_buffer );
295 }
296 
302 template <typename AlignerType, typename PipelineType>
303 struct AllTracebackStream : public AlignmentStreamBase<TRACEBACK_STREAM,AlignerType,PipelineType>
304 {
306  typedef typename base_type::context_type context_type;
308 
320  const uint32 _count,
321  const uint32* _idx,
322  const uint32 _buffer_offset,
323  const uint32 _buffer_size,
324  io::Alignment* _alignments,
325  const uint32 _band_len,
326  const PipelineType _pipeline,
327  const AlignerType _aligner,
328  const ParamsPOD _params) :
329  base_type( _pipeline, _aligner, _params ),
331  m_count(_count),
332  m_idx(_idx),
333  m_buffer_offset(_buffer_offset),
334  m_buffer_size(_buffer_size),
335  m_alignments(_alignments),
336  m_band_len( _band_len ) {}
337 
342 
347 
351  MateType mate_type() const { return m_mate_type; }
352 
356  uint32 size() const { return m_count; }
357 
362  const uint32 i,
363  context_type* context) const
364  {
365  context->idx = i;
366 
367  const uint32 buffer_idx = (m_idx[ i ] + m_buffer_offset) % m_buffer_size;
368  const io::Alignment& alignment = base_type::m_pipeline.buffer_alignments[ buffer_idx ];
369 
370  // setup the read info
371  context->mate = alignment.mate();
372  context->read_rc = alignment.is_rc();
373  context->read_id = base_type::m_pipeline.output_read_info[ i ];
374  context->read_range = base_type::m_pipeline.get_reads( context->mate ).get_range( context->read_id );
375 
376  const uint32 read_len = context->read_range.y - context->read_range.x;
377 
378  // setup the genome range
379  const uint32 g_pos = alignment.alignment() > m_band_len/2 ? alignment.alignment() - m_band_len/2 : 0u;
380  const uint32 g_len = m_band_len + read_len;
381 
382  context->genome_begin = g_pos;
383  context->genome_end = nvbio::min( context->genome_begin + g_len, base_type::m_pipeline.genome_length );
384  return true;
385  }
386 
390  void output(
391  const uint32 i,
392  const context_type* context)
393  {
394  const uint32 cigar_len = context->backtracer.size;
395  if (cigar_len == 0)
396  return;
397 
398  // alloc the output cigar
399  io::Cigar* cigar = base_type::m_pipeline.cigar.alloc( context->idx, cigar_len );
400  NVBIO_CUDA_ASSERT_IF( base_type::m_params.debug.asserts, cigar != NULL, "%s backtrack(): unable to allocate CIGAR!\n aln[%u], read[%u], mate[%u], cigar: %u!\n", mate_string( m_mate_type ), context->idx, context->read_id, context->mate, cigar_len );
401  if (cigar)
402  {
403  // copy the local cigar to the output one
404  for (uint32 i = 0; i < cigar_len; ++i)
405  cigar[i] = context->cigar[i];
406  }
407 
408  // write the cigar coords
409  base_type::m_pipeline.cigar_coords[ context->idx ] = make_uint2( context->alignment.source.x + (context->alignment.source.y << 16), context->backtracer.size );
410  NVBIO_CUDA_DEBUG_PRINT_IF( base_type::m_params.debug.show_traceback( context->read_id ), "AllTracebackStream:\n mate[%u], cigar-coords[%u:%u,%u]\n", context->mate, context->alignment.source.x, context->alignment.source.y, context->backtracer.size );
411 
412  NVBIO_CUDA_DEBUG_CHECK_IF( base_type::m_params.debug.show_traceback( context->read_id ), context->alignment.sink.x != uint32(-1) && context->alignment.sink.y != uint32(-1), "\nerror:\n backtrack(): failed to re-align!\n read[%u], rc[%u], mate[%u] (expected score: %d)\n", context->read_id, context->read_rc, context->mate, context->min_score );
413  NVBIO_CUDA_DEBUG_CHECK_IF( base_type::m_params.debug.show_traceback( context->read_id ), context->alignment.score == context->min_score, "\nerror:\n backtrack(): score %d different from previously calculated value %d!\n read[%u], rc[%u], mate[%u]\n", context->alignment.score, context->min_score, context->read_id, context->read_rc, context->mate );
414  NVBIO_CUDA_ASSERT_IF( base_type::m_params.debug.asserts, context->alignment.sink.x != uint32(-1) && context->alignment.sink.y != uint32(-1), "backtrack(): failed to re-align!\n read[%u], rc[%u], mate[%u] (expected score: %d)\n", context->read_id, context->read_rc, context->mate, context->min_score );
415  NVBIO_CUDA_ASSERT_IF( base_type::m_params.debug.asserts, context->alignment.score == context->min_score, "backtrack(): score %d different from previously calculated value %d!\n read[%u], rc[%u], mate[%u]\n", context->alignment.score, context->min_score, context->read_id, context->read_rc, context->mate );
416 
418  read_cigar_length( cigar, context->backtracer.size ) == context->read_range.y - context->read_range.x,
419  "backtrack(): CIGAR length != read length\n read[%u]\n cigar len: %u\n read len: %u\n source: (%u, %u)\n sink: (%u, %u)\n",
420  context->read_id, read_cigar_length( cigar, context->backtracer.size ), context->read_range.y - context->read_range.x, context->alignment.source.x, context->alignment.source.y, context->alignment.sink.x, context->alignment.sink.y );
421  }
422 
426  void finish(
427  const uint32 i,
428  const context_type* context,
429  const uint32 ed,
430  const int32 score)
431  {
432  const uint32 buffer_idx = (m_idx[ i ] + m_buffer_offset) % m_buffer_size;
433  const io::Alignment& in_alignment = base_type::m_pipeline.buffer_alignments[ buffer_idx ];
434  io::Alignment& out_alignment = m_alignments[ context->idx ];
435 
436  // rewrite the alignment
437  out_alignment = io::Alignment(
438  context->genome_begin,
439  ed,
440  score,
441  in_alignment.is_rc(),
442  in_alignment.is_paired(),
443  in_alignment.is_discordant() );
444 
445  NVBIO_CUDA_DEBUG_PRINT_IF( base_type::m_params.debug.show_traceback( context->read_id ), "finish-alignment:\n mate[%u], ed[%u], score[%d], pos[%u], rc[%u]\n", context->mate, out_alignment.ed(), out_alignment.score(), out_alignment.alignment(), uint32( out_alignment.is_rc() ));
446  }
447 
450  const uint32* m_idx;
455 };
456 
460 template <typename aligner_type, typename pipeline_type>
462  const uint32 count,
463  const uint32* idx,
464  const uint32 buffer_offset,
465  const uint32 buffer_size,
466  io::Alignment* alignments,
467  const uint32 band_len,
468  const pipeline_type& pipeline,
469  const aligner_type aligner,
470  const ParamsPOD params)
471 {
472  const uint32 static_band_len =
473  (band_len < 4) ? 3u :
474  (band_len < 8) ? 7u :
475  (band_len < 16) ? 15u :
476  31u;
477 
479 
480  stream_type stream(
481  count,
482  idx,
483  buffer_offset,
484  buffer_size,
485  alignments,
486  static_band_len,
487  pipeline,
488  aligner,
489  params );
490 
491  NVBIO_VAR_UNUSED const uint32 CHECKPOINTS = BANDED_DP_CHECKPOINTS;
492 
493  if (band_len < 4)
494  {
496 
497  batch.enact( stream, pipeline.dp_buffer_size, pipeline.dp_buffer );
498  }
499  else if (band_len < 8)
500  {
502 
503  batch.enact( stream, pipeline.dp_buffer_size, pipeline.dp_buffer );
504  }
505  else if (band_len < 16)
506  {
508 
509  batch.enact( stream, pipeline.dp_buffer_size, pipeline.dp_buffer );
510  }
511  else
512  {
514 
515  batch.enact( stream, pipeline.dp_buffer_size, pipeline.dp_buffer );
516  }
517 }
518 
519 
523 template <typename stream_type, typename scheme_type, typename pipeline_type> __global__
525  stream_type stream,
526  pipeline_type pipeline,
527  const scheme_type scoring_scheme,
528  const ParamsPOD params)
529 {
530  const uint32 work_id = threadIdx.x + BLOCKDIM*blockIdx.x;
531  if (work_id >= stream.size()) return;
532 
533  typedef typename stream_type::aligner_type aligner_type;
534  typedef typename stream_type::context_type context_type;
535  typedef typename stream_type::strings_type strings_type;
536 
537  // load the alignment context
538  context_type context;
539  if (stream.init_context( work_id, &context ) == false)
540  return;
541 
542  // compute the end of the current DP matrix window
543  const uint32 read_len = stream.pattern_length( work_id, &context );
544 
545  // load the strings to be aligned
546  strings_type strings;
547  stream.load_strings( work_id, 0, read_len, &context, &strings );
548 
549  const uint2 cigar_coord = pipeline.cigar_coords[ context.idx ];
550  const uint32 cigar_offset = cigar_coord.x & 0xFFFF;
551  const uint32 cigar_length = cigar_coord.y;
552 
553  //
554  // compute actual score: this is needed because we might be using plain edit distance in
555  // the first stage rather than doing full SW. We also take the chance to compute the MD
556  // string here.
557  //
558  const io::Cigar* cigar_vector = pipeline.cigar[ context.idx ];
559 
560  // bail-out if no CIGAR's available!
561  if (cigar_vector == NULL)
562  return;
563 
564  const uint32 SUB_MASK = 1u << io::Cigar::SUBSTITUTION;
565  const uint32 DEL_MASK = 1u << io::Cigar::DELETION;
566  const uint32 INS_MASK = 1u << io::Cigar::INSERTION;
567  const uint32 CLP_MASK = 1u << io::Cigar::SOFT_CLIPPING;
568 
569  NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_traceback( context.read_id ), "\nfinish_alignment(%s):\n cigar slot: %u\n", stream.mate_type() == OppositeMate ? "opposite" : "anchor", pipeline.cigar.slot( context.read_id ) );
570 
571  // alloc some local memory for the MDS
572  const uint32 MDS_LMEM = 2048;
573  uint8 mds_local[MDS_LMEM];
574 
575  // leave a blank space in the MDS vector to store its length later on
576  uint32 mds_len = 2;
577  uint8 mds_op = io::MDS_INVALID;
578 
579  uint32 ed = 0;
580  int32 score = 0;
581 
582  const uint32 g_len = strings.text.length();
583 
584  for (uint32 i = 0, j = 0/*read_pos*/, k = cigar_offset; i < cigar_length; ++i)
585  {
586  const uint32 l = cigar_vector[ cigar_length - i - 1u ].m_len;
587  const uint32 t = cigar_vector[ cigar_length - i - 1u ].m_type;
588 
589  NVBIO_CUDA_ASSERT_IF( params.debug.asserts, (t == io::Cigar::SUBSTITUTION) || (t == io::Cigar::DELETION) || (t == io::Cigar::INSERTION) || (t == io::Cigar::SOFT_CLIPPING), "finish_alignment(%s): unknown op!\n read[%u], mate[%u], op[%u]: %u\n", stream.mate_type() == OppositeMate ? "opposite" : "anchor", context.read_id, context.mate, i, t );
590 
591  const uint8 t_mask = 1u << t;
592 
593  // handle deletions & insertions
594  if (t_mask & (INS_MASK | DEL_MASK | CLP_MASK))
595  //if (t_mask & DEL_MASK) // track deletions only
596  {
597  NVBIO_CUDA_ASSERT_IF( params.debug.asserts, mds_len+2 < MDS_LMEM, "finish_alignment(%s): exceeded MDS length!\n read[%u], mate[%u], mds: %u!\n", stream.mate_type() == OppositeMate ? "opposite" : "anchor", context.read_id, context.mate, mds_len+2 );
598  mds_op = t_mask & DEL_MASK ? io::MDS_DELETION : io::MDS_INSERTION;
599  mds_local[ mds_len++ ] = mds_op;
600  mds_local[ mds_len++ ] = l;
601 
602  NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_traceback( context.read_id ), "\n%c[%03u] ", t_mask & DEL_MASK ? 'D' : 'I', l);
603  NVBIO_CUDA_ASSERT_IF( params.debug.asserts, l, "finish_alignment(%s): zero-length MDS[%u:%u] (%c)\n", context.read_id, mds_len-1, t_mask & DEL_MASK ? 'D' : 'I', l);
604  }
605 
606  for (uint32 n = 0; n < l; ++n)
607  {
608  // advance j and k
609  j += (t_mask & (SUB_MASK | INS_MASK | CLP_MASK)) ? 1u : 0u;
610  k += (t_mask & (SUB_MASK | DEL_MASK)) ? 1u : 0u;
611  NVBIO_CUDA_ASSERT_IF( params.debug.asserts, (j <= read_len) && (k <= g_len), "finish_alignment(%s): coordinates out-of-bounds!\n read[%u], mate[%u], (%u,%u) > (%u,%u) @ %u\n", stream.mate_type() == OppositeMate ? "opposite" : "anchor", context.read_id, context.mate, j, k, read_len, g_len, context.genome_begin );
612  NVBIO_CUDA_ASSERT_IF( params.debug.asserts, (t_mask & (SUB_MASK | INS_MASK | CLP_MASK)) ? (j > 0) : true, "finish_alignment(%s): accessed read at -1!\n read[%u], mate[%u]\n", stream.mate_type() == OppositeMate ? "opposite" : "anchor", context.read_id, context.mate );
613  NVBIO_CUDA_ASSERT_IF( params.debug.asserts, (t_mask & (SUB_MASK | DEL_MASK)) ? (k > 0) : true, "finish_alignment(%s): accessed genome at -1!\n read[%u], mate[%u]\n", stream.mate_type() == OppositeMate ? "opposite" : "anchor", context.read_id, context.mate );
614 
615  const uint8 readc = j > 0 ? strings.pattern[j-1] : 255u;
616  const uint8 refc = k > 0 ? strings.text[k-1] : 255u;
617 
618  if (t_mask == SUB_MASK)
619  {
620  if (readc == refc)
621  {
622  NVBIO_CUDA_ASSERT_IF( params.debug.asserts, mds_len < MDS_LMEM, "finish_alignment(%s): exceeded MDS length!\n read[%u], mds: %u!\n", stream.mate_type() == OppositeMate ? "opposite" : "anchor", context.read_id, mds_len+2 );
623 
624  // handle the most common case first: a character match
625  if (mds_op == io::MDS_MATCH && (mds_local[ mds_len-1 ] < 255))
626  {
627  // prolong the sequence of previous matches
628  mds_local[ mds_len-1 ]++;
629  NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_traceback( context.read_id ), "%c", dna_to_char(readc) );
630  }
631  else
632  {
633  // start a new sequence of matches
634  NVBIO_CUDA_ASSERT_IF( params.debug.asserts, mds_len+2 < MDS_LMEM, "finish_alignment(%s): exceeded MDS length!\n read[%u], mds: %u!\n", stream.mate_type() == OppositeMate ? "opposite" : "anchor", context.read_id, mds_len+2 );
635  mds_local[ mds_len++ ] = mds_op = io::MDS_MATCH;
636  mds_local[ mds_len++ ] = 1;
637  NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_traceback( context.read_id ), "\nM[%03u] %c", l, dna_to_char(readc) );
638  }
639  }
640  else
641  {
642  // handle mismatches
643  NVBIO_CUDA_ASSERT_IF( params.debug.asserts, mds_len+2 < MDS_LMEM, "finish_alignment(%s): exceeded MDS length!\n read[%u], mds: %u!\n", stream.mate_type() == OppositeMate ? "opposite" : "anchor", context.read_id, mds_len+2 );
644  mds_local[ mds_len++ ] = mds_op = io::MDS_MISMATCH;
645  mds_local[ mds_len++ ] = readc;
646  NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_traceback( context.read_id ), "\nS[%03u] %c", l, dna_to_char(readc) );
647  ++ed;
648  }
649  }
650  else //if (t_mask & DEL_MASK) // track deletions only
651  {
652  // handle all the rare cases together: deletions/insertions
653  NVBIO_CUDA_ASSERT_IF( params.debug.asserts, mds_len+1 < MDS_LMEM, "finish_alignment(%s): exceeded MDS length!\n read[%u], mds: %u!\n", stream.mate_type() == OppositeMate ? "opposite" : "anchor", context.read_id, mds_len+1 );
654  mds_local[ mds_len++ ] = t_mask & DEL_MASK ? refc : readc;
655 
656  if (t_mask != CLP_MASK) // don't count soft-clipping in the edit-distance calculation
657  ++ed;
658 
659  NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_traceback( context.read_id ), "%c", dna_to_char( (t_mask & DEL_MASK) ? refc : readc ) );
660  }
661 
662  if (t_mask == SUB_MASK)
663  {
664  // score the substitution
665  const uint8 q = strings.quals[j-1];
666  const int32 delta = scoring_scheme.score( readc, 1u << refc, q );
667  score += delta;
668  NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_traceback( context.read_id ) && delta, " S(%d,%d)", delta, q );
669  }
670  }
671 
672  if (t_mask == INS_MASK) { score -= scoring_scheme.cumulative_deletion( l ); NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_traceback( context.read_id ), " D(%d)", scoring_scheme.cumulative_deletion( l ) ); }
673  else if (t_mask == DEL_MASK) { score -= scoring_scheme.cumulative_insertion( l ); NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_traceback( context.read_id ), " I(%d)", scoring_scheme.cumulative_insertion( l ) ); }
674  }
675 
676  #if 0
677  #if defined(NVBIO_CUDA_DEBUG)
678  if (params.debug.show_traceback( context.read_id ))
679  {
680  char read_str[512];
681  char gen_str[512];
682  char cigar_str[512];
683  char mm_string[512];
684 
685  dna_to_string( strings.pattern, read_len, read_str );
686  dna_to_string( strings.text, g_len, gen_str );
687 
688  for (uint32 i = 0; i < read_len; ++i)
689  mm_string[i] = '0' - int32( scoring_scheme.mismatch( strings.quals[i] ) );
690  mm_string[read_len] = '\0';
691 
692  uint32 j = 0;
693  for (uint32 i = 0; i < cigar_coord.y; ++i)
694  {
695  const uint32 l = cigar_vector[ cigar_coord.y - i - 1u ].m_len;
696  const uint32 t = cigar_vector[ cigar_coord.y - i - 1u ].m_type;
697  for (uint32 n = 0; n < l; ++n)
698  cigar_str[j++] = "MID"[t];
699  }
700  cigar_str[j] = '\0';
701  NVBIO_CUDA_DEBUG_PRINT("\nfinish_alignment:\n score: %d, %d, ed: %u, (genome offset: %u)\n read : %s\n genome: %s\n quals : %s\n cigar : %s\n mds len: %u\n", alignment.score(), score, ed, cigar_offset, read_str, gen_str, mm_string, cigar_str, mds_len);
702  }
703  #endif
704  #endif
705 
706  // store the MDS length in the first two bytes
707  NVBIO_CUDA_ASSERT_IF( params.debug.asserts, mds_len < 65536, "finish_alignment(%s): exceeded representable MDS length!\n aln[%u], read[%u], mate[%u], mds: %u!\n", stream.mate_type() == OppositeMate ? "opposite" : "anchor", context.idx, context.read_id, context.mate, mds_len );
708  mds_local[0] = uint8( mds_len & 0xFF );
709  mds_local[1] = uint8( mds_len >> 8 );
710 
711  // alloc the output mds
712  uint8* mds_vector = pipeline.mds.alloc( context.idx, mds_len );
713  NVBIO_CUDA_ASSERT_IF( params.debug.asserts, mds_vector != NULL, "finish_alignment(%s): unable to allocate MDS!\n aln[%u], read[%u], mate[%u], mds: %u!\n", stream.mate_type() == OppositeMate ? "opposite" : "anchor", context.idx, context.read_id, context.mate, mds_len );
714  if (mds_vector)
715  {
716  // copy the local mds to the output one
717  for (uint32 i = 0; i < mds_len; ++i)
718  mds_vector[i] = mds_local[i];
719  }
720 
721  // output the finished alignment
722  stream.finish( work_id, &context, ed, score );
723 }
724 
728 template <uint32 ALN_IDX, typename scheme_type, typename pipeline_type>
730  const MateType mate_type,
731  const uint32 count,
732  const uint32* idx,
733  io::Alignment* best_data,
734  const uint32 best_stride,
735  const uint32 band_len,
736  const pipeline_type& pipeline,
737  const scheme_type scoring_scheme,
738  const ParamsPOD params)
739 {
740  const uint32 static_band_len =
741  (band_len < 4) ? 3u :
742  (band_len < 8) ? 7u :
743  (band_len < 16) ? 15u :
744  31u;
745 
746  typedef typename pipeline_type::scheme_type::local_aligner_type aligner_type;
747 
749 
750  stream_type stream(
751  mate_type,
752  count,
753  idx,
754  best_data,
755  best_stride,
756  static_band_len,
757  pipeline,
758  pipeline.scoring_scheme.local_aligner(),
759  params );
760 
761  const uint32 n_blocks = (count + BLOCKDIM-1) / BLOCKDIM;
762 
763  finish_alignment_kernel<<<n_blocks, BLOCKDIM>>>(
764  stream,
765  pipeline,
766  scoring_scheme,
767  params );
768 
769  // check for overflows
770  //if (mds.allocated_size() >= mds.arena_size())
771  // log_error( stderr, "finish_alignment(): exceeded MDS pool size\n" );
772 }
773 
777 template <typename scheme_type, typename pipeline_type>
779  const uint32 count,
780  const uint32* idx,
781  const uint32 buffer_offset,
782  const uint32 buffer_size,
783  io::Alignment* alignments,
784  const uint32 band_len,
785  const pipeline_type& pipeline,
786  const scheme_type scoring_scheme,
787  const ParamsPOD params)
788 {
789  const uint32 static_band_len =
790  (band_len < 4) ? 3u :
791  (band_len < 8) ? 7u :
792  (band_len < 16) ? 15u :
793  31u;
794 
795  typedef typename pipeline_type::scheme_type::local_aligner_type aligner_type;
796 
798 
799  stream_type stream(
800  count,
801  idx,
802  buffer_offset,
803  buffer_size,
804  alignments,
805  static_band_len,
806  pipeline,
807  pipeline.scoring_scheme.local_aligner(),
808  params );
809 
810  const uint32 n_blocks = (count + BLOCKDIM-1) / BLOCKDIM;
811 
812  finish_alignment_kernel<<<n_blocks, BLOCKDIM>>>(
813  stream,
814  pipeline,
815  scoring_scheme,
816  params );
817 
818  // check for overflows
819  //if (mds.allocated_size() >= mds.arena_size())
820  // log_error( stderr, "finish_alignment(): exceeded MDS pool size\n" );
821 }
822 
826 
827 } // namespace detail
828 
829 //
830 // execute a batch of banded-alignment traceback calculations
831 //
832 template <uint32 ALN_IDX, typename pipeline_type>
834  const uint32 count,
835  const uint32* idx,
836  io::Alignment* best_data,
837  const uint32 best_stride,
838  const uint32 band_len,
839  const pipeline_type& pipeline,
840  const ParamsPOD params)
841 {
842  if (params.alignment_type == LocalAlignment)
843  {
844  detail::banded_traceback_best<ALN_IDX>(
845  AnchorMate,
846  count,
847  idx,
848  best_data,
849  best_stride,
850  band_len,
851  pipeline,
852  pipeline.scoring_scheme.local_aligner(),
853  params );
854  }
855  else
856  {
857  detail::banded_traceback_best<ALN_IDX>(
858  AnchorMate,
859  count,
860  idx,
861  best_data,
862  best_stride,
863  band_len,
864  pipeline,
865  pipeline.scoring_scheme.end_to_end_aligner(),
866  params );
867  }
868 }
869 
870 //
871 // execute a batch of opposite alignment traceback calculations
872 //
873 template <uint32 ALN_IDX, typename pipeline_type>
875  const uint32 count,
876  const uint32* idx,
877  io::Alignment* best_data,
878  const uint32 best_stride,
879  const pipeline_type& pipeline,
880  const ParamsPOD params)
881 {
882  if (params.alignment_type == LocalAlignment)
883  {
884  detail::traceback_best<ALN_IDX>(
885  OppositeMate,
886  count,
887  idx,
888  best_data,
889  best_stride,
890  pipeline,
891  pipeline.scoring_scheme.local_aligner(),
892  params );
893  }
894  else
895  {
896  detail::traceback_best<ALN_IDX>(
897  OppositeMate,
898  count,
899  idx,
900  best_data,
901  best_stride,
902  pipeline,
903  pipeline.scoring_scheme.end_to_end_aligner(),
904  params );
905  }
906 }
907 
908 //
909 // execute a batch of banded-alignment traceback calculations
910 //
911 template <typename pipeline_type>
913  const uint32 count,
914  const uint32* idx,
915  const uint32 buffer_offset,
916  const uint32 buffer_size,
917  io::Alignment* alignments,
918  const uint32 band_len,
919  const pipeline_type& pipeline,
920  const ParamsPOD params)
921 {
922  if (params.alignment_type == LocalAlignment)
923  {
925  count,
926  idx,
927  buffer_offset,
928  buffer_size,
929  alignments,
930  band_len,
931  pipeline,
932  pipeline.scoring_scheme.local_aligner(),
933  params );
934  }
935  else
936  {
938  count,
939  idx,
940  buffer_offset,
941  buffer_size,
942  alignments,
943  band_len,
944  pipeline,
945  pipeline.scoring_scheme.end_to_end_aligner(),
946  params );
947  }
948 }
949 
950 //
951 // finish a batch of alignment calculations
952 //
953 template <uint32 ALN_IDX, typename scoring_scheme_type, typename pipeline_type>
955  const uint32 count,
956  const uint32* idx,
957  io::Alignment* best_data,
958  const uint32 best_stride,
959  const uint32 band_len,
960  const pipeline_type& pipeline,
961  const scoring_scheme_type scoring_scheme,
962  const ParamsPOD params)
963 {
964  detail::finish_alignment_best<ALN_IDX>(
965  AnchorMate,
966  count,
967  idx,
968  best_data,
969  best_stride,
970  band_len,
971  pipeline,
972  scoring_scheme,
973  params );
974 }
975 
976 //
977 // finish a batch of opposite alignment calculations
978 //
979 template <uint32 ALN_IDX, typename scoring_scheme_type, typename pipeline_type>
981  const uint32 count,
982  const uint32* idx,
983  io::Alignment* best_data,
984  const uint32 best_stride,
985  const uint32 band_len,
986  const pipeline_type& pipeline,
987  const scoring_scheme_type scoring_scheme,
988  const ParamsPOD params)
989 {
990  detail::finish_alignment_best<ALN_IDX>(
991  OppositeMate,
992  count,
993  idx,
994  best_data,
995  best_stride,
996  band_len,
997  pipeline,
998  scoring_scheme,
999  params );
1000 }
1001 
1002 //
1003 // finish a batch of alignment calculations
1004 //
1005 template <typename scoring_scheme_type, typename pipeline_type>
1007  const uint32 count,
1008  const uint32* idx,
1009  const uint32 buffer_offset,
1010  const uint32 buffer_size,
1011  io::Alignment* alignments,
1012  const uint32 band_len,
1013  const pipeline_type& pipeline,
1014  const scoring_scheme_type scoring_scheme,
1015  const ParamsPOD params)
1016 {
1018  count,
1019  idx,
1020  buffer_offset,
1021  buffer_size,
1022  alignments,
1023  band_len,
1024  pipeline,
1025  scoring_scheme,
1026  params );
1027 }
1028 
1029 
1030 } // namespace cuda
1031 } // namespace bowtie2
1032 } // namespace nvbio