NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
reduce_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
31 
32 #pragma once
33 
39 #include <nvbio/io/alignments.h>
40 #include <nvbio/io/utils.h>
41 #include <nvbio/basic/exceptions.h>
42 
43 namespace nvbio {
44 namespace bowtie2 {
45 namespace cuda {
46 
47 namespace detail {
48 
51 
54 
57 
///
/// Reduce the scores associated to each active read in the scoring queues,
/// keeping track of the best 2 alignments found so far.
///
/// Launch layout: one thread per active read, 1-D grid of BLOCKDIM-sized blocks
/// (the host wrapper computes the grid from active_reads.in_size).
///
/// \param context   reduction-policy object: scoring callbacks + failure predicate
/// \param pipeline  device view of the scoring pipeline (queues, reads, outputs)
/// \param params    global alignment parameters
///
template <typename ScoringScheme, typename PipelineType, typename ReduceContext> __global__
void score_reduce_kernel(
    const ReduceContext     context,
    PipelineType            pipeline,
    const ParamsPOD         params)
{
    typedef ScoringQueuesDeviceView                 scoring_queues_type;
    typedef ReadHitsReference<scoring_queues_type>  read_hits_type;
    typedef typename read_hits_type::reference      hit_type;

    scoring_queues_type& scoring_queues = pipeline.scoring_queues;

    const uint32 thread_id = threadIdx.x + BLOCKDIM*blockIdx.x;
    if (thread_id >= scoring_queues.active_read_count()) return;

    // fetch the active read corresponding to this thread
    read_hits_type read_hits( scoring_queues, thread_id );

    const uint32 read_id = read_hits.read_info().read_id;

    // fetch the best alignments found so far (slot 0: best, slot best_stride: second-best)
    io::BestAlignments best(
        pipeline.best_alignments[ read_id ],
        pipeline.best_alignments[ read_id + pipeline.best_stride ] );

    // setup the read stream
    const uint2  read_range = pipeline.reads.get_range(read_id);
    const uint32 read_len   = read_range.y - read_range.x;

    const uint32 count = read_hits.size();

    // sequentially reduce all scored hits bound to this read
    for (uint32 i = 0; i < count; ++i)
    {
        hit_type hit = read_hits[i];
        NVBIO_CUDA_DEBUG_ASSERT( hit.read_id == read_id, "reduce hit[%u][%u]: expected id %u, got: %u (slot: %u)\n", thread_id, i, read_id, hit.read_id, scoring_queues.hits_index( thread_id, i ));

        const packed_seed seed_info = hit.seed;
        const uint32 read_rc        = seed_info.rc;
        const uint32 top_flag       = seed_info.top_flag;

        const  int32 score          = hit.score;
        const uint32 g_pos          = hit.loc;

        // skip locations that we have already visited without paying the extension attempt
        if ((read_rc == best.m_a1.m_rc && g_pos == best.m_a1.m_align) ||
            (read_rc == best.m_a2.m_rc && g_pos == best.m_a2.m_align))
            continue;

        if (score > best.m_a1.score())
        {
            context.best_score( read_id, params );

            // set the first best score, demoting the previous best to second-best
            best.m_a2 = best.m_a1;
            best.m_a1 = io::Alignment( g_pos, 0u, score, read_rc );
            NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_reduce( read_id ), "update best: (parent[%u:%u])\n score[%d], rc[%u], pos[%u]\n", thread_id, i, score, read_rc, g_pos );
        }
        else if ((score > best.m_a2.score()) && io::distinct_alignments( best.m_a1.m_align, best.m_a1.m_rc, g_pos, read_rc, read_len/2 ))
        {
            context.second_score( read_id, params );

            // set the second best score (only if distinct enough from the best)
            best.m_a2 = io::Alignment( g_pos, 0u, score, read_rc );
            NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_reduce( read_id ), "update second: (parent[%u:%u])\n score[%d], rc[%u], pos[%u]\n", thread_id, i, score, read_rc, g_pos );
        }
        else if (context.failure( i, read_id, top_flag, params ))
        {
            // stop traversal for this read
            pipeline.hits.erase( read_id );

            // NOTE: we keep using the score entries rather than breaking here,
            // as we've already gone through the effort of computing them, and
            // this reduction is only a minor amount of work.
        }
    }

    // report best alignments back to global memory
    pipeline.best_alignments[ read_id ]                        = best.m_a1;
    pipeline.best_alignments[ read_id + pipeline.best_stride ] = best.m_a2;
}
153 
/// Update the best paired alignment for a read: the incoming pair becomes the
/// new best, and the previous best is demoted to second-best.
///
/// \param read_id     id of the read being reduced
/// \param best_pairs  in/out best & second-best paired alignments
/// \param pair        the new best candidate (anchor mate m_a, opposite mate m_o)
/// \param context     reduction-policy callbacks (statistics / scoring bookkeeping)
/// \param params      global alignment parameters
///
template <typename ReduceContext>
NVBIO_FORCEINLINE NVBIO_DEVICE
void update_best(
    const uint32                read_id,
    io::BestPairedAlignments&   best_pairs,
    const io::PairedAlignments& pair,
    ReduceContext&              context,
    const ParamsPOD             params)
{
    context.best_score( read_id, params );

    // the old best becomes second-best, and the second-best gets updated
    best_pairs.m_a2 = best_pairs.m_a1;
    best_pairs.m_o2 = best_pairs.m_o1;
    best_pairs.m_a1 = pair.m_a;
    best_pairs.m_o1 = pair.m_o;
    NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_reduce( read_id ), "update best (anchor[%u]):\n 1. score[%d], rc[%u], pos[%u]\n 2. score[%d], rc[%u], pos[%u,%u]\n", pair.m_a.mate(), pair.m_a.score(), pair.m_a.is_rc(), pair.m_a.alignment(), pair.m_o.score(), pair.m_o.is_rc(), pair.m_o.alignment(), pair.m_o.alignment() + pair.m_o.sink() );
}
172 
/// Replace the best paired alignment for a read in place, WITHOUT demoting the
/// previous best to second-best (used when the new pair is not distinct from
/// the current best — it is an improved placement of the same alignment).
///
/// \param read_id     id of the read being reduced
/// \param best_pairs  in/out best & second-best paired alignments
/// \param pair        the replacement for the best pair
/// \param context     reduction-policy callbacks
/// \param params      global alignment parameters
///
template <typename ReduceContext>
NVBIO_FORCEINLINE NVBIO_DEVICE
void replace_best(
    const uint32                read_id,
    io::BestPairedAlignments&   best_pairs,
    const io::PairedAlignments& pair,
    ReduceContext&              context,
    const ParamsPOD             params)
{
    context.best_score( read_id, params );

    best_pairs.m_a1 = pair.m_a;
    best_pairs.m_o1 = pair.m_o;
    // FIX: pair.m_o.alignment was passed without call parentheses — a pointer to
    // member function handed to a printf-style macro (ill-formed); call it instead,
    // consistent with update_best above.
    NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_reduce( read_id ), "update best (anchor[%u]):\n 1. score[%d], rc[%u], pos[%u]\n 2. score[%d], rc[%u], pos[%u,%u]\n", pair.m_a.mate(), pair.m_a.score(), pair.m_a.is_rc(), pair.m_a.alignment(), pair.m_o.score(), pair.m_o.is_rc(), pair.m_o.alignment(), pair.m_o.alignment() + pair.m_o.sink() );
}
188 
/// Update the second-best paired alignment for a read with the incoming pair
/// (the best pair is left untouched).
///
/// \param read_id     id of the read being reduced
/// \param best_pairs  in/out best & second-best paired alignments
/// \param pair        the new second-best candidate
/// \param context     reduction-policy callbacks
/// \param params      global alignment parameters
///
template <typename ReduceContext>
NVBIO_FORCEINLINE NVBIO_DEVICE
void update_second(
    const uint32                read_id,
    io::BestPairedAlignments&   best_pairs,
    const io::PairedAlignments& pair,
    ReduceContext&              context,
    const ParamsPOD             params)
{
    context.second_score( read_id, params );

    best_pairs.m_a2 = pair.m_a;
    best_pairs.m_o2 = pair.m_o;
    // FIX: pair.m_o.alignment was passed without call parentheses — a pointer to
    // member function handed to a printf-style macro (ill-formed); call it instead,
    // consistent with update_best above.
    NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_reduce( read_id ), "update second (anchor[%u]):\n 1. score[%d], rc[%u], pos[%u]\n 2. score[%d], rc[%u], pos[%u,%u]\n", pair.m_a.mate(), pair.m_a.score(), pair.m_a.is_rc(), pair.m_a.alignment(), pair.m_o.score(), pair.m_o.is_rc(), pair.m_o.alignment(), pair.m_o.alignment() + pair.m_o.sink() );
}
204 
/// Try to fold a candidate paired alignment into the best/second-best slots.
///
/// \param read_id       id of the read being reduced
/// \param best_pairs    in/out best & second-best paired alignments
/// \param pair          the candidate pair
/// \param min_distance  minimum genome distance for two alignments to count as distinct
/// \param context       reduction-policy callbacks
/// \param params        global alignment parameters
/// \return              true if the candidate updated (or matched) an existing slot
///
template <typename ReduceContext>
NVBIO_FORCEINLINE NVBIO_DEVICE
bool try_update(
    const uint32                read_id,
    io::BestPairedAlignments&   best_pairs,
    const io::PairedAlignments& pair,
    const uint32                min_distance,
    ReduceContext&              context,
    const ParamsPOD             params)
{
    const int32 score = pair.score();

    // sometimes, we might find an alignment which is better than what previously found, despite not being distinct from it...
    if (distinct_alignments( best_pairs.pair<0>(), pair, min_distance ) == false)
    {
        // this alignment is a candidate for replacing the best one
        //

        if (score > best_pairs.best_score())
            replace_best( read_id, best_pairs, pair, context, params );

        // skip locations that we have already visited, without paying the extension attempt
        return true;
    }
    else if (distinct_alignments( best_pairs.pair<1>(), pair, min_distance ) == false)
    {
        // this alignment is a candidate for replacing the second-best one
        //

        if (score > best_pairs.best_score()) // is it better than the best?
            update_best( read_id, best_pairs, pair, context, params );
        else if (score > best_pairs.second_score()) // is it better than the second-best?
            update_second( read_id, best_pairs, pair, context, params );

        // skip locations that we have already visited, without paying the extension attempt
        return true;
    }
    // check if the best alignment was unpaired or had a worse score
    else if (best_pairs.is_paired() == false || score > best_pairs.best_score())
    {
        update_best( read_id, best_pairs, pair, context, params );
        return true;
    } // check if the second-best alignment was unpaired or had a worse score
    else if (best_pairs.has_second_paired() == false || score > best_pairs.second_score())
    {
        update_second( read_id, best_pairs, pair, context, params );
        return true;
    }
    return false;
}
255 
/// Update the best unpaired alignment of one mate: the incoming alignment
/// becomes the new best, demoting the previous best to second-best.
///
/// \param read_id  id of the read being reduced
/// \param a1       in/out best alignment slot for this mate
/// \param a2       in/out second-best alignment slot for this mate
/// \param a        the new best candidate
/// \param context  reduction-policy callbacks
/// \param params   global alignment parameters
///
template <typename ReduceContext>
NVBIO_FORCEINLINE NVBIO_DEVICE
void update_best(
    const uint32            read_id,
    io::Alignment&          a1,
    io::Alignment&          a2,
    const io::Alignment&    a,
    ReduceContext&          context,
    const ParamsPOD         params)
{
    context.best_score( read_id, params ); // pretend we did find something useful, even though for unpaired alignments

    a2 = a1;
    a1 = a;
    NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_reduce( read_id ), "update best unpaired[%u]:\n 1. score[%d], rc[%u], pos[%u]\n", a.mate(), a.score(), a.is_rc(), a.alignment() );
}
272 
/// Replace the best unpaired alignment of one mate in place, WITHOUT demoting
/// the previous best (used when the new alignment is not distinct from the
/// current best — it is an improved placement of the same alignment).
///
/// \param read_id  id of the read being reduced
/// \param a1       in/out best alignment slot for this mate
/// \param a2       in/out second-best alignment slot for this mate (unchanged here)
/// \param a        the replacement alignment
/// \param context  reduction-policy callbacks
/// \param params   global alignment parameters
///
template <typename ReduceContext>
NVBIO_FORCEINLINE NVBIO_DEVICE
void replace_best(
    const uint32            read_id,
    io::Alignment&          a1,
    io::Alignment&          a2,
    const io::Alignment&    a,
    ReduceContext&          context,
    const ParamsPOD         params)
{
    context.best_score( read_id, params ); // pretend we did find something useful, even though for unpaired alignments

    a1 = a;
    NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_reduce( read_id ), "update best unpaired[%u]:\n 1. score[%d], rc[%u], pos[%u]\n", a.mate(), a.score(), a.is_rc(), a.alignment() );
}
288 
/// Update the second-best unpaired alignment of one mate with the incoming
/// alignment (the best slot is left untouched).
///
/// \param read_id  id of the read being reduced
/// \param a1       in/out best alignment slot for this mate (unchanged here)
/// \param a2       in/out second-best alignment slot for this mate
/// \param a        the new second-best candidate
/// \param context  reduction-policy callbacks
/// \param params   global alignment parameters
///
template <typename ReduceContext>
NVBIO_FORCEINLINE NVBIO_DEVICE
void update_second(
    const uint32            read_id,
    io::Alignment&          a1,
    io::Alignment&          a2,
    const io::Alignment&    a,
    ReduceContext&          context,
    const ParamsPOD         params)
{
    context.second_score( read_id, params ); // pretend we did find something useful, even though for unpaired alignments

    a2 = a;
    // FIX: a stray argument `i` (not in scope in this function) was passed to the
    // debug print macro, yielding 5 arguments for 4 format specifiers; removed.
    NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_reduce( read_id ), "update second unpaired[%u]:\n score[%d], rc[%u], pos[%u]\n", a.mate(), a.score(), a.is_rc(), a.alignment() );
}
304 
/// Try to fold a candidate unpaired alignment into one mate's best/second-best
/// slots (unpaired overload of try_update).
///
/// \param read_id       id of the read being reduced
/// \param a1            in/out best alignment slot for this mate
/// \param a2            in/out second-best alignment slot for this mate
/// \param a             the candidate alignment
/// \param min_distance  minimum genome distance for two alignments to count as distinct
/// \param context       reduction-policy callbacks
/// \param params        global alignment parameters
/// \return              true if the candidate updated (or matched) an existing slot
///
template <typename ReduceContext>
NVBIO_FORCEINLINE NVBIO_DEVICE
bool try_update(
    const uint32            read_id,
    io::Alignment&          a1,
    io::Alignment&          a2,
    const io::Alignment&    a,
    const uint32            min_distance,
    ReduceContext&          context,
    const ParamsPOD         params)
{
    // sometimes, we might find an alignment which is better than what previously found, despite not being distinct from it...
    if (distinct_alignments( a1, a, min_distance ) == false)
    {
        // this alignment is a candidate for replacing the best one
        //

        if (a.score() > a1.score())
            replace_best( read_id, a1, a2, a, context, params );

        // skip locations that we have already visited, without paying the extension attempt
        return true;
    }
    else if (distinct_alignments( a2, a, min_distance ) == false)
    {
        // this alignment is a candidate for replacing the second-best one
        //

        if (a.score() > a1.score()) // is it better than the best?
            update_best( read_id, a1, a2, a, context, params );
        else if (a.score() > a2.score()) // is it better than the second-best?
            update_second( read_id, a1, a2, a, context, params );

        // skip locations that we have already visited, without paying the extension attempt
        return true;
    }
    else if (a.score() > a1.score())
    {
        update_best( read_id, a1, a2, a, context, params );
        return true;
    }
    else if (a.score() > a2.score())
    {
        update_second( read_id, a1, a2, a, context, params );
        return true;
    }
    return false;
}
353 
///
/// Reduce the paired scores associated to each active read in the scoring
/// queues, tracking the best 2 paired alignments — or, when params.pe_unpaired
/// allows it and no pair has been found yet, the best unpaired alignments of
/// each mate separately.
///
/// Launch layout: one thread per active read, 1-D grid of BLOCKDIM-sized blocks.
///
/// \param context   reduction-policy object: scoring callbacks + failure predicate
/// \param pipeline  device view of the scoring pipeline (queues, reads, outputs)
/// \param params    global alignment parameters
///
template <typename ScoringScheme, typename PipelineType, typename ReduceContext> __global__
void score_reduce_paired_kernel(
    const ReduceContext     context,
    PipelineType            pipeline,
    const ParamsPOD         params)
{
    typedef ScoringQueuesDeviceView                 scoring_queues_type;
    typedef ReadHitsReference<scoring_queues_type>  read_hits_type;
    typedef typename read_hits_type::reference      hit_type;

    scoring_queues_type& scoring_queues = pipeline.scoring_queues;

    const uint32 thread_id = threadIdx.x + BLOCKDIM*blockIdx.x;
    if (thread_id >= scoring_queues.active_read_count()) return;

    const uint32 anchor = pipeline.anchor;

    // fetch the active read corresponding to this thread
    read_hits_type read_hits( scoring_queues, thread_id );

    const uint32 read_id = read_hits.read_info().read_id;

    // fetch current best alignments (anchor mate in best_alignments, opposite mate in best_alignments_o)
    io::BestPairedAlignments best_pairs(
        io::BestAlignments( pipeline.best_alignments[ read_id ],
                            pipeline.best_alignments[ read_id + pipeline.best_stride ] ),
        io::BestAlignments( pipeline.best_alignments_o[ read_id ],
                            pipeline.best_alignments_o[ read_id + pipeline.best_stride ] ) );

    // setup the read stream
    const uint2  read_range = pipeline.reads.get_range(read_id);
    const uint32 read_len   = read_range.y - read_range.x;

    const uint32 count = read_hits.size();

    // sort the entries in the score queue so that we process them in the order they were selected
    for (uint32 i = 0; i < count; ++i)
    {
        hit_type hit = read_hits[i];
        NVBIO_CUDA_DEBUG_ASSERT( hit.read_id == read_id, "reduce hit[%u][%u]: expected id %u, got: %u (slot: %u)\n", thread_id, i, read_id, hit.read_id, read_hits.slot(i) );

        const packed_seed seed_info = hit.seed;
        const uint32 read_rc        = seed_info.rc;
        const uint32 top_flag       = seed_info.top_flag;

        const uint2 g_pos    = make_uint2( hit.loc, hit.sink );
        const int32 score_a  = hit.score;
        const int32 score_o  = hit.opposite_score;
        const int32 score_o2 = hit.opposite_score2;
        //const int32 score    = score_a + score_o;
        //const int32 score2   = score_a + score_o2;

        // compute opposite mate placement & orientation
        bool o_left;
        bool o_fw;

        // NOTE(review): the callee name on this line was dropped by the doc
        // generator; presumed to be frame_opposite_mate — verify against the repo.
        frame_opposite_mate(
            params.pe_policy,
            anchor,
            !read_rc,
            o_left,
            o_fw );

        const uint2  o_g_pos   = make_uint2( hit.opposite_loc, hit.opposite_sink );
        const uint2  o_g_pos2  = make_uint2( hit.opposite_loc, hit.opposite_sink2 );
        const uint32 o_read_rc = !o_fw;

        // candidate pair built from the best opposite-mate sink
        const io::PairedAlignments pair(
            io::Alignment( g_pos.x,   g_pos.y   - g_pos.x,   score_a, read_rc,   anchor,  score_o > pipeline.score_limit ),
            io::Alignment( o_g_pos.x, o_g_pos.y - o_g_pos.x, score_o, o_read_rc, !anchor, score_o > pipeline.score_limit ) );

        // candidate pair built from the second-best opposite-mate sink
        const io::PairedAlignments pair2(
            io::Alignment( g_pos.x,    g_pos.y    - g_pos.x,    score_a,  read_rc,   anchor,  score_o2 > pipeline.score_limit ),
            io::Alignment( o_g_pos2.x, o_g_pos2.y - o_g_pos2.x, score_o2, o_read_rc, !anchor, score_o2 > pipeline.score_limit ) );

        bool updated = false;

        const uint32 min_distance = read_len/4;

        if (pair.is_paired())
        {
            if (try_update( read_id, best_pairs, pair, min_distance, context, params ))
                updated = true;

            // and now rinse & repeat with the second hit
            if (pair2.is_paired())
            {
                if (try_update( read_id, best_pairs, pair2, min_distance, context, params ))
                    updated = true;
            }
        }
        else if ((params.pe_unpaired == true) && (best_pairs.is_paired() == false))
        {
            //
            // We didn't find a paired alignment yet - hence we proceed keeping track of the best unpaired alignments
            // for the first and second mate separately.
            // We store best two alignments for the first mate in best_pairs.m_a*, and the ones of the second mate in best_pairs.m_o*.
            //

            const io::Alignment& a  = pair.m_a;
                  io::Alignment& a1 = anchor ? best_pairs.m_o1 : best_pairs.m_a1;
                  io::Alignment& a2 = anchor ? best_pairs.m_o2 : best_pairs.m_a2;

            //
            // update the first mate alignments
            //

            if (try_update( read_id, a1, a2, a, min_distance, context, params ))
                updated = true;
        }

        if ((updated == false) && context.failure( i, read_id, top_flag, params ))
        {
            // stop traversal for this read
            pipeline.hits.erase( read_id );
            //NVBIO_CUDA_DEBUG_PRINT_IF( params.debug.show_reduce( read_id ), "discard: (parent[%u:%u])\n score[%d + %d], rc[%u], pos[%u]\n", anchor, thread_id, i, score_a, score_o, read_rc, g_pos );
        }
    }

    // report best alignments back to global memory
    pipeline.best_alignments  [ read_id ]                        = best_pairs.m_a1;
    pipeline.best_alignments  [ read_id + pipeline.best_stride ] = best_pairs.m_a2;
    pipeline.best_alignments_o[ read_id ]                        = best_pairs.m_o1;
    pipeline.best_alignments_o[ read_id + pipeline.best_stride ] = best_pairs.m_o2;
}
493 
497 
498 } // namespace detail
499 
500 //
501 // Reduce the scores associated to each read in the input queue to find
502 // the best 2 alignments.
503 //
/// Host wrapper: launch score_reduce_kernel to reduce the scores associated to
/// each read in the input queue down to the best 2 alignments.
///
/// Grid: ceil(active_reads.in_size / BLOCKDIM) blocks of BLOCKDIM threads.
/// The launch is asynchronous; launch-configuration errors surface via
/// cudaGetLastError() and execution errors at the next synchronizing call.
///
template <typename ScoringScheme, typename ReduceContext>
void score_reduce(
    const ReduceContext                                     context,
    const BestApproxScoringPipelineState<ScoringScheme>&    pipeline,
    const ParamsPOD                                         params)
{
    typedef BestApproxScoringPipelineState<ScoringScheme> pipeline_type;

    // ceil-div so a partial trailing block covers the queue tail
    const int blocks = (pipeline.scoring_queues.active_reads.in_size + BLOCKDIM-1) / BLOCKDIM;

    detail::score_reduce_kernel<typename pipeline_type::scheme_type> <<<blocks, BLOCKDIM>>>(
        context,
        pipeline,
        params );
}
519 
520 //
521 // call the scoring kernel
522 //
/// Host wrapper: launch score_reduce_paired_kernel to reduce the paired scores
/// associated to each read in the input queue.
///
/// Grid: ceil(active_reads.in_size / BLOCKDIM) blocks of BLOCKDIM threads.
/// The launch is asynchronous; launch-configuration errors surface via
/// cudaGetLastError() and execution errors at the next synchronizing call.
///
template <typename ScoringScheme, typename ReduceContext>
void score_reduce_paired(
    const ReduceContext                                     context,
    const BestApproxScoringPipelineState<ScoringScheme>&    pipeline,
    const ParamsPOD                                         params)
{
    typedef BestApproxScoringPipelineState<ScoringScheme> pipeline_type;

    // ceil-div so a partial trailing block covers the queue tail
    const int blocks = (pipeline.scoring_queues.active_reads.in_size + BLOCKDIM-1) / BLOCKDIM;

    detail::score_reduce_paired_kernel<typename pipeline_type::scheme_type> <<<blocks, BLOCKDIM>>>(
        context,
        pipeline,
        params );
}
538 
539 } // namespace cuda
540 } // namespace bowtie2
541 } // namespace nvbio