NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
score_opposite_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
32 
33 namespace nvbio {
34 namespace bowtie2 {
35 namespace cuda {
36 
37 namespace detail {
38 
41 
44 
47 
53 template <typename AlignerType, typename PipelineType>
54 struct BestOppositeScoreStream : public AlignmentStreamBase<OPPOSITE_SCORE_STREAM,AlignerType,PipelineType>
55 {
57  typedef typename base_type::context_type context_type;
59 
71  const PipelineType _pipeline,
72  const AlignerType _aligner,
73  const ParamsPOD _params) :
74  base_type( _pipeline, _aligner, _params ) {}
75 
79  uint32 max_pattern_length() const { return base_type::m_pipeline.reads_o.max_sequence_len(); }
80 
85 
89  uint32 size() const { return base_type::m_pipeline.opposite_queue_size; }
90 
95  const uint32 i,
96  context_type* context) const
97  {
98  context->idx = base_type::m_pipeline.idx_queue[ base_type::m_pipeline.opposite_queue[i] ];
99 
100  // initialize score and sink
101  context->sink.invalidate();
102 
103  // fetch the hit to process
104  HitReference<HitQueuesDeviceView> hit = base_type::m_pipeline.scoring_queues.hits[ context->idx ];
105 
106  const uint32 read_rc = hit.seed.rc;
107  const uint32 read_id = hit.read_id;
108 
109  const uint32 g_pos = hit.loc;
110 
111  const uint2 a_read_range = base_type::m_pipeline.reads.get_range( read_id );
112  const uint32 a_len = a_read_range.y - a_read_range.x;
113  const int32 a_optimal_score = base_type::m_pipeline.scoring_scheme.perfect_score( a_len );
114  const int32 a_worst_score = base_type::m_pipeline.scoring_scheme.min_score( a_len );
115 
116  const uint2 o_read_range = base_type::m_pipeline.reads_o.get_range( read_id );
117  const uint32 o_len = o_read_range.y - o_read_range.x;
118  const int32 o_optimal_score = base_type::m_pipeline.scoring_scheme.perfect_score( o_len );
119  const int32 o_worst_score = base_type::m_pipeline.scoring_scheme.min_score( o_len );
120 
121  // compute the pair score threshold
122  const int32 anchor_score = hit.score;
123 
125  io::BestAlignments( base_type::m_pipeline.best_alignments[ read_id ], base_type::m_pipeline.best_alignments[ read_id + base_type::m_pipeline.best_stride ] ),
126  io::BestAlignments( base_type::m_pipeline.best_alignments_o[ read_id ], base_type::m_pipeline.best_alignments_o[ read_id + base_type::m_pipeline.best_stride ] ) );
127 
128  const int32 target_pair_score = nvbio::min(
129  compute_target_score( best, a_worst_score, o_worst_score ) + 1,
130  a_optimal_score + o_optimal_score );
131 
132  int32 target_mate_score = target_pair_score - anchor_score;
133 
134  #if 1
135  //
136  // bound the score of this mate by the worst score allowed for its own read length.
137  // NOTE: disabling this is equivalent to allowing a worst score proportional to the total
138  // length of the read pair.
139  //
140 
141  target_mate_score = nvbio::max( target_mate_score, o_worst_score );
142  #endif
143 
144  // assign the final score threshold
145  context->min_score = nvbio::max( target_mate_score, base_type::m_pipeline.score_limit );
146 
147  #if DP_REPORT_MULTIPLE
148  context->sink.set_min_score( context->min_score );
149  #endif
150 
151  // check if it's even possible to reach the score threshold
152  if (context->min_score > o_optimal_score)
153  {
154  NVBIO_CUDA_DEBUG_PRINT_IF( base_type::m_params.debug.show_score( context->read_id, false ), "score opposite: min-score too high: %d > %d (mate[%u], rc[%u], [qid %u])\n", context->min_score, o_optimal_score, base_type::m_pipeline.anchor ? 0u : 1u, context->read_rc, i );
155  return false;
156  }
157 
158  // frame the alignment
159  bool o_left;
160  bool o_fw;
161 
163  base_type::m_params.pe_policy,
164  base_type::m_pipeline.anchor,
165  !read_rc,
166  o_left,
167  o_fw );
168 
169  // setup the read info
170  context->mate = 1u;
171  context->read_rc = !o_fw;
172  context->read_id = read_id;
173  context->read_range = o_read_range;
174 
175  // FIXME: re-implement the logic of Bowtie2's otherMate() function!
176  const int32 max_ref_gaps = aln::max_text_gaps( base_type::aligner(), context->min_score, o_len );
177  const uint32 o_gapped_len = o_len + max_ref_gaps;
178 
179  if (o_left)
180  {
181  const uint32 max_end = g_pos + a_len + o_gapped_len > base_type::m_params.min_frag_len ? g_pos + a_len + o_gapped_len - base_type::m_params.min_frag_len : 0u;
182  context->genome_begin = g_pos + a_len > base_type::m_params.max_frag_len ? (g_pos + a_len) - base_type::m_params.max_frag_len : 0u;
183  context->genome_end = base_type::m_params.pe_overlap ? g_pos + a_len : g_pos;
184  context->genome_end = nvbio::min( context->genome_end, max_end );
185  }
186  else
187  {
188  const uint32 min_begin = g_pos + base_type::m_params.min_frag_len > o_gapped_len ? g_pos + base_type::m_params.min_frag_len - o_gapped_len : 0u;
189  context->genome_end = g_pos + base_type::m_params.max_frag_len;
190  context->genome_begin = base_type::m_params.pe_overlap ? g_pos : g_pos + a_len;
191  context->genome_begin = nvbio::max( context->genome_begin, min_begin );
192  }
193  context->genome_end = nvbio::min( context->genome_end, base_type::m_pipeline.genome_length );
194 
195  // don't align if completely outside genome
196  if (context->genome_begin >= base_type::m_pipeline.genome_length)
197  return false;
198 
199  // bound against genome end
200  context->genome_end = nvbio::min( context->genome_end, base_type::m_pipeline.genome_length );
201 
202  // skip locations that we have already visited
203  const uint32 mate = base_type::m_pipeline.anchor ? 0u : 1u;
204  const bool skip = (mate == best.m_a1.m_mate && (context->read_rc == best.m_a1.m_rc && g_pos == best.m_a1.m_align)) ||
205  (mate == best.m_o1.m_mate && (context->read_rc == best.m_o1.m_rc && g_pos == best.m_o1.m_align)) ||
206  (mate == best.m_a2.m_mate && (context->read_rc == best.m_a2.m_rc && g_pos == best.m_a2.m_align)) ||
207  (mate == best.m_o2.m_mate && (context->read_rc == best.m_o2.m_rc && g_pos == best.m_o2.m_align)) ||
208  (context->genome_begin == context->genome_end);
209 
210  return !skip;
211  }
212 
216  void output(
217  const uint32 i,
218  const context_type* context) const
219  {
220  // write the final hit.score and hit.sink attributes
221  HitReference<HitQueuesDeviceView> hit = base_type::m_pipeline.scoring_queues.hits[ context->idx ];
222 
223  #if DP_REPORT_MULTIPLE
224  const aln::BestColumnSink<int32,20>& sink = context->sink;
225  uint32 best_idx1, best_idx2;
226 
227  sink.best2( best_idx1, best_idx2, (context->read_range.y - context->read_range.x) / 2u );
228 
229  const uint32 genome_sink = sink.sinks[best_idx1].x != uint32(-1) ? sink.sinks[best_idx1].x : 0u;
230  const uint32 genome_sink2 = sink.sinks[best_idx2].x != uint32(-1) ? sink.sinks[best_idx2].x : 0u;
231 
232  hit.opposite_score = (sink.scores[best_idx1] >= context->min_score) ? sink.scores[best_idx1] : scheme_type::worst_score;
233  hit.opposite_score2 = (sink.scores[best_idx2] >= context->min_score) ? sink.scores[best_idx2] : scheme_type::worst_score;
234  #else
235  const aln::BestSink<int32> sink = context->sink;
236  const uint32 genome_sink = sink.sink.x != uint32(-1) ? sink.sink.x : 0u;
237  const uint32 genome_sink2 = 0u;
238 
239  hit.opposite_score = (sink.score >= context->min_score) ? sink.score : scheme_type::worst_score;
240  hit.opposite_score2 = scheme_type::worst_score;
241  #endif
242 
243  hit.opposite_loc = context->genome_begin;
244  hit.opposite_sink = context->genome_begin + genome_sink;
245  hit.opposite_sink2 = context->genome_begin + genome_sink2;
246  NVBIO_CUDA_DEBUG_PRINT_IF( base_type::m_params.debug.show_score( context->read_id, (sink.score >= context->min_score) ), "score opposite: %d (min[%d], mate[%u], rc[%u], pos[%u:%u], [qid %u])\n", sink.score, context->min_score, base_type::m_pipeline.anchor ? 0u : 1u, context->read_rc, context->genome_begin, context->genome_end, i );
247  }
248 };
249 
253 template <typename aligner_type, typename pipeline_type>
255  const pipeline_type& pipeline,
256  const aligner_type aligner,
257  const ParamsPOD params)
258 {
260 
261  stream_type stream(
262  pipeline,
263  aligner,
264  params );
265 
267  //aln::BatchedAlignmentScore<stream_type, aln::DeviceStagedThreadScheduler> batch;
268 
269  batch.enact( stream, pipeline.dp_buffer_size, pipeline.dp_buffer );
270 }
271 
275 
276 } // namespace detail
277 
278 } // namespace cuda
279 } // namespace bowtie2
280 } // namespace nvbio