NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
alignment_utils.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
33 #include <nvbio/io/alignments.h>
34 #include <nvbio/io/utils.h>
36 
37 namespace nvbio {
38 namespace bowtie2 {
39 namespace cuda {
40 
41 namespace detail {
42 
45 
48 
51 
63  const int policy,
64  const uint32 anchor,
65  const bool anchor_fw,
66  bool& left,
67  bool& fw)
68 {
69  const bool anchor_1 = (anchor == 0);
70 
71  switch(policy)
72  {
73  case io::PE_POLICY_FF:
74  {
75  left = (anchor_1 != anchor_fw);
76  fw = anchor_fw;
77  break;
78  }
79  case io::PE_POLICY_RR:
80  {
81  left = (anchor_1 == anchor_fw);
82  fw = anchor_fw;
83  break;
84  }
85  case io::PE_POLICY_FR:
86  {
87  left = !anchor_fw;
88  fw = !anchor_fw;
89  break;
90  }
91  case io::PE_POLICY_RF:
92  {
93  left = anchor_fw;
94  fw = !anchor_fw;
95  break;
96  }
97  }
98 }
99 
104 int32 compute_target_score(const io::BestPairedAlignments& best, const int32 a_worst_score, const int32 o_worst_score)
105 {
106  if (best.has_second_paired() == false)
107  return a_worst_score + o_worst_score;
108 
109  // reproduce Bowtie2's score bounding behavior (under its default setting 'tighten = 3')
110  const int32 delta = best.best_score() - best.second_score();
111  return best.second_score() + (delta * 3)/4;
112 }
113 
115 {
119 };
120 
125 template <typename vector>
127 {
128  // constructor
130  Backtracker(vector vec, const uint32 _capacity) : out(vec), size(0), prev(255), capacity(_capacity) {}
131 
132  // encode a soft clipping operation
134  void clip(const uint32 l)
135  {
136  if (l)
137  {
138  NVBIO_CUDA_DEBUG_ASSERT( size < capacity, "Backtracker: exceeded maximum CIGAR size!\n" );
140  }
141  }
142 
143  // encode a new operation
145  void push(uint8 type)
146  {
148  type == aln::DELETION ||
149  type == aln::INSERTION,
150  "Backtracker: unknown op type %u\n", uint32(type) );
151 
152  // perform run-length encoding of the symbol sequence
153  if (prev == type)
154  out[size-1u].m_len++;
155  else
156  {
157  NVBIO_CUDA_DEBUG_ASSERT( size < capacity, "Backtracker: exceeded maximum CIGAR size!\n" );
158  // encode a new symbol
159  out[size++] = io::Cigar( type, 1u );
160  prev = type;
161  }
162  }
163 
168 };
169 
172 template <typename AlignerType, typename PipelineType>
174 {
175  typedef typename PipelineType::genome_iterator genome_iterator;
176  typedef typename PipelineType::read_batch_type read_batch_type;
177  typedef typename PipelineType::scheme_type scheme_type;
178  typedef AlignerType aligner_type;
179 
180  static const uint32 CACHE_SIZE = 64;
182 
185  typedef typename pattern_string::qual_string_type qual_string;
186 
187  typedef PackedStringLoader<
188  typename genome_iterator::storage_iterator,
189  genome_iterator::SYMBOL_SIZE,
190  genome_iterator::BIG_ENDIAN,
192  typedef typename text_loader_type::iterator text_iterator;
194 
195 
196  template <typename context_type>
198  void load(const PipelineType& pipeline, const context_type* context)
199  {
200  // select the read batch based on context->mate
201  read_batch_type reads = context->mate ?
202  pipeline.reads_o :
203  pipeline.reads;
204 
205  const DirType read_dir = context->read_rc ? FORWARD : REVERSE;
206  const ReadType read_type = context->read_rc ? COMPLEMENT : STANDARD;
207 
209  reads,
210  context->read_range,
211  read_dir, // the reads are loaded in REVERSE fashion, so we invert this flag here
212  read_type );
213 
214  quals = pattern.qualities();
215  text = text_string(
216  context->genome_end - context->genome_begin,
217  text_loader.load( pipeline.genome + context->genome_begin, context->genome_end - context->genome_begin ) );
218  }
219 
225 };
226 
229 template <AlignmentStreamType TYPE>
231 
234 template <>
236 {
238 };
239 
242 template <>
244 {
245  #if DP_REPORT_MULTIPLE
247  #else
249  #endif
250 };
251 
254 template <>
256 {
257  io::Cigar cigar[1024];
260 
265 };
266 
273 template <AlignmentStreamType TYPE, typename AlignerType, typename PipelineType>
275 {
277  typedef typename PipelineType::scheme_type scheme_type;
278  typedef AlignerType aligner_type;
279 
283  {
286  uint2 read_range;
292  };
293 
297  const PipelineType _pipeline,
298  const aligner_type _aligner,
299  const ParamsPOD _params) :
300  m_pipeline( _pipeline ), m_aligner( _aligner ), m_params( _params ) {}
301 
305  const aligner_type& aligner() const { return m_aligner; };
306 
310  uint32 pattern_length(const uint32 i, const context_type* context) const
311  {
312  return context->read_range.y - context->read_range.x;
313  }
314 
318  uint32 text_length(const uint32 i, const context_type* context) const
319  {
320  return context->genome_end - context->genome_begin;
321  }
322 
332  const uint32 i,
333  const uint32 window_begin,
334  const uint32 window_end,
335  const context_type* context,
336  strings_type* strings) const
337  {
338  // load the strings
339  strings->load( m_pipeline, context );
340  }
341 
342  PipelineType m_pipeline;
345 };
346 
350 
351 } // namespace detail
352 
353 } // namespace cuda
354 } // namespace bowtie2
355 } // namespace nvbio