NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
output_utils.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <nvbio/io/alignments.h>
31 #include <nvbio/basic/dna.h>
32 
33 namespace nvbio {
34 namespace io {
35 
36 // compute the CIGAR alignment position given the alignment base and the sink offset
37 inline uint32 compute_cigar_pos(const uint32 sink, const uint32 alignment)
38 {
39  return alignment + (sink & 0xFFFFu);
40 }
41 
42 // compute the reference mapped length from a CIGAR
43 template <typename vector_type>
45  const vector_type cigar,
46  const uint32 cigar_len)
47 {
48  uint32 r = 0;
49  for (uint32 i = 0; i < cigar_len; ++i)
50  {
51  const uint32 l = cigar[ cigar_len - i - 1u ].m_len;
52  const uint32 op = cigar[ cigar_len - i - 1u ].m_type;
53  if (op == Cigar::SUBSTITUTION || op == Cigar::DELETION) r += l;
54  }
55  return r;
56 }
57 
58 // count the symbols of a given type inside a CIGAR
59 template <typename vector_type>
61  const Cigar::Operation type,
62  const vector_type cigar,
63  const uint32 cigar_len)
64 {
65  uint32 r = 0;
66  for (uint32 i = 0; i < cigar_len; ++i)
67  {
68  const uint32 l = cigar[ cigar_len - i - 1u ].m_len;
69  const uint32 op = cigar[ cigar_len - i - 1u ].m_type;
70  if (op == type) r += l;
71  }
72  return r;
73 }
74 
75 // build the MD string from the internal representation
76 template <typename vector_type>
77 void analyze_md_string(const vector_type mds, uint32& n_mm, uint32& n_gapo, uint32& n_gape)
78 {
79  const uint32 mds_len = uint32(mds[0]) | (uint32(mds[1]) << 8);
80 
81  n_mm = 0;
82  n_gapo = 0;
83  n_gape = 0;
84 
85  for (uint32 i = 2; i < mds_len; )
86  {
87  const uint8 op = mds[i++];
88  if (op == MDS_MATCH)
89  {
90  uint8 l = mds[i++];
91 
92  // prolong the MDS match if it spans multiple tokens
93  while (i < mds_len && mds[i] == MDS_MATCH)
94  l += mds[i++];
95  }
96  else if (op == MDS_MISMATCH)
97  {
98  n_mm++;
99 
100  ++i;
101  }
102  else if (op == MDS_INSERTION)
103  {
104  const uint8 l = mds[i++];
105 
106  n_gapo++;
107  n_gape += l-1;
108 
109  i += l;
110  }
111  else if (op == MDS_DELETION)
112  {
113  const uint8 l = mds[i++];
114 
115  n_gapo++;
116  n_gape += l-1;
117 
118  i += l;
119  }
120  }
121 }
122 
123 } // namespace io
124 } // namespace nvbio