NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
alignment_bam.cpp
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
30 #include <nvbio/basic/console.h>
31 #include <crc/crc.h>
32 
33 namespace nvbio {
34 namespace alndiff {
35 
37 {
38  BAMAlignmentStream(const char* file_name)
39  {
40  log_verbose(stderr, "opening BAM file \"%s\"... started\n", file_name);
41  m_bam_reader.Open( file_name );
42  m_offset = 0;
43  log_verbose(stderr, "opening BAM file \"%s\"... done\n", file_name);
44  }
45 
46  // return if the stream is ok
47  //
48  bool is_ok() { return true; } // TODO: add a mechanism to bamtools to know whether the file opened correctly
49 
50  // get the next batch
51  //
53  const uint32 count,
54  Alignment* batch)
55  {
56  uint32 n_read = 0;
57 
58  while (n_read < count)
59  {
60  Alignment* aln = batch + n_read;
61 
62  // clean the alignment
63  *aln = Alignment();
64 
65  BamTools::BamAlignment bam_aln;
66 
67  if (m_bam_reader.GetNextAlignment( bam_aln ) == false)
68  break;
69 
70  aln->read_id = uint32( crcCalc( bam_aln.Name.c_str(), uint32(bam_aln.Name.length()) ) );
71  aln->read_len = bam_aln.Length;
72  aln->mate = bam_aln.IsFirstMate() ? 0u : 1u;
73  aln->flag = bam_aln.AlignmentFlag;
74  aln->pos = bam_aln.Position;
75  if (aln->is_mapped())
76  {
77  aln->ref_id = bam_aln.RefID;
78  aln->mapQ = uint8( bam_aln.MapQuality );
79  bam_aln.GetEditDistance( aln->ed );
80 
81  analyze_cigar( bam_aln.CigarData, aln );
82 
83  bam_aln.GetTag( "AS", aln->score );
84  aln->has_second = bam_aln.GetTag( "XS", aln->sec_score );
85  bam_aln.GetTag( "XM", aln->n_mm );
86  bam_aln.GetTag( "XO", aln->n_gapo );
87  bam_aln.GetTag( "XG", aln->n_gape );
88 
89  const char* md = bam_aln.GetTag( "MD" );
90  if (md)
91  analyze_md( md, aln );
92  }
93 
94  ++n_read;
95  }
96  m_offset += n_read;
97  return n_read;
98  }
99 
100  void analyze_cigar(const std::vector<BamTools::CigarOp>& cigar, Alignment* aln)
101  {
102  aln->subs = aln->ins = aln->dels = 0;
103 
104  for (uint32 i = 0; i < cigar.size(); ++i)
105  {
106  const BamTools::CigarOp op = cigar[i];
107 
108  if (op.Type == 'X')
109  ++aln->n_mm;
110 
111  if (op.Type == 'M' || op.Type == 'X' || op.Type == '=')
112  aln->subs += op.Length;
113  else if (op.Type == 'I')
114  aln->ins += op.Length;
115  else if (op.Type == 'D')
116  aln->dels += op.Length;
117  }
118  }
119  void analyze_md(const char* md, Alignment* aln)
120  {
121  aln->n_mm = 0;
122 
123  for (; *md != '\0'; ++md)
124  {
125  const char c = *md;
126 
127  if (c >= '0' &&
128  c <= '9')
129  continue;
130 
131  if (c >= 'A' &&
132  c <= 'Z')
133  ++aln->n_mm;
134 
135  if (c == '^')
136  {
137  // a deletion, skip it
138  for (++md; *md != '\0' && (*md <= '0' || *md >= '9'); ++md) {}
139  }
140  }
141  }
142 
145 };
146 
147 AlignmentStream* open_bam_file(const char* file_name)
148 {
149  return new BAMAlignmentStream( file_name );
150 }
151 
152 } // alndiff namespace
153 } // nvbio namespace
154 
155