NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio-aln-diff
alignment_bam.cpp
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#include <
nvbio-aln-diff/alignment.h
>
29
#include <
contrib/bamtools/BamReader.h
>
30
#include <
nvbio/basic/console.h
>
31
#include <
crc/crc.h
>
32
33
namespace
nvbio {
34
namespace
alndiff {
35
36
struct
BAMAlignmentStream
:
public
AlignmentStream
37
{
38
BAMAlignmentStream
(
const
char
* file_name)
39
{
40
log_verbose
(stderr,
"opening BAM file \"%s\"... started\n"
, file_name);
41
m_bam_reader
.
Open
( file_name );
42
m_offset
= 0;
43
log_verbose
(stderr,
"opening BAM file \"%s\"... done\n"
, file_name);
44
}
45
46
// return if the stream is ok
47
//
48
bool
is_ok
() {
return
true
; }
// TODO: add a mechanism to bamtools to know whether the file opened correctly
49
50
// get the next batch
51
//
52
uint32
next_batch
(
53
const
uint32
count,
54
Alignment
* batch)
55
{
56
uint32
n_read = 0;
57
58
while
(n_read < count)
59
{
60
Alignment
* aln = batch + n_read;
61
62
// clean the alignment
63
*aln =
Alignment
();
64
65
BamTools::BamAlignment
bam_aln;
66
67
if
(
m_bam_reader
.
GetNextAlignment
( bam_aln ) ==
false
)
68
break
;
69
70
aln->
read_id
=
uint32
(
crcCalc
( bam_aln.
Name
.c_str(),
uint32
(bam_aln.
Name
.length()) ) );
71
aln->
read_len
= bam_aln.
Length
;
72
aln->
mate
= bam_aln.
IsFirstMate
() ? 0u : 1u;
73
aln->
flag
= bam_aln.
AlignmentFlag
;
74
aln->
pos
= bam_aln.
Position
;
75
if
(aln->
is_mapped
())
76
{
77
aln->
ref_id
= bam_aln.
RefID
;
78
aln->
mapQ
=
uint8
( bam_aln.
MapQuality
);
79
bam_aln.
GetEditDistance
( aln->
ed
);
80
81
analyze_cigar
( bam_aln.
CigarData
, aln );
82
83
bam_aln.
GetTag
(
"AS"
, aln->
score
);
84
aln->
has_second
= bam_aln.
GetTag
(
"XS"
, aln->
sec_score
);
85
bam_aln.
GetTag
(
"XM"
, aln->
n_mm
);
86
bam_aln.
GetTag
(
"XO"
, aln->
n_gapo
);
87
bam_aln.
GetTag
(
"XG"
, aln->
n_gape
);
88
89
const
char
* md = bam_aln.
GetTag
(
"MD"
);
90
if
(md)
91
analyze_md
( md, aln );
92
}
93
94
++n_read;
95
}
96
m_offset
+= n_read;
97
return
n_read;
98
}
99
100
void
analyze_cigar
(
const
std::vector<BamTools::CigarOp>&
cigar
,
Alignment
* aln)
101
{
102
aln->
subs
= aln->
ins
= aln->
dels
= 0;
103
104
for
(
uint32
i = 0; i < cigar.size(); ++i)
105
{
106
const
BamTools::CigarOp
op = cigar[i];
107
108
if
(op.
Type
==
'X'
)
109
++aln->
n_mm
;
110
111
if
(op.
Type
==
'M'
|| op.
Type
==
'X'
|| op.
Type
==
'='
)
112
aln->
subs
+= op.
Length
;
113
else
if
(op.
Type
==
'I'
)
114
aln->
ins
+= op.
Length
;
115
else
if
(op.
Type
==
'D'
)
116
aln->
dels
+= op.
Length
;
117
}
118
}
119
void
analyze_md
(
const
char
* md,
Alignment
* aln)
120
{
121
aln->
n_mm
= 0;
122
123
for
(; *md !=
'\0'
; ++md)
124
{
125
const
char
c = *md;
126
127
if
(c >=
'0'
&&
128
c <=
'9'
)
129
continue
;
130
131
if
(c >=
'A'
&&
132
c <=
'Z'
)
133
++aln->
n_mm
;
134
135
if
(c ==
'^'
)
136
{
137
// a deletion, skip it
138
for
(++md; *md !=
'\0'
&& (*md <= '0' || *md >=
'9'
); ++md) {}
139
}
140
}
141
}
142
143
BamTools::BamReader
m_bam_reader
;
144
uint32
m_offset
;
145
};
146
147
// create an alignment stream reading from the given BAM file
//
// \param file_name     path of the BAM file to open
// \return              a newly allocated BAMAlignmentStream, returned through
//                      its AlignmentStream base (allocated with new; presumably
//                      the caller is expected to delete it - confirm against callers)
//
AlignmentStream* open_bam_file(const char* file_name)
{
    AlignmentStream* stream = new BAMAlignmentStream( file_name );
    return stream;
}
151
152
}
// alndiff namespace
153
}
// nvbio namespace
154
155
Generated on Wed Feb 25 2015 08:32:51 for NVBIO by Doxygen 1.8.4