NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
io
reads
sam.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <zlib/zlib.h>
31
32
#include <
nvbio/io/reads/reads.h
>
33
#include <
nvbio/io/reads/reads_priv.h
>
34
#include <
nvbio/basic/console.h
>
35
36
namespace
nvbio {
37
namespace
io {
38
39
// SAM format description: http://samtools.sourceforge.net/SAM1.pdf
40
41
// flag comments come from SAMtools spec
42
// a better explanation is available at:
43
// http://genome.sph.umich.edu/wiki/SAM#What_Information_Does_SAM.2FBAM_Have_for_an_Alignment
44
/// Bit masks for the FLAG field of a SAM alignment record.
/// Each enumerator is a single bit, so several can be OR-ed together and
/// tested with a bitwise AND. The per-flag descriptions use the wording of
/// the SAM specification.
enum AlignmentFlags
{
    // SAMtools: template having multiple segments in sequencing
    SAMFlag_MultipleSegments = 0x1,
    // each segment properly aligned according to the aligner
    SAMFlag_AllSegmentsAligned = 0x2,
    // segment unmapped
    SAMFlag_SegmentUnmapped = 0x4,
    // next segment in the template unmapped
    SAMFlag_NextSegmentUnmapped = 0x8,
    // SEQ being reverse complemented
    SAMFlag_ReverseComplemented = 0x10,
    // SEQ of the next segment in the template being reversed
    SAMFlag_NextSegmentReverseComplemented = 0x20,
    // the first segment in the template
    SAMFlag_FirstSegment = 0x40,
    // the last segment in the template
    SAMFlag_LastSegment = 0x80,
    // secondary alignment
    SAMFlag_SecondaryAlignment = 0x100,
    // not passing quality controls
    SAMFlag_FailedQC = 0x200,
    // PCR or optical duplicate
    SAMFlag_Duplicate = 0x400,
    // supplementary alignment (bit defined by later revisions of the SAM spec)
    SAMFlag_SupplementaryAlignment = 0x800,
};
69
70
71
// ReadDataFile from a SAM file
72
struct
ReadDataFile_SAM
:
public
ReadDataFile
73
{
74
enum
{
LINE_BUFFER_INIT_SIZE
= 1024 };
75
76
enum
SortOrder
77
{
78
SortOrder_unknown
,
79
SortOrder_unsorted
,
80
SortOrder_queryname
,
81
SortOrder_coordinate
,
82
};
83
84
ReadDataFile_SAM
(
const
char
*read_file_name,
85
const
uint32
max_reads,
86
const
uint32
max_read_len,
87
const
ReadEncoding
flags);
88
89
virtual
int
nextChunk
(
ReadDataRAM
*output,
uint32
max_reads,
uint32
max_bps);
90
91
bool
init
(
void
);
92
93
private
:
94
bool
readLine(
void
);
95
void
rewindLine(
void
);
96
bool
parseHeaderLine(
char
*start);
97
bool
parseReferenceSequenceLine(
char
*start);
98
99
gzFile
fp;
100
101
// a buffer for a full line; this will grow as needed
102
char
*linebuf;
103
// current size of the buffer
104
int
linebuf_size;
105
// length of the current line in the buffer
106
int
line_length;
107
108
// how many lines we parsed so far
109
int
numLines;
110
111
// info from the header
112
char
*version;
113
SortOrder
sortOrder;
114
115
public
:
116
// reference sequence info
117
std::vector<std::string>
sq_names
;
118
std::vector<uint64>
sq_lengths
;
119
};
120
121
}
122
}
Generated on Wed Feb 25 2015 08:32:48 for NVBIO by
1.8.4