NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
io
reads
sam.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <zlib/zlib.h>
31
32
#include <
nvbio/io/reads/reads.h
>
33
#include <
nvbio/io/reads/reads_priv.h
>
34
#include <
nvbio/basic/console.h
>
35
36
namespace
nvbio {
37
namespace
io {
38
39
// SAM format description: http://samtools.sourceforge.net/SAM1.pdf
40
41
// flag comments come from SAMtools spec
42
// a better explanation is available at:
43
// http://genome.sph.umich.edu/wiki/SAM#What_Information_Does_SAM.2FBAM_Have_for_an_Alignment
44
// Bit flags carried in the FLAG field of a SAM alignment record.
// Every enumerator is a distinct single bit, so a record's FLAG value can be
// tested against any of them with a bitwise AND.
enum AlignmentFlags
{
    // SAMtools: template having multiple segments in sequencing
    SAMFlag_MultipleSegments                = 0x1,
    // each segment properly aligned according to the aligner
    SAMFlag_AllSegmentsAligned              = 0x2,
    // segment unmapped
    SAMFlag_SegmentUnmapped                 = 0x4,
    // next segment in the template unmapped
    SAMFlag_NextSegmentUnmapped             = 0x8,
    // SEQ being reverse complemented
    SAMFlag_ReverseComplemented             = 0x10,
    // SEQ of the next segment in the template being reversed
    SAMFlag_NextSegmentReverseComplemented  = 0x20,
    // the first segment in the template
    SAMFlag_FirstSegment                    = 0x40,
    // the last segment in the template
    SAMFlag_LastSegment                     = 0x80,
    // secondary alignment
    SAMFlag_SecondaryAlignment              = 0x100,
    // not passing quality controls
    SAMFlag_FailedQC                        = 0x200,
    // PCR or optical duplicate
    SAMFlag_Duplicate                       = 0x400,
    // supplementary alignment (bit 0x800 of the FLAG field in the SAM spec)
    SAMFlag_SupplementaryAlignment          = 0x800,
};
69
70
71
// ReadDataFile from a SAM file
72
struct
ReadDataFile_SAM
:
public
ReadDataFile
73
{
74
enum
{
LINE_BUFFER_INIT_SIZE
= 1024 };
75
76
enum
SortOrder
77
{
78
SortOrder_unknown
,
79
SortOrder_unsorted
,
80
SortOrder_queryname
,
81
SortOrder_coordinate
,
82
};
83
84
ReadDataFile_SAM
(
const
char
*read_file_name,
85
const
uint32
max_reads,
86
const
uint32
max_read_len,
87
const
ReadEncoding
flags);
88
89
virtual
int
nextChunk
(
ReadDataRAM
*output,
uint32
max_reads,
uint32
max_bps);
90
91
bool
init
(
void
);
92
93
private
:
94
bool
readLine(
void
);
95
void
rewindLine(
void
);
96
bool
parseHeaderLine(
char
*start);
97
bool
parseReferenceSequenceLine(
char
*start);
98
99
gzFile
fp;
100
101
// a buffer for a full line; this will grow as needed
102
char
*linebuf;
103
// current size of the buffer
104
int
linebuf_size;
105
// length of the current line in the buffer
106
int
line_length;
107
108
// how many lines we parsed so far
109
int
numLines;
110
111
// info from the header
112
char
*version;
113
SortOrder
sortOrder;
114
115
public
:
116
// reference sequence info
117
std::vector<std::string>
sq_names
;
118
std::vector<uint64>
sq_lengths
;
119
};
120
121
}
122
}
Generated on Wed Feb 25 2015 08:32:48 for NVBIO by
doxygen 1.8.4