NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sequence_sam.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <zlib/zlib.h>
31 
34 #include <nvbio/basic/console.h>
35 
36 namespace nvbio {
37 namespace io {
38 
41 
44 
47 
48 // SAM format description: http://samtools.sourceforge.net/SAM1.pdf
49 
54 {
55  // SAMtools: template having multiple segments in sequencing
57  // each segment properly aligned according to the aligner
59  // segment unmapped
61  // next segment in the template unmapped
63  // SEQ being reverse complemented
65  // SEQ of the next segment in the template being reverse complemented
67  // the first segment in the template
68  SAMFlag_FirstSegment = 0x40,
69  // the last segment in the template
70  SAMFlag_LastSegment = 0x80,
71  // secondary alignment
73  // not passing quality controls
74  SAMFlag_FailedQC = 0x200,
75  // PCR or optical duplicate
76  SAMFlag_Duplicate = 0x400,
77 };
78 
79 
83 {
84  enum { LINE_BUFFER_INIT_SIZE = 1024 };
85 
86  enum SortOrder
87  {
92  };
93 
97  const char* read_file_name,
98  const SequenceDataFile::Options& options);
99 
102  virtual int nextChunk(struct SequenceDataEncoder *output, uint32 max_reads, uint32 max_bps);
103 
106  virtual bool rewind();
107 
108  bool init(void);
109 
110 private:
111  bool readLine(void);
112  void rewindLine(void);
113  bool parseHeaderLine(char *start);
114  bool parseReferenceSequenceLine(char *start);
115 
116  gzFile fp;
117  char* linebuf;
118  int linebuf_size;
119  int line_length;
120  int numLines;
121 
122  // info from the header
123  char *version;
124  SortOrder sortOrder;
125 
126 public:
127  // reference sequence info
128  std::vector<std::string> sq_names;
129  std::vector<uint64> sq_lengths;
130 };
131 
132 }
133 }