NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
io
sequence
sequence_fastq.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <
nvbio/io/sequence/sequence.h
>
31
#include <
nvbio/io/sequence/sequence_priv.h
>
32
#include <
nvbio/io/output_stream.h
>
33
#include <
nvbio/basic/console.h
>
34
35
#include <zlib/zlib.h>
36
37
namespace
nvbio {
38
namespace
io {
39
42
45
48
53
struct
SequenceDataFile_FASTQ_parser
:
public
SequenceDataFile
54
{
55
protected
:
56
SequenceDataFile_FASTQ_parser
(
57
const
char
* read_file_name,
58
const
SequenceDataFile::Options
& options,
59
const
uint32
buffer_size = 64536u)
60
:
SequenceDataFile
( options ),
61
m_file_name
(read_file_name),
62
m_buffer
(buffer_size),
63
m_buffer_size
(buffer_size),
64
m_buffer_pos
(buffer_size),
65
m_line
(0),
66
m_name
( 1024*1024 ),
67
m_read_bp
( 1024*1024 ),
68
m_read_q
( 1024*1024 )
69
{}
70
71
// get next read chunk from file and parse it (up to max reads)
72
// this can cause m_file_state to change
73
virtual
int
nextChunk
(
struct
SequenceDataEncoder
*output,
uint32
max_reads,
uint32
max_bps);
74
75
// fill m_buffer with data from the file, return the new file state
76
// this should only report EOF when no more bytes could be read
77
// derived classes should override this method to return actual file data
78
virtual
FileState
fillBuffer
(
void
) = 0;
79
80
virtual
bool
gets
(
char
* buffer,
int
len) = 0;
81
82
private
:
83
// get next character from file
84
char
get
();
85
86
protected
:
87
// file name we're reading from
88
const
char
*
m_file_name
;
89
90
// buffers input from the fastq file
91
std::vector<char>
m_buffer
;
92
uint32
m_buffer_size
;
93
uint32
m_buffer_pos
;
94
95
// counter for which line we're at
96
uint32
m_line
;
97
98
// error reporting from the parser: stores the character that generated an error
99
uint8
m_error_char
;
100
101
// temp buffers for data coming in from the FASTQ file: read name, base pairs and qualities
102
std::vector<char>
m_name
;
103
std::vector<uint8>
m_read_bp
;
104
std::vector<uint8>
m_read_q
;
105
};
106
110
struct
SequenceDataFile_FASTQ_gz
:
public
SequenceDataFile_FASTQ_parser
111
{
112
SequenceDataFile_FASTQ_gz
(
113
const
char
* read_file_name,
114
const
SequenceDataFile::Options
& options);
115
116
~SequenceDataFile_FASTQ_gz
();
117
118
virtual
FileState
fillBuffer
(
void
);
119
120
virtual
bool
gets
(
char
* buffer,
int
len) {
return
gzgets
( m_file, buffer, len ) != NULL; };
121
124
virtual
bool
rewind
();
125
126
private
:
127
gzFile
m_file;
128
};
129
130
135
struct
SequenceDataOutputFile_FASTQ
:
SequenceDataOutputStream
136
{
139
SequenceDataOutputFile_FASTQ
(
140
const
char
* file_name,
141
const
char
* compressor,
142
const
char
* options);
143
146
void
next
(
const
SequenceDataHost
& sequence_data);
147
150
bool
is_ok
();
151
152
private
:
153
// file name we're reading from
154
const
char
* m_file_name;
155
OutputStream
* m_file;
156
};
157
161
162
inline
char
SequenceDataFile_FASTQ_parser::get(
void
)
163
{
164
if
(
m_buffer_pos
>=
m_buffer_size
/*|| m_buffer[m_buffer_pos] == '\0'*/
)
165
{
166
// check whether we had already reached the end of file
167
if
(
m_buffer_size
<
m_buffer
.size())
168
{
169
m_file_state
=
FILE_EOF
;
170
return
0;
171
}
172
else
173
{
174
// grab more data from the underlying file
175
m_file_state
=
fillBuffer
();
176
m_buffer_pos
= 0;
177
178
// if we failed to read more data, return \0
179
if
(
m_file_state
!=
FILE_OK
)
180
return
0;
181
}
182
}
183
184
return
m_buffer
[
m_buffer_pos
++];
185
}
186
187
}
// namespace io
188
}
// namespace nvbio
Generated on Wed Feb 25 2015 08:33:01 for NVBIO by
1.8.4