NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
io
reads
reads_fastq.cpp
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#include <
nvbio/io/reads/reads_fastq.h
>
29
#include <
nvbio/basic/types.h
>
30
#include <
nvbio/basic/timer.h
>
31
32
#include <
string.h
>
33
#include <ctype.h>
34
35
namespace
nvbio {
36
namespace
io {
37
40
43
46
47
int
ReadDataFile_FASTQ_parser::nextChunk
(
ReadDataRAM
*output,
uint32
max_reads,
uint32
max_bps)
48
{
49
uint32
n_reads = 0;
50
uint32
n_bps = 0;
51
uint8
marker;
52
53
const
uint32
read_mult =
54
((
m_flags
&
FORWARD
) ? 1u : 0u) +
55
((
m_flags
&
REVERSE
) ? 1u : 0u) +
56
((
m_flags
&
FORWARD_COMPLEMENT
) ? 1u : 0u) +
57
((
m_flags
&
REVERSE_COMPLEMENT
) ? 1u : 0u);
58
59
while
(n_reads + read_mult <= max_reads &&
60
n_bps + read_mult*
ReadDataFile::LONG_READ
<= max_bps)
61
{
62
// consume spaces & newlines
63
do
{
64
marker =
get
();
65
66
// count lines
67
if
(marker ==
'\n'
)
68
m_line
++;
69
}
70
while
(marker ==
'\n'
|| marker ==
' '
);
71
72
// check for EOF or read errors
73
if
(
m_file_state
!=
FILE_OK
)
74
break
;
75
76
// if the newlines didn't end in a read marker,
77
// issue a parsing error...
78
if
(marker !=
'@'
)
79
{
80
m_file_state
=
FILE_PARSE_ERROR
;
81
m_error_char
= marker;
82
return
uint32
(-1);
83
}
84
85
// read all the line
86
uint32
len = 0;
87
for
(
uint8
c =
get
(); c !=
'\n'
&& c != 0; c =
get
())
88
{
89
m_name
[ len++ ] = c;
90
91
// expand on demand
92
if
(
m_name
.size() <= len)
93
m_name
.resize( len * 2u );
94
}
95
96
m_name
[ len++ ] =
'\0'
;
97
98
// check for errors
99
if
(
m_file_state
!=
FILE_OK
)
100
{
101
log_error
(stderr,
"incomplete read!\n"
);
102
103
m_error_char
= 0;
104
return
uint32
(-1);
105
}
106
107
m_line
++;
108
109
// start reading the bp read
110
len = 0;
111
for
(
uint8
c =
get
(); c !=
'+'
&& c != 0; c =
get
())
112
{
113
// if (isgraph(c))
114
if
(c >= 0x21 && c <= 0x7E)
115
m_read_bp
[ len++ ] = c;
116
else
if
(c ==
'\n'
)
117
m_line
++;
118
119
// expand on demand
120
if
(
m_read_bp
.size() <= len)
121
{
122
m_read_bp
.resize( len * 2u );
123
m_read_q
.resize( len * 2u );
124
}
125
}
126
127
// check for errors
128
if
(
m_file_state
!=
FILE_OK
)
129
{
130
log_error
(stderr,
"incomplete read!\n"
);
131
132
m_error_char
= 0;
133
return
uint32
(-1);
134
}
135
136
// read all the line
137
for
(
uint8
c =
get
(); c !=
'\n'
&& c != 0; c =
get
()) {}
138
139
// check for errors
140
if
(
m_file_state
!=
FILE_OK
)
141
{
142
log_error
(stderr,
"incomplete read!\n"
);
143
144
m_error_char
= 0;
145
return
uint32
(-1);
146
}
147
148
m_line
++;
149
150
// start reading the quality read
151
len = 0;
152
for
(
uint8
c =
get
(); c !=
'\n'
&& c != 0; c =
get
())
153
m_read_q
[ len++ ] = c;
154
155
// check for errors
156
if
(
m_file_state
!=
FILE_OK
)
157
{
158
log_error
(stderr,
"incomplete read!\n"
);
159
160
m_error_char
= 0;
161
return
uint32
(-1);
162
}
163
164
m_line
++;
165
166
if
(
m_flags
&
FORWARD
)
167
{
168
output->
push_back
( len,
169
&
m_name
[0],
170
&
m_read_bp
[0],
171
&
m_read_q
[0],
172
m_quality_encoding
,
173
m_truncate_read_len
,
174
ReadDataRAM::NO_OP
);
175
}
176
if
(
m_flags
&
REVERSE
)
177
{
178
output->
push_back
( len,
179
&
m_name
[0],
180
&
m_read_bp
[0],
181
&
m_read_q
[0],
182
m_quality_encoding
,
183
m_truncate_read_len
,
184
ReadDataRAM::REVERSE_OP
);
185
}
186
if
(
m_flags
&
FORWARD_COMPLEMENT
)
187
{
188
output->
push_back
( len,
189
&
m_name
[0],
190
&
m_read_bp
[0],
191
&
m_read_q
[0],
192
m_quality_encoding
,
193
m_truncate_read_len
,
194
ReadDataRAM::COMPLEMENT_OP
);
195
}
196
if
(
m_flags
&
REVERSE_COMPLEMENT
)
197
{
198
output->
push_back
( len,
199
&
m_name
[0],
200
&
m_read_bp
[0],
201
&
m_read_q
[0],
202
m_quality_encoding
,
203
m_truncate_read_len
,
204
ReadDataRAM::REVERSE_COMPLEMENT_OP
);
205
}
206
207
n_bps += read_mult * len;
208
n_reads += read_mult;
209
}
210
return
n_reads;
211
}
212
213
// Open `read_file_name` through zlib and record the outcome in m_file_state
// (FILE_OPEN_FAILED when gzopen returns a null handle, FILE_OK otherwise).
// All arguments are forwarded unchanged to the ReadDataFile_FASTQ_parser base.
//
// \param read_file_name  path of the FASTQ file to open
// \param qualities       quality encoding, forwarded to the base parser
// \param max_reads       forwarded to the base parser
// \param max_read_len    forwarded to the base parser
// \param flags           read encoding flags, forwarded to the base parser
ReadDataFile_FASTQ_gz::ReadDataFile_FASTQ_gz(const char *read_file_name,
                                             const QualityEncoding qualities,
                                             const uint32 max_reads,
                                             const uint32 max_read_len,
                                             const ReadEncoding flags)
    : ReadDataFile_FASTQ_parser(read_file_name, qualities, max_reads, max_read_len, flags)
{
    m_file = gzopen(read_file_name, "r");
    if (!m_file)
    {
        // nothing to configure on a handle that was never opened
        m_file_state = FILE_OPEN_FAILED;
        return;
    }

    m_file_state = FILE_OK;

    // size zlib's internal buffer; this has to happen before the first read
    gzbuffer(m_file, m_buffer_size);
}
229
230
// File-local float, initialised to zero. It is not referenced in the visible
// part of this file; presumably a timing accumulator (timer.h is included
// above) - TODO confirm its use or remove it.
static
float
time = 0.0f;
231
232
// Refill m_buffer with the next chunk of decompressed data.
//
// On success m_buffer_size holds the number of bytes gzread produced and
// FILE_OK is returned. When nothing could be read m_buffer_size is reset to
// zero and the result is FILE_EOF if zlib reports end-of-file, or
// FILE_STREAM_ERROR (after logging zlib's diagnostic) otherwise.
ReadDataFile_FASTQ_parser::FileState ReadDataFile_FASTQ_gz::fillBuffer(void)
{
    // gzread signals failure with a negative count: keep the result in a signed
    // local so the error is still detectable should m_buffer_size be unsigned
    const int n_read = gzread(m_file, &m_buffer[0], (uint32)m_buffer.size());

    if (n_read > 0)
    {
        m_buffer_size = n_read;
        return FILE_OK;
    }

    // never leave a bogus byte count behind
    m_buffer_size = 0;

    // check for EOF separately; zlib will not always return Z_STREAM_END at EOF below
    if (gzeof(m_file))
        return FILE_EOF;

    // ask zlib what happened and inform the user
    int err;
    const char *msg = gzerror(m_file, &err);

    // we're making the assumption that we never see Z_STREAM_END here
    assert(err != Z_STREAM_END);

    log_error(stderr, "error processing FASTQ file: zlib error %d (%s)\n", err, msg);
    return FILE_STREAM_ERROR;
}
257
261
262
}
// namespace io
263
}
// namespace nvbio
Generated on Wed Feb 25 2015 08:33:00 for NVBIO by
1.8.4