NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
reads_fastq.cpp
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
29 #include <nvbio/basic/types.h>
30 #include <nvbio/basic/timer.h>
31 
32 #include <string.h>
33 #include <ctype.h>
34 
35 namespace nvbio {
36 namespace io {
37 
40 
43 
46 
// Parses FASTQ records from the input stream and appends them to `output`.
// NOTE(review): the function signature is not visible in this listing; `output`,
// `max_reads` and `max_bps` are used below and are presumably its parameters — confirm.
//
// Each record is consumed as: an '@' marker, the rest of that line as the read name,
// the base-pair characters up to the '+' separator, the remainder of the '+' line
// (discarded), and one line of quality values. The record is then pushed to `output`
// once per enabled strand flag in m_flags.
//
// Returns the accumulated read count (each record counts read_mult times). Returns
// uint32(-1) when a record does not start with '@' (the offending byte is stored in
// m_error_char) or when m_file_state stops being FILE_OK in the middle of a record.
// m_line is advanced as newlines are consumed.
48 {
49  uint32 n_reads = 0;
50  uint32 n_bps = 0;
51  uint8 marker;
52 
    // number of copies emitted per record: one for every strand flag set in m_flags.
    // NOTE(review): if none of the four flags is set this is 0, so the loop bound below
    // never tightens and records are consumed without being counted — confirm intended.
53  const uint32 read_mult =
54  ((m_flags & FORWARD) ? 1u : 0u) +
55  ((m_flags & REVERSE) ? 1u : 0u) +
56  ((m_flags & FORWARD_COMPLEMENT) ? 1u : 0u) +
57  ((m_flags & REVERSE_COMPLEMENT) ? 1u : 0u);
58 
    // continue only while read_mult more reads fit within max_reads and
    // read_mult * ReadDataFile::LONG_READ more base pairs fit within max_bps
59  while (n_reads + read_mult <= max_reads &&
60  n_bps + read_mult*ReadDataFile::LONG_READ <= max_bps)
61  {
62  // consume spaces & newlines
63  do {
64  marker = get();
65 
66  // count lines
67  if (marker == '\n')
68  m_line++;
69  }
70  while (marker == '\n' || marker == ' ');
71 
    // a non-OK state between records ends the chunk normally: the reads
    // gathered so far are still returned
72  // check for EOF or read errors
73  if (m_file_state != FILE_OK)
74  break;
75 
76  // if the newlines didn't end in a read marker,
77  // issue a parsing error...
78  if (marker != '@')
79  {
81  m_error_char = marker;
82  return uint32(-1);
83  }
84 
    // read name: everything after '@' up to the newline (or a 0 byte from get())
85  // read all the line
86  uint32 len = 0;
87  for (uint8 c = get(); c != '\n' && c != 0; c = get())
88  {
    // NOTE(review): the store happens before the capacity check, so this relies on
    // m_name being non-empty on entry — confirm its initial size
89  m_name[ len++ ] = c;
90 
91  // expand on demand
92  if (m_name.size() <= len)
93  m_name.resize( len * 2u );
94  }
95 
    // terminate the name so it can be passed on as a C string
96  m_name[ len++ ] = '\0';
97 
98  // check for errors
99  if (m_file_state != FILE_OK)
100  {
101  log_error(stderr, "incomplete read!\n");
102 
103  m_error_char = 0;
104  return uint32(-1);
105  }
106 
107  m_line++;
108 
    // sequence: may span several lines; it ends at the '+' separator.
    // Only bytes in 0x21..0x7E are stored, newlines are counted, and any
    // other byte is dropped silently.
109  // start reading the bp read
110  len = 0;
111  for (uint8 c = get(); c != '+' && c != 0; c = get())
112  {
113  // if (isgraph(c))
114  if (c >= 0x21 && c <= 0x7E)
115  m_read_bp[ len++ ] = c;
116  else if (c == '\n')
117  m_line++;
118 
    // the quality buffer is grown in lockstep with the sequence buffer
119  // expand on demand
120  if (m_read_bp.size() <= len)
121  {
122  m_read_bp.resize( len * 2u );
123  m_read_q.resize( len * 2u );
124  }
125  }
126 
127  // check for errors
128  if (m_file_state != FILE_OK)
129  {
130  log_error(stderr, "incomplete read!\n");
131 
132  m_error_char = 0;
133  return uint32(-1);
134  }
135 
    // skip whatever follows '+' on the separator line
136  // read all the line
137  for(uint8 c = get(); c != '\n' && c != 0; c = get()) {}
138 
139  // check for errors
140  if (m_file_state != FILE_OK)
141  {
142  log_error(stderr, "incomplete read!\n");
143 
144  m_error_char = 0;
145  return uint32(-1);
146  }
147 
148  m_line++;
149 
    // NOTE(review): unlike the two loops above, this one never checks or grows the
    // destination; m_read_q is only resized together with m_read_bp, so a quality
    // line longer than the buffer would write past its end — confirm.
    // NOTE(review): `len` is reset here, so the length handed to push_back and added
    // to n_bps is the quality-line length; nothing checks it against the sequence length.
150  // start reading the quality read
151  len = 0;
152  for (uint8 c = get(); c != '\n' && c != 0; c = get())
153  m_read_q[ len++ ] = c;
154 
155  // check for errors
156  if (m_file_state != FILE_OK)
157  {
158  log_error(stderr, "incomplete read!\n");
159 
160  m_error_char = 0;
161  return uint32(-1);
162  }
163 
164  m_line++;
165 
    // emit the record once per enabled orientation.
    // NOTE(review): the trailing arguments of each push_back call are not visible
    // in this listing.
166  if (m_flags & FORWARD)
167  {
168  output->push_back( len,
169  &m_name[0],
170  &m_read_bp[0],
171  &m_read_q[0],
175  }
176  if (m_flags & REVERSE)
177  {
178  output->push_back( len,
179  &m_name[0],
180  &m_read_bp[0],
181  &m_read_q[0],
185  }
    // NOTE(review): the condition guarding this block is not visible here;
    // presumably the FORWARD_COMPLEMENT flag counted in read_mult — confirm
187  {
188  output->push_back( len,
189  &m_name[0],
190  &m_read_bp[0],
191  &m_read_q[0],
195  }
    // NOTE(review): the condition guarding this block is not visible here;
    // presumably the REVERSE_COMPLEMENT flag counted in read_mult — confirm
197  {
198  output->push_back( len,
199  &m_name[0],
200  &m_read_bp[0],
201  &m_read_q[0],
205  }
206 
    // account for every emitted copy of this record
207  n_bps += read_mult * len;
208  n_reads += read_mult;
209  }
210  return n_reads;
211 }
212 
// Remaining parameters, base-class initializer and body of a constructor
// (the first line of the signature is not visible in this listing).
// Forwards its arguments to the ReadDataFile_FASTQ_parser base, opens
// read_file_name with zlib's gzopen in "r" mode and then calls gzbuffer.
214  const QualityEncoding qualities,
215  const uint32 max_reads,
216  const uint32 max_read_len,
217  const ReadEncoding flags)
218  : ReadDataFile_FASTQ_parser(read_file_name, qualities, max_reads, max_read_len, flags)
219 {
220  m_file = gzopen(read_file_name, "r");
    // NOTE(review): the bodies of both branches are not visible in this listing
221  if (!m_file) {
223  } else {
225  }
226 
    // NOTE(review): this runs even when gzopen failed (m_file is null) and the result is
    // ignored; zlib documents gzbuffer as returning -1 on failure — confirm this is acceptable
227  gzbuffer(m_file, m_buffer_size);
228 }
229 
// File-local float, zero-initialized; nothing in the visible code reads or writes it.
// NOTE(review): the name matches the C library function time() — confirm it is still needed.
230 static float time = 0.0f;
231 
// Refills m_buffer from the gzip stream (function signature not visible in this listing).
// Reads up to m_buffer.size() bytes with gzread and stores the result in m_buffer_size.
//
// Returns FILE_OK when the stored result is positive, FILE_EOF when it is not and
// gzeof() reports end of file, and FILE_STREAM_ERROR otherwise, after logging the
// zlib error code and message.
233 {
    // NOTE(review): gzread reports errors with a negative return value; if m_buffer_size
    // is an unsigned type (its declaration is not visible) the `<= 0` test below would
    // only catch 0 — confirm the member's type
234  m_buffer_size = gzread(m_file, &m_buffer[0], (uint32)m_buffer.size());
235 
236  if (m_buffer_size <= 0)
237  {
238  // check for EOF separately; zlib will not always return Z_STREAM_END at EOF below
239  if (gzeof(m_file))
240  {
241  return FILE_EOF;
242  } else {
243  // ask zlib what happened and inform the user
244  int err;
245  const char *msg;
246 
    // gzerror returns the message text and writes the numeric code into err
247  msg = gzerror(m_file, &err);
    // NOTE(review): <assert.h>/<cassert> is not among the visible includes;
    // presumably pulled in through a project header — confirm
248  // we're making the assumption that we never see Z_STREAM_END here
249  assert(err != Z_STREAM_END);
250 
251  log_error(stderr, "error processing FASTQ file: zlib error %d (%s)\n", err, msg);
252  return FILE_STREAM_ERROR;
253  }
254  }
255  return FILE_OK;
256 }
257 
261 
262 } // namespace io
263 } // namespace nvbio