NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
reads_txt.cpp
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
29 #include <nvbio/basic/types.h>
30 
31 #include <string.h>
32 #include <ctype.h>
33 
34 namespace nvbio {
35 namespace io {
36 
39 
42 
45 
// Read the next batch of reads from a plain-text input with one read per line.
//
// Each non-empty line is appended to `output` once for every strand flag set
// in m_flags (FORWARD, REVERSE, FORWARD_COMPLEMENT, REVERSE_COMPLEMENT).
//
// \param output     container receiving the reads through push_back()
// \param max_reads  upper bound on the number of reads emitted by this call
// \param max_bps    upper bound on the number of base pairs emitted by this call
// \return           the number of reads pushed (n_reads, converted to int)
//
// NOTE(review): this listing elides several source lines (the numbering jumps
// over 92-94, 102-104, 106, 112-114, 116 and 122-124): the trailing arguments
// of every push_back() call and the conditions guarding the third and fourth
// blocks are not visible here.
46 int ReadDataFile_TXT::nextChunk(ReadDataRAM *output, uint32 max_reads, uint32 max_bps)
47 {
 // every read in this format is pushed with an empty name
48  const char* name = "";
49 
50  uint32 n_reads = 0;
51  uint32 n_bps = 0;
52 
 // number of output reads generated per input line: one per enabled flag
 // NOTE(review): if none of the four flags is set, read_mult is 0 and neither
 // budget test below can ever fail; the loop then only ends on the
 // m_file_state check - confirm that callers always set at least one flag
53  const uint32 read_mult =
54  ((m_flags & FORWARD) ? 1u : 0u) +
55  ((m_flags & REVERSE) ? 1u : 0u) +
56  ((m_flags & FORWARD_COMPLEMENT) ? 1u : 0u) +
57  ((m_flags & REVERSE_COMPLEMENT) ? 1u : 0u);
58 
 // the budgets are tested BEFORE the next line is read, so the bp test reserves
 // ReadDataFile::LONG_READ bps per output read (presumably a worst-case read
 // length - TODO confirm against its definition)
59  while (n_reads + read_mult <= max_reads &&
60  n_bps + read_mult*ReadDataFile::LONG_READ <= max_bps)
61  {
62  // reset the read
63  m_read_bp.erase( m_read_bp.begin(), m_read_bp.end() );
64 
 // a line ends at '\n' or at a 0 byte returned by get(); only bytes in the
 // range 0x21..0x7E are kept, everything else on the line is dropped
65  // read an entire line
66  for (uint8 c = get(); c != '\n' && c != 0; c = get())
67  {
68  // if (isgraph(c))
69  if (c >= 0x21 && c <= 0x7E)
70  m_read_bp.push_back( c );
71  }
72 
 // counts every line consumed, including lines that yield no read
73  ++m_line;
74 
 // m_read_q only ever grows: it is padded with char(255) up to the length of
 // the longest line seen so far and is not shrunk for shorter lines
75  if (m_read_q.size() < m_read_bp.size())
76  {
77  // extend the quality score vector if needed
78  const size_t old_size = m_read_q.size();
79  m_read_q.resize( m_read_bp.size() );
80  for (size_t i = old_size; i < m_read_bp.size(); ++i)
81  m_read_q[i] = char(255);
82  }
83 
 // lines with no retained characters produce no output reads
84  if (m_read_bp.size())
85  {
86  if (m_flags & FORWARD)
87  {
88  output->push_back(uint32( m_read_bp.size() ),
89  name,
90  &m_read_bp[0],
91  &m_read_q[0],
95  }
96  if (m_flags & REVERSE)
97  {
98  output->push_back(uint32( m_read_bp.size() ),
99  name,
100  &m_read_bp[0],
101  &m_read_q[0],
105  }
107  {
108  output->push_back(uint32( m_read_bp.size() ),
109  name,
110  &m_read_bp[0],
111  &m_read_q[0],
115  }
117  {
118  output->push_back(uint32( m_read_bp.size() ),
119  name,
120  &m_read_bp[0],
121  &m_read_q[0],
125  }
126 
 // account for the actual length of the read, once per emitted copy
127  n_bps += read_mult * (uint32)m_read_bp.size();
128  n_reads += read_mult;
129  }
130 
 // the line read above has already been emitted; stop before reading another
131  // check for end-of-file
132  if (m_file_state != FILE_OK)
133  break;
134  }
135  return n_reads;
136 }
137 
// Construct a reader for a text read file opened through zlib's gzopen().
//
// All arguments are forwarded unchanged to the ReadDataFile_TXT base
// constructor; read_file_name is additionally used to open the file.
//
// NOTE(review): the bodies of both branches of the `if (!m_file)` test below
// (listing lines 148 and 150) are not visible in this listing.
138 ReadDataFile_TXT_gz::ReadDataFile_TXT_gz(const char *read_file_name,
139  const QualityEncoding qualities,
140  const uint32 max_reads,
141  const uint32 max_read_len,
142  const ReadEncoding flags,
143  const uint32 buffer_size)
144  : ReadDataFile_TXT(read_file_name, qualities, max_reads, max_read_len, flags, buffer_size)
145 {
 // open for reading; gzopen() yields a null handle on failure
146  m_file = gzopen(read_file_name, "r");
147  if (!m_file) {
149  } else {
151  }
152 
 // NOTE(review): this call is reached on both branches above, i.e. also when
 // gzopen() failed and m_file is null, and its return value is ignored -
 // confirm this is intended
153  gzbuffer(m_file, m_buffer_size);
154 }
155 
// NOTE(review): the signature line of this definition (listing line 156) is not
// visible in this listing; the comments below describe the visible body only.
//
// Reads up to m_buffer.size() bytes from m_file into m_buffer with gzread() and
// stores the result in m_buffer_size. Returns FILE_OK when the stored value is
// positive, FILE_EOF when it is not and gzeof() reports end-of-file, and
// FILE_STREAM_ERROR (after logging the zlib error) otherwise.
157 {
 // NOTE(review): gzread() returns an int that is negative on error; if
 // m_buffer_size is declared unsigned (its declaration is not visible here),
 // the `<= 0` test below only matches 0 - TODO confirm its type
158  m_buffer_size = gzread(m_file, &m_buffer[0], (uint32)m_buffer.size());
159  if (m_buffer_size <= 0)
160  {
161  // check for EOF separately; zlib will not always return Z_STREAM_END at EOF below
162  if (gzeof(m_file))
163  {
164  return FILE_EOF;
165  } else {
166  // ask zlib what happened and inform the user
167  int err;
168  const char *msg;
169 
 // gzerror() returns the message and writes the numeric code into err
170  msg = gzerror(m_file, &err);
171  // we're making the assumption that we never see Z_STREAM_END here
172  assert(err != Z_STREAM_END);
173 
174  log_error(stderr, "error processing TXT file: zlib error %d (%s)\n", err, msg);
175  return FILE_STREAM_ERROR;
176  }
177  }
178 
 // data was read into m_buffer
179  return FILE_OK;
180 }
181 
185 
186 } // namespace io
187 } // namespace nvbio