NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
io
reads
reads_fastq.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <
nvbio/io/reads/reads.h
>
31
#include <
nvbio/io/reads/reads_priv.h
>
32
#include <
nvbio/basic/console.h
>
33
34
#include <zlib/zlib.h>
35
36
namespace
nvbio {
37
namespace
io {
38
41
44
47
48
// ReadDataFile from a FASTQ file
49
// contains the code to parse FASTQ files and dump the results into a ReadDataRAM object
50
// file access is done via derived classes
51
struct
ReadDataFile_FASTQ_parser
:
public
ReadDataFile
52
{
53
protected
:
54
ReadDataFile_FASTQ_parser
(
const
char
*read_file_name,
55
const
QualityEncoding
quality_encoding,
56
const
uint32
max_reads,
57
const
uint32
max_read_len,
58
const
ReadEncoding
flags,
59
const
uint32
buffer_size = 64536u)
60
:
ReadDataFile
(max_reads, max_read_len, flags),
61
m_file_name
(read_file_name),
62
m_quality_encoding
(quality_encoding),
63
m_buffer
(buffer_size),
64
m_buffer_size
(buffer_size),
65
m_buffer_pos
(buffer_size),
66
m_line
(0),
67
m_name
( 1024*1024 ),
68
m_read_bp
( 1024*1024 ),
69
m_read_q
( 1024*1024 )
70
{};
71
72
// get next read chunk from file and parse it (up to max reads)
73
// this can cause m_file_state to change
74
virtual
int
nextChunk
(
ReadDataRAM
*output,
uint32
max_reads,
uint32
max_bps);
75
76
// fill m_buffer with data from the file, return the new file state
77
// this should only report EOF when no more bytes could be read
78
// derived classes should override this method to return actual file data
79
virtual
FileState
fillBuffer
(
void
) = 0;
80
81
private
:
82
// get next character from file
83
uint8
get
();
84
85
protected
:
86
// file name we're reading from
87
const
char
*
m_file_name
;
88
// the quality encoding we're using (for FASTQ, this comes from the command line or defaults to Phred33)
89
QualityEncoding
m_quality_encoding
;
90
91
// buffers input from the fastq file
92
std::vector<char>
m_buffer
;
93
uint32
m_buffer_size
;
94
uint32
m_buffer_pos
;
95
96
// counter for which line we're at
97
uint32
m_line
;
98
99
// error reporting from the parser: stores the character that generated an error
100
uint8
m_error_char
;
101
102
// temp buffers for data coming in from the FASTQ file: read name, base pairs and qualities
103
std::vector<char>
m_name
;
104
std::vector<uint8>
m_read_bp
;
105
std::vector<uint8>
m_read_q
;
106
};
107
108
// loader for gzipped files
109
// this also works for plain uncompressed files, as zlib does that transparently
110
struct
ReadDataFile_FASTQ_gz
:
public
ReadDataFile_FASTQ_parser
111
{
112
ReadDataFile_FASTQ_gz
(
const
char
*read_file_name,
113
const
QualityEncoding
qualities,
114
const
uint32
max_reads,
115
const
uint32
max_read_len,
116
const
ReadEncoding
flags);
117
118
virtual
FileState
fillBuffer
(
void
);
119
120
private
:
121
gzFile
m_file;
122
};
123
127
128
// Return the next character from the input buffer, refilling the buffer via
// fillBuffer() once it has been fully consumed. Returns \0 if the refill
// leaves the file in any state other than FILE_OK.
inline uint8 ReadDataFile_FASTQ_parser::get(void)
{
    // fast path: there is still unread data in the buffer
    if (m_buffer_pos < m_buffer_size)
        return m_buffer[m_buffer_pos++];

    // buffer exhausted: pull more data from the underlying file and
    // restart reading from the beginning of the buffer
    m_file_state = fillBuffer();
    m_buffer_pos = 0;

    // the refill failed (or hit the end of the file): signal it with \0
    if (m_file_state != FILE_OK)
        return 0;

    return m_buffer[m_buffer_pos++];
}
143
144
}
// namespace io
145
}
// namespace nvbio
Generated on Wed Feb 25 2015 08:33:00 for NVBIO by
1.8.4