NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
io
reads
reads_txt.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <
nvbio/io/reads/reads.h
>
31
#include <
nvbio/io/reads/reads_priv.h
>
32
#include <
nvbio/basic/console.h
>
33
34
#include <zlib/zlib.h>
35
36
namespace
nvbio {
37
namespace
io {
38
41
44
47
48
// ReadDataFile from a FASTQ file
49
// contains the code to parse FASTQ files and dump the results into a ReadDataRAM object
50
// file access is done via derived classes
51
struct
ReadDataFile_TXT
:
public
ReadDataFile
52
{
53
protected
:
54
ReadDataFile_TXT
(
const
char
*read_file_name,
55
const
QualityEncoding
quality_encoding,
56
const
uint32
max_reads,
57
const
uint32
max_read_len,
58
const
ReadEncoding
flags,
59
const
uint32
buffer_size = 64536u)
60
:
ReadDataFile
(max_reads, max_read_len, flags),
61
m_file_name
(read_file_name),
62
m_quality_encoding
(quality_encoding),
63
m_buffer
(buffer_size),
64
m_buffer_size
(buffer_size),
65
m_buffer_pos
(buffer_size),
66
m_line
(0)
67
{};
68
69
// get next read chunk from file and parse it (up to max reads)
70
// this can cause m_file_state to change
71
virtual
int
nextChunk
(
ReadDataRAM
*output,
uint32
max_reads,
uint32
max_bps);
72
73
// fill m_buffer with data from the file, return the new file state
74
// this should only report EOF when no more bytes could be read
75
// derived classes should override this method to return actual file data
76
virtual
FileState
fillBuffer
(
void
) = 0;
77
78
private
:
79
// get next character from file
80
uint8
get
();
81
82
protected
:
83
// file name we're reading from
84
const
char
*
m_file_name
;
85
// the quality encoding we're using (for FASTQ, this comes from the command line or defaults to Phred33)
86
QualityEncoding
m_quality_encoding
;
87
88
// buffers input from the fastq file
89
std::vector<char>
m_buffer
;
90
uint32
m_buffer_size
;
91
uint32
m_buffer_pos
;
92
93
// counter for which line we're at
94
uint32
m_line
;
95
96
// error reporting from the parser: stores the character that generated an error
97
uint8
m_error_char
;
98
99
// temp buffers for data coming in from the FASTQ file: read name, base pairs and qualities
100
std::vector<char>
m_name
;
101
std::vector<uint8>
m_read_bp
;
102
std::vector<uint8>
m_read_q
;
103
};
104
105
// loader for gzipped files
106
// this also works for plain uncompressed files, as zlib does that transparently
107
struct
ReadDataFile_TXT_gz
:
public
ReadDataFile_TXT
108
{
109
ReadDataFile_TXT_gz
(
const
char
*read_file_name,
110
const
QualityEncoding
qualities,
111
const
uint32
max_reads,
112
const
uint32
max_read_len,
113
const
ReadEncoding
flags,
114
const
uint32
buffer_size = 64536u);
115
116
virtual
FileState
fillBuffer
(
void
);
117
118
private
:
119
gzFile
m_file;
120
};
121
125
126
// Return the next character of input, refilling the buffer through
// fillBuffer() once every buffered character has been consumed.
// Returns 0 when the refill leaves m_file_state at anything other than FILE_OK.
inline uint8 ReadDataFile_TXT::get(void)
{
    // fast path: there is still unread data in the buffer
    if (m_buffer_pos < m_buffer_size)
        return m_buffer[m_buffer_pos++];

    // buffer exhausted: pull more data from the underlying file, then rewind
    m_file_state = fillBuffer();
    m_buffer_pos = 0;

    // nothing more could be read: hand back \0
    if (m_file_state != FILE_OK)
        return 0;

    return m_buffer[m_buffer_pos++];
}
141
142
}
// namespace io
143
}
// namespace nvbio
Generated on Wed Feb 25 2015 08:33:00 for NVBIO by
1.8.4