NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
io
sequence
sequence_txt.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <
nvbio/io/sequence/sequence.h
>
31
#include <
nvbio/io/sequence/sequence_priv.h
>
32
#include <
nvbio/io/output_stream.h
>
33
#include <
nvbio/basic/console.h
>
34
35
#include <zlib/zlib.h>
36
37
namespace
nvbio {
38
namespace
io {
39
42
45
48
49
// SequenceDataFile from a FASTQ file
50
// contains the code to parse FASTQ files and dump the results into a SequenceDataRAM object
51
// file access is done via derived classes
52
struct
SequenceDataFile_TXT
:
public
SequenceDataFile
53
{
54
protected
:
55
SequenceDataFile_TXT
(
56
const
char
* read_file_name,
57
const
Options
& options,
58
const
uint32
buffer_size = 64536u)
59
:
SequenceDataFile
( options ),
60
m_file_name
(read_file_name),
61
m_buffer
(buffer_size),
62
m_buffer_size
(buffer_size),
63
m_buffer_pos
(buffer_size),
64
m_line
(0)
65
{};
66
67
// get next read chunk from file and parse it (up to max reads)
68
// this can cause m_file_state to change
69
virtual
int
nextChunk
(
struct
SequenceDataEncoder
* output,
uint32
max_reads,
uint32
max_bps);
70
71
// fill m_buffer with data from the file, return the new file state
72
// this should only report EOF when no more bytes could be read
73
// derived classes should override this method to return actual file data
74
virtual
FileState
fillBuffer
(
void
) = 0;
75
76
virtual
bool
gets
(
char
* buffer,
int
len) = 0;
77
78
private
:
79
// get next character from file
80
uint8
get
();
81
82
protected
:
83
// file name we're reading from
84
const
char
*
m_file_name
;
85
86
// buffers input from the fastq file
87
std::vector<char>
m_buffer
;
88
uint32
m_buffer_size
;
89
uint32
m_buffer_pos
;
90
91
// counter for which line we're at
92
uint32
m_line
;
93
94
// error reporting from the parser: stores the character that generated an error
95
uint8
m_error_char
;
96
97
// temp buffers for data coming in from the FASTQ file: read name, base pairs and qualities
98
std::vector<char>
m_name
;
99
std::vector<uint8>
m_read_bp
;
100
std::vector<uint8>
m_read_q
;
101
};
102
103
// loader for gzipped files
104
// this also works for plain uncompressed files, as zlib does that transparently
105
struct
SequenceDataFile_TXT_gz
:
public
SequenceDataFile_TXT
106
{
107
SequenceDataFile_TXT_gz
(
108
const
char
* read_file_name,
109
const
Options
& options,
110
const
uint32
buffer_size = 64536u);
111
112
~SequenceDataFile_TXT_gz
();
113
116
virtual
FileState
fillBuffer
(
void
);
117
118
virtual
bool
gets
(
char
* buffer,
int
len) {
return
gzgets
( m_file, buffer, len ) != NULL; };
119
122
virtual
bool
rewind
();
123
124
private
:
125
gzFile
m_file;
126
};
127
132
struct
SequenceDataOutputFile_TXT
:
SequenceDataOutputStream
133
{
136
SequenceDataOutputFile_TXT
(
137
const
char
* file_name,
138
const
char
* compressor,
139
const
char
* options);
140
143
void
next
(
const
SequenceDataHost
& sequence_data);
144
147
bool
is_ok
();
148
149
private
:
150
// file name we're reading from
151
const
char
* m_file_name;
152
OutputStream
* m_file;
153
};
154
158
159
// get next character from file
//
// Serves characters out of m_buffer; once the buffer is exhausted it is
// refilled through fillBuffer(), which also updates m_file_state.
// Returns 0 if the refill leaves the file in any state other than FILE_OK.
inline uint8 SequenceDataFile_TXT::get(void)
{
    // common case: there are still unread characters in the buffer
    if (m_buffer_pos < m_buffer_size)
        return m_buffer[m_buffer_pos++];

    // buffer exhausted: grab more data from the underlying file
    // and restart reading from the beginning of the buffer
    m_file_state = fillBuffer();
    m_buffer_pos = 0;

    // if we failed to read more data, return \0
    if (m_file_state != FILE_OK)
        return 0;

    return m_buffer[m_buffer_pos++];
}
174
175
}
// namespace io
176
}
// namespace nvbio
Generated on Wed Feb 25 2015 08:33:01 for NVBIO by
1.8.4