NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
io
reads
reads.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <
nvbio/basic/strided_iterator.h
>
31
#include <
nvbio/basic/packedstream.h
>
32
#include <
nvbio/basic/vector_view.h
>
33
#include <
nvbio/strings/string_set.h
>
34
#include <stdio.h>
35
#include <stdlib.h>
36
#include <vector>
37
38
namespace
nvbio {
39
namespace
io {
40
58
61
73
74
// identifies how base qualities are encoded in a given read file;
// the numeric values are spelled out explicitly so they stay stable
enum QualityEncoding
{
    Phred   = 0,    // phred quality
    Phred33 = 1,    // phred quality + 33
    Phred64 = 2,    // phred quality + 64
    Solexa  = 3     // Solexa quality (scale/offset not specified in this header)
};
85
86
// bit flags describing the types of supported read strands;
// every enumerator owns a distinct bit
enum ReadEncoding
{
    FORWARD            = 1 << 0,    // 0x0001
    REVERSE            = 1 << 1,    // 0x0002
    FORWARD_COMPLEMENT = 1 << 2,    // 0x0004
    REVERSE_COMPLEMENT = 1 << 3     // 0x0008
};
94
95
// orientation of the two mates of a paired-end read
// (F = forward, R = reverse; the first letter names the first mate)
enum PairedEndPolicy
{
    PE_POLICY_FF = 0,
    PE_POLICY_FR,       // = 1
    PE_POLICY_RF,       // = 2
    PE_POLICY_RR        // = 3
};
104
116
template
<
117
typename
IndexIterator,
118
typename
ReadStorageIterator,
119
typename
QualStorageIterator,
120
typename
NameStorageIterator>
121
struct
ReadDataView
122
{
123
typedef
IndexIterator
index_iterator
;
124
typedef
typename
to_const<index_iterator>::type
const_index_iterator
;
125
126
typedef
ReadStorageIterator
read_storage_iterator
;
127
typedef
typename
to_const<read_storage_iterator>::type
const_read_storage_iterator
;
128
129
typedef
QualStorageIterator
qual_storage_iterator
;
130
typedef
typename
to_const<qual_storage_iterator>::type
const_qual_storage_iterator
;
131
132
typedef
NameStorageIterator
name_storage_iterator
;
133
typedef
typename
to_const<name_storage_iterator>::type
const_name_storage_iterator
;
134
135
// symbol size for reads
136
static
const
uint32
READ_BITS
= 4;
137
// big endian?
138
static
const
bool
HI_BITS
=
false
;
// deprecated
139
// big endian?
140
static
const
bool
READ_BIG_ENDIAN
=
false
;
141
// symbols per word
142
static
const
uint32
READ_SYMBOLS_PER_WORD
= (4*
sizeof
(
uint32
))/
READ_BITS
;
143
144
typedef
PackedStream
<
145
read_storage_iterator
,
uint8
,
READ_BITS
,
READ_BIG_ENDIAN
>
read_stream_type
;
146
typedef
PackedStream
<
147
const_read_storage_iterator
,
uint8
,
READ_BITS
,
READ_BIG_ENDIAN
>
const_read_stream_type
;
148
149
typedef
typename
read_stream_type::iterator
read_stream_iterator
;
150
typedef
typename
const_read_stream_type::iterator
const_read_stream_iterator
;
151
152
typedef
vector_view<read_stream_iterator>
read_string
;
153
typedef
vector_view<const_read_stream_iterator>
const_read_string
;
154
155
typedef
ConcatenatedStringSet
<
156
read_stream_iterator
,
157
index_iterator
>
read_string_set_type
;
158
159
typedef
ConcatenatedStringSet
<
160
const_read_stream_iterator
,
161
const_index_iterator
>
const_read_string_set_type
;
162
163
typedef
ConcatenatedStringSet
<
164
qual_storage_iterator
,
165
index_iterator
>
qual_string_set_type
;
166
167
typedef
ConcatenatedStringSet
<
168
const_qual_storage_iterator
,
169
const_index_iterator
>
const_qual_string_set_type
;
170
171
typedef
ConcatenatedStringSet
<
172
name_storage_iterator
,
173
index_iterator
>
name_string_set_type
;
174
175
typedef
ConcatenatedStringSet
<
176
const_name_storage_iterator
,
177
const_index_iterator
>
const_name_string_set_type
;
178
181
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
182
ReadDataView
()
183
:
m_n_reads
(0),
184
m_name_stream_len
(0),
185
m_read_stream_len
(0),
186
m_read_stream_words
(0),
187
m_min_read_len
(
uint32
(-1)),
188
m_max_read_len
(0),
189
m_avg_read_len
(0)
190
{};
191
194
template
<
195
typename
InIndexIterator,
196
typename
InReadIterator,
197
typename
InQualIterator,
198
typename
InNameIterator>
199
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
200
ReadDataView
(
const
ReadDataView<InIndexIterator,InReadIterator,InQualIterator,InNameIterator>
& in)
201
:
m_n_reads
(in.
m_n_reads
),
202
m_name_stream
(NameStorageIterator(in.
m_name_stream
)),
203
m_name_stream_len
(in.
m_name_stream_len
),
204
m_name_index
(IndexIterator(in.
m_name_index
)),
205
m_read_stream
(ReadStorageIterator(in.
m_read_stream
)),
206
m_read_stream_len
(in.
m_read_stream_len
),
207
m_read_stream_words
(in.
m_read_stream_words
),
208
m_read_index
(IndexIterator(in.
m_read_index
)),
209
m_qual_stream
(QualStorageIterator(in.
m_qual_stream
)),
210
m_min_read_len
(in.
m_min_read_len
),
211
m_max_read_len
(in.
m_max_read_len
),
212
m_avg_read_len
(in.
m_avg_read_len
)
213
{}
214
215
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
index_iterator
name_index
() {
return
m_name_index
; }
216
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
index_iterator
read_index
() {
return
m_read_index
; }
217
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
name_storage_iterator
name_stream
() {
return
m_name_stream
; }
218
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
read_storage_iterator
read_stream_storage
() {
return
m_read_stream
; }
219
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
read_stream_type
read_stream
() {
return
read_stream_type
(
m_read_stream
); }
220
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
qual_storage_iterator
qual_stream
() {
return
m_qual_stream
; }
221
222
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_index_iterator
const_name_index
()
const
{
return
m_name_index
; }
223
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_index_iterator
const_read_index
()
const
{
return
m_read_index
; }
224
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_name_storage_iterator
const_name_stream
()
const
{
return
m_name_stream
; }
225
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_read_storage_iterator
const_read_stream_storage
()
const
{
return
m_read_stream
; }
226
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_read_stream_type
const_read_stream
()
const
{
return
const_read_stream_type
(
m_read_stream
); }
227
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_qual_storage_iterator
const_qual_stream
()
const
{
return
m_qual_stream
; }
228
229
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_index_iterator
name_index
()
const
{
return
const_name_index
(); }
230
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_index_iterator
read_index
()
const
{
return
const_read_index
(); }
231
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_name_storage_iterator
name_stream
()
const
{
return
const_name_stream
(); }
232
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_read_storage_iterator
read_stream_storage
()
const
{
return
const_read_stream_storage
(); }
233
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_read_stream_type
read_stream
()
const
{
return
const_read_stream
(); }
234
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_qual_storage_iterator
qual_stream
()
const
{
return
const_qual_stream
(); }
235
236
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
size
()
const
{
return
m_n_reads
; }
237
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
bps
()
const
{
return
m_read_stream_len
; }
238
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
words
()
const
{
return
m_read_stream_words
; }
239
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
name_stream_len
()
const
{
return
m_name_stream_len
; }
240
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
max_read_len
()
const
{
return
m_max_read_len
; }
241
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
min_read_len
()
const
{
return
m_min_read_len
; }
242
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
avg_read_len
()
const
{
return
m_avg_read_len
; }
243
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint2
get_range
(
const
uint32
i)
const
{
return
make_uint2(
m_read_index
[i],
m_read_index
[i+1]); }
244
247
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
read_string_set_type
read_string_set
()
248
{
249
return
read_string_set_type
(
250
size
(),
251
read_stream
().
begin
(),
252
read_index
() );
253
}
254
257
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_read_string_set_type
read_string_set
()
const
258
{
259
return
const_read_string_set_type
(
260
size
(),
261
read_stream
().
begin
(),
262
read_index
() );
263
}
264
267
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_read_string_set_type
const_read_string_set
()
const
268
{
269
return
const_read_string_set_type
(
270
size
(),
271
read_stream
().
begin
(),
272
read_index
() );
273
}
274
277
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
read_string
get_read
(
const
uint32
i)
278
{
279
const
uint2 read_range =
get_range
( i );
280
return
read_string
( read_range.y - read_range.x,
read_stream
().
begin
() + read_range.x );
281
}
282
285
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
read_string
get_read
(
const
uint32
i)
const
286
{
287
const
uint2 read_range =
get_range
( i );
288
return
const_read_string
( read_range.y - read_range.x,
read_stream
().
begin
() + read_range.x );
289
}
290
293
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
qual_string_set_type
qual_string_set
()
294
{
295
return
qual_string_set_type
(
296
size
(),
297
qual_stream
(),
298
read_index
() );
299
}
300
303
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_qual_string_set_type
qual_string_set
()
const
304
{
305
return
const_qual_string_set_type
(
306
size
(),
307
qual_stream
(),
308
read_index
() );
309
}
310
313
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_qual_string_set_type
const_qual_string_set
()
const
314
{
315
return
const_qual_string_set_type
(
316
size
(),
317
qual_stream
(),
318
read_index
() );
319
}
320
323
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
name_string_set_type
name_string_set
()
324
{
325
return
name_string_set_type
(
326
size
(),
327
name_stream
(),
328
name_index
() );
329
}
330
333
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_name_string_set_type
name_string_set
()
const
334
{
335
return
const_name_string_set_type
(
336
size
(),
337
name_stream
(),
338
name_index
() );
339
}
340
343
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_name_string_set_type
const_name_string_set
()
const
344
{
345
return
const_name_string_set_type
(
346
size
(),
347
name_stream
(),
348
name_index
() );
349
}
350
351
public
:
352
// number of reads in this struct
353
uint32
m_n_reads
;
354
355
// a pointer to a buffer containing the names of all the reads in this batch
356
name_storage_iterator
m_name_stream
;
357
// the length (in bytes) of the name_stream buffer
358
uint32
m_name_stream_len
;
359
// an array of uint32 with the byte indices of the starting locations of each name in name_stream
360
index_iterator
m_name_index
;
361
362
// a pointer to a buffer containing the read data
363
// note that this could point at either host or device memory
364
read_storage_iterator
m_read_stream
;
365
// the length of read_stream in base pairs
366
uint32
m_read_stream_len
;
367
// the number of words in read_stream
368
uint32
m_read_stream_words
;
369
// an array of uint32 with the indices of the starting locations of each read in read_stream (in base pairs)
370
index_iterator
m_read_index
;
371
372
// a pointer to a buffer containing quality data
373
// (the indices in m_read_index are also valid for this buffer)
374
qual_storage_iterator
m_qual_stream
;
375
376
// statistics on the reads: minimum size, maximum size, average size
377
uint32
m_min_read_len
;
378
uint32
m_max_read_len
;
379
uint32
m_avg_read_len
;
380
};
381
385
// the read-data view specialized on raw pointers: uint32 words for the index
// arrays and the read storage, chars for the quality and name streams
typedef ReadDataView<uint32*,uint32*,char*,char*> ReadDataCore;
386
391
struct
ReadData
:
public
ReadDataCore
392
{
393
typedef
ReadDataView<uint32*,uint32*,char*,char*>
plain_view_type
;
394
typedef
ReadDataView<const uint32*,const uint32*,const char*,const char*>
const_plain_view_type
;
395
398
ReadData
() :
ReadDataCore
()
399
{
400
m_name_stream
= NULL;
401
m_name_index
= NULL;
402
m_read_stream
= NULL;
403
m_read_index
= NULL;
404
m_qual_stream
= NULL;
405
}
406
409
virtual
~ReadData
() {}
410
};
411
415
struct
ReadDataRAM
:
public
ReadData
416
{
419
enum
StrandOp
420
{
421
NO_OP
= 0x0000,
422
REVERSE_OP
= 0x0001,
423
COMPLEMENT_OP
= 0x0002,
424
REVERSE_COMPLEMENT_OP
= 0x0003,
425
};
426
427
ReadDataRAM
();
428
431
void
reserve
(
const
uint32
n_reads,
const
uint32
n_bps);
432
443
void
push_back
(
uint32
read_len,
444
const
char
* name,
445
const
uint8
* base_pairs,
446
const
uint8
* quality,
447
const
QualityEncoding
quality_encoding,
448
const
uint32
truncate_read_len,
449
const
StrandOp
conversion_flags);
450
453
void
end_batch
(
void
);
454
455
std::vector<uint32>
m_read_vec
;
456
std::vector<uint32>
m_read_index_vec
;
457
std::vector<char>
m_qual_vec
;
458
std::vector<char>
m_name_vec
;
459
std::vector<uint32>
m_name_index_vec
;
460
};
461
465
struct
ReadDataDevice
:
public
ReadData
466
{
467
enum
{
468
READS
= 0x01,
469
QUALS
= 0x02,
470
};
471
474
ReadDataDevice
(
const
ReadData
& host_data,
const
uint32
flags =
READS
);
475
478
~ReadDataDevice
();
479
480
uint64
allocated
()
const
{
return
m_allocated; }
481
482
private
:
483
uint64
m_allocated;
484
};
485
486
typedef
ReadDataRAM
ReadDataHost
;
487
typedef
ReadDataDevice
ReadDataCUDA
;
488
493
struct
ReadDataStream
494
{
495
ReadDataStream
(
uint32
truncate_read_len =
uint32
(-1))
496
:
m_truncate_read_len
(truncate_read_len)
497
{
498
};
499
502
virtual
~ReadDataStream
() {}
503
506
virtual
ReadData
*
next
(
const
uint32
batch_size,
const
uint32
batch_bps =
uint32
(-1)) = 0;
507
510
virtual
bool
is_ok
() = 0;
511
512
// maximum length of a read; longer reads are truncated to this size
513
uint32
m_truncate_read_len
;
514
};
515
516
529
// open a read file and return a stream over it
//
//   read_file_name   name of the file to open
//   qualities        quality encoding of the file
//   max_reads        defaults to uint32(-1)
//   max_read_len     defaults to uint32(-1)
//   flags            read encoding, defaults to REVERSE
//
// NOTE(review): presumably max_reads / max_read_len cap the number of reads
// and the read length, and the caller owns the returned stream; the return
// value on failure is not visible in this header -- confirm in the
// implementation
ReadDataStream* open_read_file(
    const char*             read_file_name,
    const QualityEncoding   qualities,
    const uint32            max_reads    = uint32(-1),
    const uint32            max_read_len = uint32(-1),
    const ReadEncoding      flags        = REVERSE);
534
537
538
}
// namespace io
539
542
inline
543
io::ReadData::plain_view_type
plain_view
(
io::ReadData
& read_data)
544
{
545
return
io::ReadData::plain_view_type
( read_data );
546
}
547
550
inline
551
io::ReadData::const_plain_view_type
plain_view
(
const
io::ReadData
& read_data)
552
{
553
return
io::ReadData::const_plain_view_type
( read_data );
554
}
555
556
}
// namespace nvbio
Generated on Wed Feb 25 2015 08:33:00 for NVBIO by Doxygen 1.8.4