NVBIO
Main Page
Modules
Classes
Examples
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
nvbio
io
reads
reads.h
Go to the documentation of this file.
1
/*
2
* nvbio
3
* Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the NVIDIA CORPORATION nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
#pragma once
29
30
#include <
nvbio/basic/strided_iterator.h
>
31
#include <
nvbio/basic/packedstream.h
>
32
#include <
nvbio/basic/vector_view.h
>
33
#include <
nvbio/strings/string_set.h
>
34
#include <stdio.h>
35
#include <stdlib.h>
36
#include <vector>
37
38
namespace
nvbio {
39
namespace
io {
40
58
61
73
74
// Identifies how base quality values are encoded in a read file.
enum QualityEncoding
{
    Phred   = 0,    // phred quality, no offset
    Phred33 = 1,    // phred quality + 33
    Phred64 = 2,    // phred quality + 64
    Solexa  = 3     // Solexa encoding (offset/scale not specified in this header)
};
85
86
// Bit flags describing the types of supported read strands.
// Each enumerator occupies its own bit so that values can be OR-ed together.
enum ReadEncoding
{
    FORWARD            = 1 << 0,    // 0x0001
    REVERSE            = 1 << 1,    // 0x0002
    FORWARD_COMPLEMENT = 1 << 2,    // 0x0004
    REVERSE_COMPLEMENT = 1 << 3     // 0x0008
};
94
95
// Orientation policy for the two mates of a paired-end read.
// The two-letter suffix gives the orientation of each mate:
// F = forward, R = reverse.
enum PairedEndPolicy
{
    PE_POLICY_FF = 0,
    PE_POLICY_FR = 1,
    PE_POLICY_RF = 2,
    PE_POLICY_RR = 3
};
104
116
template
<
117
typename
IndexIterator,
118
typename
ReadStorageIterator,
119
typename
QualStorageIterator,
120
typename
NameStorageIterator>
121
struct
ReadDataView
122
{
123
typedef
IndexIterator
index_iterator
;
124
typedef
typename
to_const<index_iterator>::type
const_index_iterator
;
125
126
typedef
ReadStorageIterator
read_storage_iterator
;
127
typedef
typename
to_const<read_storage_iterator>::type
const_read_storage_iterator
;
128
129
typedef
QualStorageIterator
qual_storage_iterator
;
130
typedef
typename
to_const<qual_storage_iterator>::type
const_qual_storage_iterator
;
131
132
typedef
NameStorageIterator
name_storage_iterator
;
133
typedef
typename
to_const<name_storage_iterator>::type
const_name_storage_iterator
;
134
135
// symbol size for reads
136
static
const
uint32
READ_BITS
= 4;
137
// big endian?
138
static
const
bool
HI_BITS
=
false
;
// deprecated
139
// big endian?
140
static
const
bool
READ_BIG_ENDIAN
=
false
;
141
// symbols per word
142
static
const
uint32
READ_SYMBOLS_PER_WORD
= (4*
sizeof
(
uint32
))/
READ_BITS
;
143
144
typedef
PackedStream
<
145
read_storage_iterator
,
uint8
,
READ_BITS
,
READ_BIG_ENDIAN
>
read_stream_type
;
146
typedef
PackedStream
<
147
const_read_storage_iterator
,
uint8
,
READ_BITS
,
READ_BIG_ENDIAN
>
const_read_stream_type
;
148
149
typedef
typename
read_stream_type::iterator
read_stream_iterator
;
150
typedef
typename
const_read_stream_type::iterator
const_read_stream_iterator
;
151
152
typedef
vector_view<read_stream_iterator>
read_string
;
153
typedef
vector_view<const_read_stream_iterator>
const_read_string
;
154
155
typedef
ConcatenatedStringSet
<
156
read_stream_iterator
,
157
index_iterator
>
read_string_set_type
;
158
159
typedef
ConcatenatedStringSet
<
160
const_read_stream_iterator
,
161
const_index_iterator
>
const_read_string_set_type
;
162
163
typedef
ConcatenatedStringSet
<
164
qual_storage_iterator
,
165
index_iterator
>
qual_string_set_type
;
166
167
typedef
ConcatenatedStringSet
<
168
const_qual_storage_iterator
,
169
const_index_iterator
>
const_qual_string_set_type
;
170
171
typedef
ConcatenatedStringSet
<
172
name_storage_iterator
,
173
index_iterator
>
name_string_set_type
;
174
175
typedef
ConcatenatedStringSet
<
176
const_name_storage_iterator
,
177
const_index_iterator
>
const_name_string_set_type
;
178
181
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
182
ReadDataView
()
183
:
m_n_reads
(0),
184
m_name_stream_len
(0),
185
m_read_stream_len
(0),
186
m_read_stream_words
(0),
187
m_min_read_len
(
uint32
(-1)),
188
m_max_read_len
(0),
189
m_avg_read_len
(0)
190
{};
191
194
template
<
195
typename
InIndexIterator,
196
typename
InReadIterator,
197
typename
InQualIterator,
198
typename
InNameIterator>
199
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
200
ReadDataView
(
const
ReadDataView<InIndexIterator,InReadIterator,InQualIterator,InNameIterator>
& in)
201
:
m_n_reads
(in.
m_n_reads
),
202
m_name_stream
(NameStorageIterator(in.
m_name_stream
)),
203
m_name_stream_len
(in.
m_name_stream_len
),
204
m_name_index
(IndexIterator(in.
m_name_index
)),
205
m_read_stream
(ReadStorageIterator(in.
m_read_stream
)),
206
m_read_stream_len
(in.
m_read_stream_len
),
207
m_read_stream_words
(in.
m_read_stream_words
),
208
m_read_index
(IndexIterator(in.
m_read_index
)),
209
m_qual_stream
(QualStorageIterator(in.
m_qual_stream
)),
210
m_min_read_len
(in.
m_min_read_len
),
211
m_max_read_len
(in.
m_max_read_len
),
212
m_avg_read_len
(in.
m_avg_read_len
)
213
{}
214
215
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
index_iterator
name_index
() {
return
m_name_index
; }
216
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
index_iterator
read_index
() {
return
m_read_index
; }
217
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
name_storage_iterator
name_stream
() {
return
m_name_stream
; }
218
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
read_storage_iterator
read_stream_storage
() {
return
m_read_stream
; }
219
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
read_stream_type
read_stream
() {
return
read_stream_type
(
m_read_stream
); }
220
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
qual_storage_iterator
qual_stream
() {
return
m_qual_stream
; }
221
222
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_index_iterator
const_name_index
()
const
{
return
m_name_index
; }
223
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_index_iterator
const_read_index
()
const
{
return
m_read_index
; }
224
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_name_storage_iterator
const_name_stream
()
const
{
return
m_name_stream
; }
225
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_read_storage_iterator
const_read_stream_storage
()
const
{
return
m_read_stream
; }
226
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_read_stream_type
const_read_stream
()
const
{
return
const_read_stream_type
(
m_read_stream
); }
227
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_qual_storage_iterator
const_qual_stream
()
const
{
return
m_qual_stream
; }
228
229
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_index_iterator
name_index
()
const
{
return
const_name_index
(); }
230
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_index_iterator
read_index
()
const
{
return
const_read_index
(); }
231
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_name_storage_iterator
name_stream
()
const
{
return
const_name_stream
(); }
232
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_read_storage_iterator
read_stream_storage
()
const
{
return
const_read_stream_storage
(); }
233
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_read_stream_type
read_stream
()
const
{
return
const_read_stream
(); }
234
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_qual_storage_iterator
qual_stream
()
const
{
return
const_qual_stream
(); }
235
236
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
size
()
const
{
return
m_n_reads
; }
237
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
bps
()
const
{
return
m_read_stream_len
; }
238
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
words
()
const
{
return
m_read_stream_words
; }
239
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
name_stream_len
()
const
{
return
m_name_stream_len
; }
240
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
max_read_len
()
const
{
return
m_max_read_len
; }
241
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
min_read_len
()
const
{
return
m_min_read_len
; }
242
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint32
avg_read_len
()
const
{
return
m_avg_read_len
; }
243
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
uint2
get_range
(
const
uint32
i)
const
{
return
make_uint2(
m_read_index
[i],
m_read_index
[i+1]); }
244
247
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
read_string_set_type
read_string_set
()
248
{
249
return
read_string_set_type
(
250
size
(),
251
read_stream
().
begin
(),
252
read_index
() );
253
}
254
257
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_read_string_set_type
read_string_set
()
const
258
{
259
return
const_read_string_set_type
(
260
size
(),
261
read_stream
().
begin
(),
262
read_index
() );
263
}
264
267
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_read_string_set_type
const_read_string_set
()
const
268
{
269
return
const_read_string_set_type
(
270
size
(),
271
read_stream
().
begin
(),
272
read_index
() );
273
}
274
277
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
read_string
get_read
(
const
uint32
i)
278
{
279
const
uint2 read_range =
get_range
( i );
280
return
read_string
( read_range.y - read_range.x,
read_stream
().
begin
() + read_range.x );
281
}
282
285
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
read_string
get_read
(
const
uint32
i)
const
286
{
287
const
uint2 read_range =
get_range
( i );
288
return
const_read_string
( read_range.y - read_range.x,
read_stream
().
begin
() + read_range.x );
289
}
290
293
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
qual_string_set_type
qual_string_set
()
294
{
295
return
qual_string_set_type
(
296
size
(),
297
qual_stream
(),
298
read_index
() );
299
}
300
303
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_qual_string_set_type
qual_string_set
()
const
304
{
305
return
const_qual_string_set_type
(
306
size
(),
307
qual_stream
(),
308
read_index
() );
309
}
310
313
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_qual_string_set_type
const_qual_string_set
()
const
314
{
315
return
const_qual_string_set_type
(
316
size
(),
317
qual_stream
(),
318
read_index
() );
319
}
320
323
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
name_string_set_type
name_string_set
()
324
{
325
return
name_string_set_type
(
326
size
(),
327
name_stream
(),
328
name_index
() );
329
}
330
333
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_name_string_set_type
name_string_set
()
const
334
{
335
return
const_name_string_set_type
(
336
size
(),
337
name_stream
(),
338
name_index
() );
339
}
340
343
NVBIO_HOST_DEVICE
NVBIO_FORCEINLINE
const_name_string_set_type
const_name_string_set
()
const
344
{
345
return
const_name_string_set_type
(
346
size
(),
347
name_stream
(),
348
name_index
() );
349
}
350
351
public
:
352
// number of reads in this struct
353
uint32
m_n_reads
;
354
355
// a pointer to a buffer containing the names of all the reads in this batch
356
name_storage_iterator
m_name_stream
;
357
// the length (in bytes) of the name_stream buffer
358
uint32
m_name_stream_len
;
359
// an array of uint32 with the byte indices of the starting locations of each name in name_stream
360
index_iterator
m_name_index
;
361
362
// a pointer to a buffer containing the read data
363
// note that this could point at either host or device memory
364
read_storage_iterator
m_read_stream
;
365
// the length of read_stream in base pairs
366
uint32
m_read_stream_len
;
367
// the number of words in read_stream
368
uint32
m_read_stream_words
;
369
// an array of uint32 with the indices of the starting locations of each read in read_stream (in base pairs)
370
index_iterator
m_read_index
;
371
372
// a pointer to a buffer containing quality data
373
// (the indices in m_read_index are also valid for this buffer)
374
qual_storage_iterator
m_qual_stream
;
375
376
// statistics on the reads: minimum size, maximum size, average size
377
uint32
m_min_read_len
;
378
uint32
m_max_read_len
;
379
uint32
m_avg_read_len
;
380
};
381
385
// The plain-pointer instantiation of ReadDataView: uint32* for the index and
// read storage, char* for the quality and name storage.
typedef ReadDataView<uint32*,uint32*,char*,char*> ReadDataCore;
386
391
struct
ReadData
:
public
ReadDataCore
392
{
393
typedef
ReadDataView<uint32*,uint32*,char*,char*>
plain_view_type
;
394
typedef
ReadDataView<const uint32*,const uint32*,const char*,const char*>
const_plain_view_type
;
395
398
ReadData
() :
ReadDataCore
()
399
{
400
m_name_stream
= NULL;
401
m_name_index
= NULL;
402
m_read_stream
= NULL;
403
m_read_index
= NULL;
404
m_qual_stream
= NULL;
405
}
406
409
virtual
~ReadData
() {}
410
};
411
415
struct
ReadDataRAM
:
public
ReadData
416
{
419
enum
StrandOp
420
{
421
NO_OP
= 0x0000,
422
REVERSE_OP
= 0x0001,
423
COMPLEMENT_OP
= 0x0002,
424
REVERSE_COMPLEMENT_OP
= 0x0003,
425
};
426
427
ReadDataRAM
();
428
431
void
reserve
(
const
uint32
n_reads,
const
uint32
n_bps);
432
443
void
push_back
(
uint32
read_len,
444
const
char
* name,
445
const
uint8
* base_pairs,
446
const
uint8
* quality,
447
const
QualityEncoding
quality_encoding,
448
const
uint32
truncate_read_len,
449
const
StrandOp
conversion_flags);
450
453
void
end_batch
(
void
);
454
455
std::vector<uint32>
m_read_vec
;
456
std::vector<uint32>
m_read_index_vec
;
457
std::vector<char>
m_qual_vec
;
458
std::vector<char>
m_name_vec
;
459
std::vector<uint32>
m_name_index_vec
;
460
};
461
465
struct
ReadDataDevice
:
public
ReadData
466
{
467
enum
{
468
READS
= 0x01,
469
QUALS
= 0x02,
470
};
471
474
ReadDataDevice
(
const
ReadData
& host_data,
const
uint32
flags =
READS
);
475
478
~ReadDataDevice
();
479
480
uint64
allocated
()
const
{
return
m_allocated; }
481
482
private
:
483
uint64
m_allocated;
484
};
485
486
typedef
ReadDataRAM
ReadDataHost
;
487
typedef
ReadDataDevice
ReadDataCUDA
;
488
493
struct
ReadDataStream
494
{
495
ReadDataStream
(
uint32
truncate_read_len =
uint32
(-1))
496
:
m_truncate_read_len
(truncate_read_len)
497
{
498
};
499
502
virtual
~ReadDataStream
() {}
503
506
virtual
ReadData
*
next
(
const
uint32
batch_size,
const
uint32
batch_bps =
uint32
(-1)) = 0;
507
510
virtual
bool
is_ok
() = 0;
511
512
// maximum length of a read; longer reads are truncated to this size
513
uint32
m_truncate_read_len
;
514
};
515
516
529
// Open a read file and return a stream over its contents.
//   read_file_name  name of the file to open
//   qualities       encoding of the quality values in the file
//   max_reads       defaults to uint32(-1)
//   max_read_len    defaults to uint32(-1)
//   flags           read strand encoding; defaults to REVERSE
// NOTE(review): the failure behavior and the ownership of the returned
// pointer are not visible from this declaration - check the definition.
ReadDataStream* open_read_file(
    const char*             read_file_name,
    const QualityEncoding   qualities,
    const uint32            max_reads    = uint32(-1),
    const uint32            max_read_len = uint32(-1),
    const ReadEncoding      flags        = REVERSE);
534
537
538
}
// namespace io
539
542
inline
543
io::ReadData::plain_view_type
plain_view
(
io::ReadData
& read_data)
544
{
545
return
io::ReadData::plain_view_type
( read_data );
546
}
547
550
inline
551
io::ReadData::const_plain_view_type
plain_view
(
const
io::ReadData
& read_data)
552
{
553
return
io::ReadData::const_plain_view_type
( read_data );
554
}
555
556
}
// namespace nvbio
Generated on Wed Feb 25 2015 08:33:00 for NVBIO by
1.8.4