37 #include <cuda_runtime.h>
48 const uint32 truncate_read_len,
53 bool is_gzipped =
false;
56 if (len >= strlen(
".gz"))
58 if (strcmp(&read_file_name[len - strlen(
".gz")],
".gz") == 0)
61 len =
uint32(len - strlen(
".gz"));
66 if (len >= strlen(
".fastq"))
68 if (strncmp(&read_file_name[len - strlen(
".fastq")],
".fastq", strlen(
".fastq")) == 0)
79 if (len >= strlen(
".fq"))
81 if (strncmp(&read_file_name[len - strlen(
".fq")],
".fq", strlen(
".fq")) == 0)
92 if (len >= strlen(
".txt"))
94 if (strncmp(&read_file_name[len - strlen(
".txt")],
".txt", strlen(
".txt")) == 0)
105 if (len >= strlen(
".sam"))
107 if (strncmp(&read_file_name[len - strlen(
".sam")],
".sam", strlen(
".sam")) == 0)
116 if (ret->
init() ==
false)
127 if (len >= strlen(
".bam"))
129 if (strncmp(&read_file_name[len - strlen(
".bam")],
".bam", strlen(
".bam")) == 0)
138 if (ret->
init() ==
false)
149 log_warning(stderr,
"could not determine file type for %s; guessing %sfastq\n", read_file_name, is_gzipped ?
"compressed " :
"");
161 inline unsigned char nst_nt4_encode(
unsigned char c)
163 static unsigned char nst_nt4_table[256] = {
164 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
165 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
166 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5 , 4, 4,
167 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
168 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
169 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
170 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
171 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
172 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
173 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
174 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
175 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
176 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
177 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
178 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
179 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
182 return nst_nt4_table[c];
186 template <QualityEncoding encoding>
187 inline unsigned char convert_to_phred_quality(
const uint8 q)
191 0, 1, 1, 1, 1, 1, 1, 2, 2, 3,
192 3, 4, 4, 5, 5, 6, 7, 8, 9, 10,
193 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
194 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
195 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
196 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
197 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
198 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
199 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
200 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
201 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
202 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
203 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
204 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
205 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
206 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
207 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
208 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
209 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
210 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
211 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
212 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
213 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
214 220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
215 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
216 240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
217 250, 251, 252, 253, 254, 255
232 return s_solexa_to_phred[q];
240 inline unsigned char complement_bp(
unsigned char bp)
291 #if defined(PRESIZED_VECTORS)
292 #define PRESIZE_VECTORS(vec, x) vec.resize(x)
293 #define ADJUST_VECTORS(vec, x) vec.resize(x)
294 #define RESIZE_VECTORS(vec, x) if (vec.size() < x) vec.resize(x)
296 #define PRESIZE_VECTORS(vec, x)
297 #define ADJUST_VECTORS(vec, x)
298 #define RESIZE_VECTORS(vec, x) vec.resize(x)
306 const uint32 AVG_NAME_LENGTH = 250;
343 template <ReadDataRAM::StrandOp FLAGS>
361 return bp < 4u ? 3u - bp : 4u;
381 template <QualityEncoding quality_encoding,
typename read_type>
383 const read_type
read,
393 for (
uint32 i = 0; i < read.length(); i++)
394 qual_stream[i] = convert_to_phred_quality<quality_encoding>(read.quality(i));
398 for (
uint32 i = 0; i < read.length(); i++)
401 qual_stream[i] = convert_to_phred_quality<quality_encoding>(read.quality(i));
408 template <
typename read_type>
411 const read_type
read,
415 switch (quality_encoding)
439 const uint8* quality,
452 encode( quality_encoding, rc_read, stream, qual_stream );
454 encode( quality_encoding, r_read, stream, qual_stream );
459 encode( quality_encoding, fc_read, stream, qual_stream );
461 encode( quality_encoding, f_read, stream, qual_stream );
469 const uint8* quality,
471 const uint32 truncate_read_len,
476 read_len =
nvbio::min(read_len, truncate_read_len);
484 const uint32 words = (stream_len + bps_per_word - 1) / bps_per_word;
515 m_name_vec.resize(name_offset + name_len + 1);
517 memcpy(&
m_name_vec[name_offset],name,name_len + 1);
524 template <
typename T>
525 static void cudaAllocAndCopyVector(T*& dst,
const T* src,
const uint32 words,
uint64& allocated)
527 const uint32 words4 = 4u * ((words + 3u) / 4u);
530 cudaMalloc( &dst,
sizeof(T) * words4 );
532 throw std::bad_alloc(
WINONLY(
"ReadDataDevice: not enough device memory"));
534 cudaMemcpy( dst, src,
sizeof(T) * words, cudaMemcpyHostToDevice );
536 allocated += words4 *
sizeof(T);
590 if (!
is_ok() || reads_to_load == 0)
599 batch_bps ==
uint32(-1) ? batch_size * AVG_READ_LENGTH : batch_bps );
601 while (reads->
size() < reads_to_load &&
602 reads->
bps() < batch_bps)
606 const uint32 chunk_bps = batch_bps - reads->
bps();
608 const int n =
nextChunk(reads, chunk_reads, chunk_bps);
609 assert(n <= (
int) chunk_reads);
613 assert(reads->
size() <= reads_to_load);
616 if (reads->
size() == 0)