/// Encode an ASCII nucleotide character as a small integer code.
///
/// The lookup is case-insensitive for the four bases:
/// 'A'/'a' -> 0, 'C'/'c' -> 1, 'G'/'g' -> 2, 'T'/'t' -> 3.
/// The character '-' maps to 5; every other byte value maps to 4.
///
/// @param c  input character, used directly as an index into a 256-entry table
/// @return   the table entry for c (a value in 0..5)
inline unsigned char nst_nt4_encode(unsigned char c)
{
    // Read-only lookup table: const so it cannot be modified by accident
    // and can be placed in read-only storage.
    static const unsigned char nst_nt4_table[256] = {
        4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
        4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
        4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 5 /* '-' */, 4, 4,
        4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
        4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,   // 'A', 'C', 'G'
        4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,   // 'T'
        4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,   // 'a', 'c', 'g'
        4, 4, 4, 4,  3, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,   // 't'
        4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
        4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
        4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
        4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
        4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
        4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
        4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
        4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
    };

    return nst_nt4_table[c];
}
// Per its name, converts a quality value `q` to the Phred scale; the source
// encoding is selected at compile time through the `encoding` template parameter.
// NOTE(review): only part of this definition is visible in this excerpt. The
// numeric rows below are presumably the initializer of the `s_solexa_to_phred`
// table read by the final return statement, and other return paths may exist
// between the table and that statement — confirm against the full file.
63 template <QualityEncoding encoding>
64 inline unsigned char convert_to_phred_quality(
const uint8 q)
// Table rows, ten values per row (266 values in total). The first twenty
// entries rise non-linearly from 0 to 10; from index 20 onward each entry
// equals its index minus 10.
68 0, 1, 1, 1, 1, 1, 1, 2, 2, 3,
69 3, 4, 4, 5, 5, 6, 7, 8, 9, 10,
70 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
71 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
72 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
73 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
74 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
75 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
76 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
77 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
78 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
79 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
80 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
81 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
82 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
83 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
84 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
85 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
86 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
87 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
88 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
89 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
90 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
91 220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
92 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
93 240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
94 250, 251, 252, 253, 254, 255
// Table lookup keyed directly by the input quality value.
109 return s_solexa_to_phred[q];
// Fragment of a template parameterized on the alphabet and on a
// SequenceDataEncoder::StrandOp value.
// NOTE(review): the declaration this template introduces and the branch
// structure around the statements below are not visible in this excerpt.
123 template <Alphabet ALPHABET, SequenceDataEncoder::StrandOp FLAGS>
// DNA and DNA_N alphabets: the character `c` is encoded with nst_nt4_encode().
141 if (ALPHABET ==
DNA || ALPHABET ==
DNA_N)
143 const uint8 bp = nst_nt4_encode( c );
// Codes 0..3 are mirrored (0<->3, 1<->2); with the nst_nt4_encode codes
// A=0, C=1, G=2, T=3 this swaps A<->T and C<->G. Any code >= 4 yields 4.
146 return bp < 4u ? 3u - bp : 4u;
// Here the character is encoded with from_char<ALPHABET>() instead —
// presumably the path taken for the other alphabets; verify in the full file.
152 const uint8 bp = from_char<ALPHABET>( c );
// Fragment of a template taking a compile-time quality encoding and a generic
// sequence type; it writes symbols to `stream` and converted qualities to
// `qual_stream`.
// NOTE(review): the function name, the remaining parameters and the condition
// that selects between the two copy strategies below are not visible here.
179 template <Alphabet ALPHABET, QualityEncoding quality_encoding,
typename sequence_type>
181 const sequence_type sequence,
186 const uint32 len = sequence.length();
// First strategy: copy all symbols with a single assign() call, then convert
// the qualities one element at a time.
189 assign( len, sequence, stream );
192 for (
uint32 i = 0; i < len; i++)
193 qual_stream[i] = convert_to_phred_quality<quality_encoding>(sequence.quality(i));
// Second strategy: copy each symbol and its converted quality in one
// element-wise loop.
196 for (
uint32 i = 0; i < sequence.length(); i++)
198 stream[i] = sequence[i];
199 qual_stream[i] = convert_to_phred_quality<quality_encoding>(sequence.quality(i));
// Fragment of a dispatcher that turns the runtime value `quality_encoding`
// into a compile-time template argument: each call below forwards the same
// (sequence, stream, qual_stream) arguments to encode<ALPHABET, ...> for one
// of Phred, Phred33, Phred64 and Solexa.
// NOTE(review): the function name, the remaining parameters and the case
// labels of the switch are not visible in this excerpt.
206 template <Alphabet ALPHABET,
typename sequence_type>
209 const sequence_type sequence,
213 switch (quality_encoding)
216 encode<ALPHABET,Phred>( sequence,
stream, qual_stream );
219 encode<ALPHABET,Phred33>( sequence,
stream, qual_stream );
222 encode<ALPHABET,Phred64>( sequence,
stream, qual_stream );
225 encode<ALPHABET,Solexa>( sequence,
stream, qual_stream );
// Fragment of a template that receives raw `sequence` / `quality` byte buffers
// of length `sequence_len` and calls encode<ALPHABET>() on one of four
// sequence views.
// NOTE(review): the function name, the conditions choosing between the four
// calls and the construction of the rc_/r_/fc_/f_ views are not visible here.
// Going by the prefixes these are presumably reverse-complemented, reversed,
// forward-complemented and forward views — confirm against the full file.
235 template <Alphabet ALPHABET>
239 const uint32 sequence_len,
240 const uint8* sequence,
241 const uint8* quality,
253 encode<ALPHABET>( quality_encoding, rc_sequence,
stream, qual_stream );
255 encode<ALPHABET>( quality_encoding, r_sequence,
stream, qual_stream );
260 encode<ALPHABET>( quality_encoding, fc_sequence,
stream, qual_stream );
262 encode<ALPHABET>( quality_encoding, f_sequence,
stream, qual_stream );
// Fragment of a template parameterized on the sequence alphabet that manages
// storage reachable through `m_data` and appends encoded sequences to it.
// NOTE(review): the class and method headers are not visible in this excerpt;
// the statements below belong to more than one member whose boundaries cannot
// be determined from here.
269 template <Alphabet SEQUENCE_ALPHABET>
// Reservation on top of the current contents: requested capacity is the
// current size()/bps() plus the incoming counts.
289 m_data->
reserve( m_data->
size() + n_sequences, m_data->
bps() + n_bps );
// Reservation sized by the incoming counts alone.
291 m_data->
reserve( n_sequences, n_bps );
// Branch taken when `m_append` is false; its body is not visible here.
298 if (m_append ==
false)
332 const uint32 in_sequence_len,
334 const uint8* base_pairs,
335 const uint8* quality,
337 const uint32 max_sequence_len,
// Length left after removing trim3 + trim5 symbols, clamped to 0 when the
// trims cover the whole input.
342 const uint32 trimmed_len = in_sequence_len > trim3 + trim5 ?
343 in_sequence_len - trim3 - trim5 : 0u;
// A zero `sequence_len` is treated as a programming error at this point.
351 assert(sequence_len);
369 encode<SEQUENCE_ALPHABET>(
// Grow the name buffer when it cannot hold name_offset + name_len + 1
// elements; it is resized to twice that amount.
// NOTE(review): the +1 presumably accounts for a terminator — confirm.
393 if (m_data->
m_name_vec.size() < name_offset + name_len + 1)
394 m_data->
m_name_vec.resize( (name_offset + name_len + 1)*2 );
// Series of return statements that each forward to stream->next(), passing the
// address of `encoder` together with the batch limits.
// NOTE(review): the enclosing functions, the conditions selecting each return
// and the construction of each `encoder` are not visible in this excerpt;
// presumably each return sits in a different branch with a differently typed
// encoder — confirm against the full file.
462 return stream->
next( &encoder, batch_size, batch_bps );
468 return stream->
next( &encoder, batch_size, batch_bps );
474 return stream->
next( &encoder, batch_size, batch_bps );
480 return stream->
next( &encoder, batch_size, batch_bps );
486 return stream->
next( &encoder, batch_size, batch_bps );
492 return stream->
next( &encoder, batch_size, batch_bps );
511 return stream->
next( &encoder, batch_size, batch_bps );
517 return stream->
next( &encoder, batch_size, batch_bps );
523 return stream->
next( &encoder, batch_size, batch_bps );
529 return stream->
next( &encoder, batch_size, batch_bps );
535 return stream->
next( &encoder, batch_size, batch_bps );
541 return stream->
next( &encoder, batch_size, batch_bps );
// This call passes only `batch_size`; no `batch_bps` argument is supplied.
556 return stream->
next( &encoder, batch_size );