37 template <
bool BIG_ENDIAN_T, u
int32 SYMBOL_SIZE,
typename Symbol,
typename InputStream,
typename IndexType,
typename ValueType>
41 template <
bool BIG_ENDIAN_T, u
int32 SYMBOL_SIZE,
typename Symbol,
typename InputStream,
typename IndexType>
42 struct packer<BIG_ENDIAN_T,SYMBOL_SIZE,Symbol,InputStream,IndexType,
uint32>
46 const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
47 const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
52 const index_type word_idx = index_type( bit_idx >> 5u );
54 if (is_pow2<SYMBOL_SIZE>())
56 const uint32 word = stream[ word_idx ];
57 const uint32 symbol_offset = BIG_ENDIAN_T ? (32u - SYMBOL_SIZE -
uint32(bit_idx & 31u)) :
uint32(bit_idx & 31u);
58 const uint32 symbol = (word >> symbol_offset) & SYMBOL_MASK;
60 return Symbol( symbol );
64 const uint32 word1 = stream[ word_idx ];
66 const uint32 symbol1 = (word1 >> symbol_offset) & SYMBOL_MASK;
70 const uint32 rem_bits = SYMBOL_SIZE - read_bits;
73 const uint32 rem_mask = (1u << rem_bits) - 1u;
75 const uint32 word2 = stream[ word_idx+1 ];
76 const uint32 symbol2 = word2 & rem_mask;
78 return Symbol( symbol1 | (symbol2 << read_bits) );
81 return Symbol( symbol1 );
87 const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
88 const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
93 const index_type word_idx = index_type( bit_idx >> 5u );
95 if (is_pow2<SYMBOL_SIZE>())
97 uint32 word = stream[ word_idx ];
98 const uint32 symbol_offset = BIG_ENDIAN_T ? (32u - SYMBOL_SIZE -
uint32(bit_idx & 31u)) :
uint32(bit_idx & 31u);
99 const uint32 symbol =
uint32(sym & SYMBOL_MASK) << symbol_offset;
102 word &= ~(SYMBOL_MASK << symbol_offset);
105 stream[ word_idx ] = word | symbol;
109 uint32 word1 = stream[ word_idx ];
111 const uint32 symbol1 =
uint32(sym & SYMBOL_MASK) << symbol_offset;
114 word1 &= ~(SYMBOL_MASK << symbol_offset);
117 stream[ word_idx ] = word1 | symbol1;
121 const uint32 rem_bits = SYMBOL_SIZE - read_bits;
124 const uint32 rem_mask = (1u << rem_bits) - 1u;
126 uint32 word2 = stream[ word_idx+1 ];
127 const uint32 symbol2 =
uint32(sym & SYMBOL_MASK) >> read_bits;
133 stream[ word_idx+1 ] = word2 | symbol2;
139 template <
bool BIG_ENDIAN_T, u
int32 SYMBOL_SIZE,
typename Symbol,
typename InputStream,
typename IndexType>
140 struct packer<BIG_ENDIAN_T,SYMBOL_SIZE,Symbol,InputStream,IndexType,
uint64>
144 const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
145 const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
150 const index_type word_idx = index_type( bit_idx >> 6u );
152 if (is_pow2<SYMBOL_SIZE>())
154 const uint64 word = stream[ word_idx ];
155 const uint32 symbol_offset = BIG_ENDIAN_T ? (64u - SYMBOL_SIZE -
uint32(bit_idx & 63u)) :
uint32(bit_idx & 63u);
156 const uint32 symbol =
uint32((word >> symbol_offset) & SYMBOL_MASK);
158 return Symbol( symbol );
162 const uint64 word1 = stream[ word_idx ];
164 const uint32 symbol1 =
uint32((word1 >> symbol_offset) & SYMBOL_MASK);
168 const uint32 rem_bits = SYMBOL_SIZE - read_bits;
173 const uint64 word2 = stream[ word_idx+1 ];
176 return Symbol( symbol1 | (symbol2 << read_bits) );
179 return Symbol( symbol1 );
185 const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
186 const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
191 const index_type word_idx = index_type( bit_idx >> 6u );
193 if (is_pow2<SYMBOL_SIZE>())
195 uint64 word = stream[ word_idx ];
196 const uint32 symbol_offset = BIG_ENDIAN_T ? (64u - SYMBOL_SIZE -
uint32(bit_idx & 63u)) :
uint32(bit_idx & 63u);
197 const uint64 symbol =
uint64(sym & SYMBOL_MASK) << symbol_offset;
200 word &= ~(
uint64(SYMBOL_MASK) << symbol_offset);
203 stream[ word_idx ] = word | symbol;
207 uint64 word1 = stream[ word_idx ];
209 const uint64 symbol1 =
uint64(sym & SYMBOL_MASK) << symbol_offset;
212 word1 &= ~(
uint64(SYMBOL_MASK) << symbol_offset);
215 stream[ word_idx ] = word1 | symbol1;
219 const uint32 rem_bits = SYMBOL_SIZE - read_bits;
224 uint64 word2 = stream[ word_idx+1 ];
225 const uint64 symbol2 =
uint64(sym & SYMBOL_MASK) >> read_bits;
231 stream[ word_idx+1 ] = word2 | symbol2;
// Partial listing of the packer specialization whose storage word is uint8.
// Only some lines of the original struct are visible: the member signatures,
// braces and the definition of `read_bits` are not shown in this listing.
237 template <
bool BIG_ENDIAN_T, u
int32 SYMBOL_SIZE,
typename Symbol,
typename InputStream,
typename IndexType>
238 struct packer<BIG_ENDIAN_T,SYMBOL_SIZE,Symbol,InputStream,IndexType,
uint8>
// ---- read path: extracts one symbol starting at bit `bit_idx` ----
// SYMBOL_MASK has the low SYMBOL_SIZE bits set.
242 const uint8 SYMBOL_COUNT =
uint8(1u) << SYMBOL_SIZE;
243 const uint8 SYMBOL_MASK = SYMBOL_COUNT -
uint8(1u);
// 8 bits per storage word, hence bit_idx / 8.
248 const index_type word_idx = index_type( bit_idx >> 3u );
// Power-of-two symbol sizes: only a single word is read on this path.
250 if (is_pow2<SYMBOL_SIZE>())
252 const uint8 word = stream[ word_idx ];
// Big-endian packing counts the bit offset from the top of the byte.
253 const uint8 symbol_offset = BIG_ENDIAN_T ? (8u - SYMBOL_SIZE -
uint8(bit_idx & 7u)) :
uint8(bit_idx & 7u);
254 const uint8 symbol = (word >> symbol_offset) & SYMBOL_MASK;
256 return Symbol( symbol );
// Other symbol sizes: the symbol may continue into the following byte.
260 const uint8 word1 = stream[ word_idx ];
// NOTE(review): unlike the single-word path above, this offset does not
// depend on BIG_ENDIAN_T - confirm that is intended.
261 const uint8 symbol_offset =
uint8(bit_idx & 7u);
262 const uint8 symbol1 = (word1 >> symbol_offset) & SYMBOL_MASK;
// `read_bits` is defined on a line not visible here; presumably the number of
// bits taken from word1 - TODO confirm.
266 const uint32 rem_bits = SYMBOL_SIZE - read_bits;
// Mask selecting the low rem_bits bits of the next word.
269 const uint8 rem_mask =
uint8((1u << rem_bits) - 1u);
271 const uint8 word2 = stream[ word_idx+1 ];
272 const uint8 symbol2 = word2 & rem_mask;
// Low part from word1, remaining high part from word2.
274 return Symbol( symbol1 | (symbol2 << read_bits) );
277 return Symbol( symbol1 );
// ---- write path: stores `sym` starting at bit `bit_idx` ----
283 const uint8 SYMBOL_COUNT =
uint8(1u) << SYMBOL_SIZE;
284 const uint8 SYMBOL_MASK = SYMBOL_COUNT -
uint8(1u);
289 const index_type word_idx = index_type( bit_idx >> 3u );
291 if (is_pow2<SYMBOL_SIZE>())
293 uint8 word = stream[ word_idx ];
294 const uint8 symbol_offset = BIG_ENDIAN_T ? (8u - SYMBOL_SIZE -
uint8(bit_idx & 7u)) :
uint8(bit_idx & 7u);
// NOTE(review): cast is uint32 here but uint8 in the straddling path below;
// the result is narrowed to uint8 either way.
295 const uint8 symbol =
uint32(sym & SYMBOL_MASK) << symbol_offset;
// Clear the destination bits, then merge in the new symbol.
298 word &= ~(SYMBOL_MASK << symbol_offset);
301 stream[ word_idx ] = word | symbol;
// Straddling case: write the part that fits in the first byte...
305 uint8 word1 = stream[ word_idx ];
306 const uint8 symbol_offset =
uint8(bit_idx & 7u);
307 const uint8 symbol1 =
uint8(sym & SYMBOL_MASK) << symbol_offset;
310 word1 &= ~(SYMBOL_MASK << symbol_offset);
313 stream[ word_idx ] = word1 | symbol1;
// ...then the remaining high bits of the symbol into the next byte.
317 const uint32 rem_bits = SYMBOL_SIZE - read_bits;
320 uint8 word2 = stream[ word_idx+1 ];
321 const uint8 symbol2 =
uint32(sym & SYMBOL_MASK) >> read_bits;
// rem_mask is presumably used on a non-visible line to clear the low bits of
// word2 before the merge below - TODO confirm.
323 const uint8 rem_mask =
uint8((1u << rem_bits) - 1u);
329 stream[ word_idx+1 ] = word2 | symbol2;
336 template <
bool BIG_ENDIAN_T,
typename Symbol,
typename InputStream,
typename IndexType>
341 const uint32 SYMBOL_MASK = 3u;
345 const index_type word_idx = sym_idx >> 4u;
347 const uint32 word = stream[ word_idx ];
348 const uint32 symbol_offset = BIG_ENDIAN_T ? (30u - (
uint32(sym_idx & 15u) << 1)) :
uint32((sym_idx & 15u) << 1);
349 const uint32 symbol = (word >> symbol_offset) & SYMBOL_MASK;
351 return Symbol( symbol );
356 const uint32 SYMBOL_MASK = 3u;
360 const index_type word_idx = sym_idx >> 4u;
362 uint32 word = stream[ word_idx ];
363 const uint32 symbol_offset = BIG_ENDIAN_T ? (30u - (
uint32(sym_idx & 15u) << 1)) :
uint32((sym_idx & 15u) << 1);
364 const uint32 symbol =
uint32(sym & SYMBOL_MASK) << symbol_offset;
367 word &= ~(SYMBOL_MASK << symbol_offset);
370 stream[ word_idx ] = word | symbol;
373 template <
bool BIG_ENDIAN_T,
typename Symbol,
typename InputStream,
typename IndexType>
378 const uint32 SYMBOL_MASK = 15u;
382 const index_type word_idx = sym_idx >> 3u;
384 const uint32 word = stream[ word_idx ];
385 const uint32 symbol_offset = BIG_ENDIAN_T ? (28u - (
uint32(sym_idx & 7u) << 2)) :
uint32((sym_idx & 7u) << 2);
386 const uint32 symbol = (word >> symbol_offset) & SYMBOL_MASK;
388 return Symbol( symbol );
393 const uint32 SYMBOL_MASK = 15u;
397 const index_type word_idx = sym_idx >> 3u;
399 uint32 word = stream[ word_idx ];
400 const uint32 symbol_offset = BIG_ENDIAN_T ? (28u - (
uint32(sym_idx & 7u) << 2)) :
uint32((sym_idx & 7u) << 2);
401 const uint32 symbol =
uint32(sym & SYMBOL_MASK) << symbol_offset;
404 word &= ~(SYMBOL_MASK << symbol_offset);
407 stream[ word_idx ] = word | symbol;
411 template <
bool BIG_ENDIAN_T,
typename Symbol,
typename InputStream,
typename IndexType>
412 struct packer<BIG_ENDIAN_T,2u,Symbol,InputStream,IndexType,uint4>
416 const uint32 SYMBOL_MASK = 3u;
420 const index_type word_idx = sym_idx >> 6u;
422 const uint4 word = stream[ word_idx ];
423 const uint32 symbol_comp = (sym_idx & 63u) >> 4u;
424 const uint32 symbol_offset = BIG_ENDIAN_T ? (30u - (
uint32(sym_idx & 15u) << 1)) :
uint32((sym_idx & 15u) << 1);
425 const uint32 symbol = (
comp( word, symbol_comp ) >> symbol_offset) & SYMBOL_MASK;
427 return Symbol( symbol );
432 const uint32 SYMBOL_MASK = 3u;
436 const index_type word_idx = sym_idx >> 6u;
438 uint4 word = stream[ word_idx ];
439 const uint32 symbol_comp = (sym_idx & 63u) >> 4u;
440 const uint32 symbol_offset = BIG_ENDIAN_T ? (30u - (
uint32(sym_idx & 15u) << 1)) :
uint32((sym_idx & 15u) << 1);
441 const uint32 symbol =
uint32(sym & SYMBOL_MASK) << symbol_offset;
444 select( word, symbol_comp ) &= ~(SYMBOL_MASK << symbol_offset);
445 select( word, symbol_comp ) |= symbol;
448 stream[ word_idx ] = word;
451 template <
bool BIG_ENDIAN_T,
typename Symbol,
typename InputStream,
typename IndexType>
452 struct packer<BIG_ENDIAN_T,4u,Symbol,InputStream,IndexType,uint4>
456 const uint32 SYMBOL_MASK = 15u;
460 const index_type word_idx = sym_idx >> 5u;
462 const uint4 word = stream[ word_idx ];
463 const uint32 symbol_comp = (sym_idx & 31u) >> 3u;
464 const uint32 symbol_offset = BIG_ENDIAN_T ? (28u - (
uint32(sym_idx & 7u) << 2)) :
uint32((sym_idx & 7u) << 2);
465 const uint32 symbol = (
comp( word, symbol_comp ) >> symbol_offset) & SYMBOL_MASK;
467 return Symbol( symbol );
472 const uint32 SYMBOL_MASK = 15u;
476 const index_type word_idx = sym_idx >> 5u;
478 uint4 word = stream[ word_idx ];
479 const uint32 symbol_comp = (sym_idx & 31u) >> 3u;
480 const uint32 symbol_offset = BIG_ENDIAN_T ? (28u - (
uint32(sym_idx & 7u) << 2)) :
uint32((sym_idx & 7u) << 2);
481 const uint32 symbol =
uint32(sym & SYMBOL_MASK) << symbol_offset;
484 select( word, symbol_comp ) &= ~(SYMBOL_MASK << symbol_offset);
485 select( word, symbol_comp ) |= symbol;
488 stream[ word_idx ] = word;
491 template <
bool BIG_ENDIAN_T,
typename Symbol,
typename InputStream,
typename IndexType>
492 struct packer<BIG_ENDIAN_T,8u,Symbol,InputStream,IndexType,uint4>
496 const uint32 SYMBOL_MASK = 255u;
500 const index_type word_idx = sym_idx >> 4u;
502 const uint4 word = stream[ word_idx ];
503 const uint32 symbol_comp = (sym_idx & 15u) >> 2u;
504 const uint32 symbol_offset = BIG_ENDIAN_T ? (24u - (
uint32(sym_idx & 3u) << 3)) :
uint32((sym_idx & 3u) << 3);
505 const uint32 symbol = (
comp( word, symbol_comp ) >> symbol_offset) & SYMBOL_MASK;
507 return Symbol( symbol );
512 const uint32 SYMBOL_MASK = 255u;
516 const index_type word_idx = sym_idx >> 4u;
518 uint4 word = stream[ word_idx ];
519 const uint32 symbol_comp = (sym_idx & 15u) >> 2u;
520 const uint32 symbol_offset = BIG_ENDIAN_T ? (24u - (
uint32(sym_idx & 3u) << 3)) :
uint32((sym_idx & 3u) << 3);
521 const uint32 symbol =
uint32(sym & SYMBOL_MASK) << symbol_offset;
524 select( word, symbol_comp ) &= ~(SYMBOL_MASK << symbol_offset);
525 select( word, symbol_comp ) |= symbol;
528 stream[ word_idx ] = word;
532 template <
bool BIG_ENDIAN_T,
typename Symbol,
typename InputStream,
typename IndexType>
537 const uint32 SYMBOL_MASK = 3u;
541 const index_type word_idx = sym_idx >> 5u;
543 const uint64 word = stream[ word_idx ];
544 const uint32 symbol_offset = BIG_ENDIAN_T ? (62u - (
uint32(sym_idx & 31u) << 1)) :
uint32((sym_idx & 31u) << 1);
545 const uint64 symbol = (word >> symbol_offset) & SYMBOL_MASK;
547 return Symbol( symbol );
552 const uint32 SYMBOL_MASK = 3u;
556 const index_type word_idx = sym_idx >> 5u;
558 uint64 word = stream[ word_idx ];
559 const uint32 symbol_offset = BIG_ENDIAN_T ? (62u - (
uint32(sym_idx & 31u) << 1)) :
uint32((sym_idx & 31u) << 1);
560 const uint64 symbol =
uint64(sym & SYMBOL_MASK) << symbol_offset;
563 word &= ~(
uint64(SYMBOL_MASK) << symbol_offset);
566 stream[ word_idx ] = word | symbol;
// Partial listing of a packer specialization that stores 4-bit symbols in
// 64-bit words (mask 15, offsets in steps of 4 bits up to 60): 16 symbols per
// word. The struct header and member signatures are not visible in this listing.
569 template <
bool BIG_ENDIAN_T,
typename Symbol,
typename InputStream,
typename IndexType>
// ---- read path: returns the symbol at index `sym_idx` ----
574 const uint32 SYMBOL_MASK = 15u;
// 16 symbols per 64-bit word, so the word index is sym_idx / 16.
// (Was `>> 5u`, which made symbols 16..31 alias symbols 0..15 of the same
// word, inconsistent with the `sym_idx & 15u` used for the in-word offset.)
578 const index_type word_idx = sym_idx >> 4u;
580 const uint64 word = stream[ word_idx ];
// Bit offset of the symbol inside the word; big-endian counts from bit 60 down.
581 const uint32 symbol_offset = BIG_ENDIAN_T ? (60u - (
uint32(sym_idx & 15u) << 2)) :
uint32((sym_idx & 15u) << 2);
582 const uint64 symbol = (word >> symbol_offset) & SYMBOL_MASK;
584 return Symbol( symbol );
// ---- write path: stores `sym` at index `sym_idx` ----
589 const uint32 SYMBOL_MASK = 15u;
// Same 16-symbols-per-word indexing as the read path (was `>> 5u`).
593 const index_type word_idx = sym_idx >> 4u;
595 uint64 word = stream[ word_idx ];
596 const uint32 symbol_offset = BIG_ENDIAN_T ? (60u - (
uint32(sym_idx & 15u) << 2)) :
uint32((sym_idx & 15u) << 2);
// symbol_offset can be as large as 60, so both the symbol and the mask must be
// widened to 64 bits before shifting; shifting a 32-bit value by >= 32 is
// undefined and would never touch the upper half of the word.
597 const uint64 symbol =
uint64(sym & SYMBOL_MASK) << symbol_offset;
// Clear the destination nibble, then merge in the new symbol.
600 word &= ~(uint64(SYMBOL_MASK) << symbol_offset);
603 stream[ word_idx ] = word | symbol;
608 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
613 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
621 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
632 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
637 This r( m_stream, m_index );
644 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
655 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
660 This r( m_stream, m_index );
667 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
678 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
689 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
694 return This( m_stream, m_index + distance );
699 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
704 return This( m_stream, m_index - distance );
709 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
719 template <
typename Stream>
722 return (*
this =
Symbol( ref ));
727 template <
typename Stream>
736 template <
typename Stream>
739 return m_stream.get();
744 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
754 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
764 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
774 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
782 template <
bool BIG_ENDIAN, u
int32 SYMBOL_SIZE,
typename Symbol,
typename InputStream,
typename IndexType,
typename ValueType>
798 it.
m_word_offset = BIG_ENDIAN ? (
WORD_SIZE - SYMBOL_SIZE - symbol_idx * SYMBOL_SIZE) : symbol_idx * SYMBOL_SIZE;
874 template <
bool BIG_ENDIAN, u
int32 SYMBOL_SIZE,
typename Symbol,
typename InputStream,
typename IndexType>
875 struct forward_packer<BIG_ENDIAN, SYMBOL_SIZE, Symbol, InputStream, IndexType, uint4>
884 static const uint32 SYMBOLS_PER_SUBWORD = SUBWORD_SIZE / SYMBOL_SIZE;
935 const uint32 word_offset = BIG_ENDIAN ? (SUBWORD_SIZE - SYMBOL_SIZE - word_mod * SYMBOL_SIZE) :
936 (word_mod * SYMBOL_SIZE);
938 return Symbol( (word_comp >> word_offset) &
SYMBOL_MASK );
942 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
951 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
960 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
971 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
976 This r( m_stream, m_index );
983 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
994 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
999 This r( m_stream, m_index );
1007 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1012 m_index += distance;
1019 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1024 m_index -= distance;
1031 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1036 return This( m_stream, m_index + distance );
1041 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1046 return This( m_stream, m_index - distance );
1051 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1061 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1071 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1081 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1091 template <
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
// Partial listing of the host-side routine that packs `input_len` symbols read
// from `input_string` into the words of `packed_string`. The function name,
// the remaining parameters, braces and several statements are not visible.
1103 template <
typename InputIterator,
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1106 const IndexType input_len,
1107 InputIterator input_string,
1111 typedef typename packed_stream_type::storage_type word_type;
// Number of bits in one storage word.
1113 const uint32 WORD_SIZE =
uint32( 8u *
sizeof(word_type) );
1115 const bool BIG_ENDIAN = BIG_ENDIAN_T;
1116 const uint32 SYMBOL_SIZE = SYMBOL_SIZE_T;
1117 const uint32 SYMBOLS_PER_WORD = WORD_SIZE / SYMBOL_SIZE;
1118 const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
1119 const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
1121 InputStream words = packed_string.
stream();
1123 const IndexType stream_offset = packed_string.
index();
// NOTE(review): the mask equals `stream_offset % SYMBOLS_PER_WORD` only when
// SYMBOLS_PER_WORD is a power of two, while the word index below uses a division.
1124 const uint32 word_offset = stream_offset & (SYMBOLS_PER_WORD-1);
// Symbols remaining in the first, partially filled word. The declaration of
// word_rem and the condition guarding this assignment are not visible here.
1130 word_rem = SYMBOLS_PER_WORD - word_offset;
// ---- head: read-modify-write of the first word ----
1133 word_type word = words[ stream_offset / SYMBOLS_PER_WORD ];
1136 for (
uint32 i = 0; i < word_rem; ++i)
// NOTE(review): `bp` is uint8, so symbols wider than 8 bits are truncated here.
1139 const uint8 bp = input_string[i] & SYMBOL_MASK;
1141 const uint32 bit_idx = (word_offset + i) * SYMBOL_SIZE;
1142 const uint32 symbol_offset = BIG_ENDIAN ? (WORD_SIZE - SYMBOL_SIZE - bit_idx) : bit_idx;
1143 const word_type symbol = word_type(bp) << symbol_offset;
// Clear the destination bits; the merge of `symbol` is on a non-visible line.
1146 word &= ~(word_type(SYMBOL_MASK) << symbol_offset);
1153 words[ stream_offset / SYMBOLS_PER_WORD ] = word;
// ---- body, OpenMP variant: one full output word per iteration ----
1156 #if defined(_OPENMP) && !defined(NVBIO_DEVICE_COMPILATION)
// Only inputs above this fixed threshold take the parallel loop.
1159 if (input_len > 1000000)
1161 #pragma omp parallel for
1162 for (
int64 i = word_rem; i <
int64( input_len ); i += SYMBOLS_PER_WORD)
// Each word is built from zero, so it is fully overwritten on store.
1165 word_type word = 0u;
1170 for (
uint32 j = 0; j < SYMBOLS_PER_WORD; ++j)
// NOTE(review): no bound check of i + j against input_len is visible for the
// last, possibly partial word - confirm it exists on a non-visible line.
1175 const uint8 bp = input_string[IndexType(i) + j] & SYMBOL_MASK;
1177 const uint32 bit_idx = j * SYMBOL_SIZE;
1178 const uint32 symbol_offset = BIG_ENDIAN ? (WORD_SIZE - SYMBOL_SIZE - bit_idx) : bit_idx;
1179 const word_type symbol = word_type(bp) << symbol_offset;
// NOTE(review): the word index is narrowed to uint32 even when IndexType is wider.
1187 const uint32 word_idx =
uint32( (stream_offset + IndexType(i)) / SYMBOLS_PER_WORD );
1189 words[ word_idx ] = word;
// ---- body, serial variant: same packing with a running bit offset ----
1195 for (IndexType i = word_rem; i < input_len; i += SYMBOLS_PER_WORD)
1198 word_type word = 0u;
// Start at the top symbol slot for big-endian packing, at bit 0 otherwise.
1203 uint32 symbol_offset = BIG_ENDIAN ? WORD_SIZE - SYMBOL_SIZE : 0u;
1206 for (
uint32 j = 0; j < SYMBOLS_PER_WORD; ++j)
1211 const uint8 bp = input_string[IndexType(i) + j] & SYMBOL_MASK;
1215 const word_type symbol = word_type(bp) << symbol_offset;
// Advance to the next symbol slot. In the big-endian case the final decrement
// wraps the unsigned offset, which is harmless only if it is not used afterwards.
1221 if (BIG_ENDIAN) symbol_offset -= SYMBOL_SIZE;
1222 else symbol_offset += SYMBOL_SIZE;
1227 const uint32 word_idx =
uint32( (stream_offset + IndexType(i)) / SYMBOLS_PER_WORD );
1229 words[ word_idx ] = word;
1236 #if defined(__CUDACC__)
// Partial listing of the CUDA routine that packs `input_len` symbols from
// `input_string` into `packed_string`; presumably the kernel launched as
// priv::assign_kernel elsewhere in the file - TODO confirm. The function name,
// the definition of `thread_id`, braces and several statements are not visible.
1240 template <
typename InputIterator,
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1243 const IndexType input_len,
1244 const InputIterator input_string,
1245 PackedStream<InputStream,Symbol,SYMBOL_SIZE_T,BIG_ENDIAN_T,IndexType> packed_string)
1247 typedef PackedStream<InputStream,Symbol,SYMBOL_SIZE_T,BIG_ENDIAN_T,IndexType> packed_stream_type;
1248 typedef typename packed_stream_type::storage_type word_type;
// Number of bits in one storage word.
1250 const uint32 WORD_SIZE =
uint32( 8u *
sizeof(word_type) );
1252 const bool BIG_ENDIAN = BIG_ENDIAN_T;
1253 const uint32 SYMBOL_SIZE = SYMBOL_SIZE_T;
1254 const uint32 SYMBOLS_PER_WORD = WORD_SIZE / SYMBOL_SIZE;
1255 const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
1256 const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
1258 const IndexType stream_offset = packed_string.index();
// Position of the first symbol inside its word, and the number of symbol slots
// from there to the end of that word (SYMBOLS_PER_WORD when already aligned).
1259 const uint32 word_offset = stream_offset & (SYMBOLS_PER_WORD-1);
1260 const uint32 word_rem = SYMBOLS_PER_WORD - word_offset;
1262 InputStream words = packed_string.stream();
// ---- head word: read-modify-write of the word containing stream_offset ----
// (the condition selecting which thread runs this part is not visible)
1269 word_type word = words[ stream_offset / SYMBOLS_PER_WORD ];
// NOTE(review): word_rem is not clamped to input_len in the visible lines, so
// for input_len < word_rem this loop would read past the end of input_string
// and overwrite symbols beyond the requested range - confirm a guard exists.
1272 for (
uint32 i = 0; i < word_rem; ++i)
1275 const uint8 bp = input_string[i] & SYMBOL_MASK;
1277 const uint32 bit_idx = (word_offset + i) * SYMBOL_SIZE;
1278 const uint32 symbol_offset = BIG_ENDIAN ? (WORD_SIZE - SYMBOL_SIZE - bit_idx) : bit_idx;
1279 const word_type symbol = word_type(bp) << symbol_offset;
// NOTE(review): the mask is widened to uint64 here rather than word_type.
1282 word &= ~(
uint64(SYMBOL_MASK) << symbol_offset);
1289 words[ stream_offset / SYMBOLS_PER_WORD ] = word;
// ---- remaining words: thread `thread_id` handles the word starting at
// symbol word_rem + (thread_id - 1) * SYMBOLS_PER_WORD ----
// Threads whose first symbol lies at or beyond input_len have nothing to write
// (the statement executed when this test holds is not visible).
1294 if (word_rem + (thread_id - 1u) * SYMBOLS_PER_WORD >= input_len)
// NOTE(review): computed in 32 bits even when IndexType is wider.
1297 const uint32 i = word_rem + (thread_id - 1u) * SYMBOLS_PER_WORD;
// The word is built from zero, so it is fully overwritten on store.
1300 word_type word = 0u;
1305 for (
uint32 j = 0; j < SYMBOLS_PER_WORD; ++j)
// NOTE(review): no bound check of i + j against input_len is visible for the
// last, possibly partial word - confirm it exists on a non-visible line.
1310 const uint8 bp = input_string[IndexType(i) + j] & SYMBOL_MASK;
1312 const uint32 bit_idx = j * SYMBOL_SIZE;
1313 const uint32 symbol_offset = BIG_ENDIAN ? (WORD_SIZE - SYMBOL_SIZE - bit_idx) : bit_idx;
1314 const word_type symbol = word_type(bp) << symbol_offset;
1322 const uint32 word_idx =
uint32( (stream_offset + IndexType(i)) / SYMBOLS_PER_WORD );
1324 words[ word_idx ] = word;
1331 template <
typename InputIterator,
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1333 const IndexType input_len,
1334 const InputIterator input_string,
1335 PackedStream<InputStream,Symbol,SYMBOL_SIZE_T,BIG_ENDIAN_T,IndexType> packed_string)
1340 typedef PackedStream<InputStream,Symbol,SYMBOL_SIZE_T,BIG_ENDIAN_T,IndexType> packed_stream_type;
1341 typedef typename packed_stream_type::storage_type word_type;
1343 const uint32 WORD_SIZE =
uint32( 8u *
sizeof(word_type) );
1345 const uint32 SYMBOL_SIZE = SYMBOL_SIZE_T;
1346 const uint32 SYMBOLS_PER_WORD = WORD_SIZE / SYMBOL_SIZE;
1348 const IndexType stream_offset = packed_string.index();
1353 const uint32 n_words = word_end - word_begin;
1355 const uint32 blockdim = 128u;
1358 priv::assign_kernel<<<n_blocks,blockdim>>>( input_len, input_string, packed_string );
1366 template <
typename InputIterator,
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1369 const device_tag tag,
1370 const IndexType input_len,
1371 const InputIterator input_string,
1372 PackedStream<InputStream,Symbol,SYMBOL_SIZE_T,BIG_ENDIAN_T,IndexType> packed_string)
1374 #if !defined(NVBIO_DEVICE_COMPILATION)
1379 priv::device_assign( input_len, input_string, packed_string );
1389 #endif // defined(__CUDACC__)
1393 template <
typename InputIterator,
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1397 const IndexType input_len,
1398 const InputIterator input_string,
1406 template <
typename InputIterator,
typename InputStream,
typename Symbol, u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T,
typename IndexType>
1409 const IndexType input_len,
1410 const InputIterator input_string,
1417 assign( system_tag(), input_len, input_string, packed_string );
1431 template <u
int32 BLOCKDIM, u
int32 SYMBOL_SIZE,
bool BIG_ENDIAN,
typename InStreamIterator,
typename OutStreamIterator>
1435 typedef typename std::iterator_traits<InStreamIterator>::value_type word_type;
1437 const uint32 SYMBOLS_PER_WORD = (
sizeof(word_type)*8) / SYMBOL_SIZE;
1438 uint32 word_offset = in_offset & (SYMBOLS_PER_WORD-1);
1439 uint32 begin_word = in_offset / SYMBOLS_PER_WORD;
1440 uint32 end_word = (in_offset + N + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
1443 const uint32 N_words = (N + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
1444 word_type cur_word = in_stream[begin_word+0];
1445 for (
uint32 w = 0; w < N_words; ++w)
1447 if (BIG_ENDIAN ==
false)
1450 word_type out_word = cur_word >> (word_offset*SYMBOL_SIZE);
1453 cur_word = begin_word+w+1 < end_word ? in_stream[begin_word+w+1] : 0u;
1457 out_word |= cur_word << ((SYMBOLS_PER_WORD - word_offset)*SYMBOL_SIZE);
1459 out_stream[ stride*w ] = out_word;
1464 word_type out_word = cur_word << (word_offset*SYMBOL_SIZE);
1467 cur_word = begin_word+w+1 < end_word ? in_stream[begin_word+w+1] : 0u;
1471 out_word |= cur_word >> ((SYMBOLS_PER_WORD - word_offset)*SYMBOL_SIZE);
1473 out_stream[ stride*w ] = out_word;