NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
packedstream_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
31 #if defined(_OPENMP)
32 #include <omp.h>
33 #endif
34 
35 namespace nvbio {
36 
37 template <bool BIG_ENDIAN_T, uint32 SYMBOL_SIZE, typename Symbol, typename InputStream, typename IndexType, typename ValueType>
38 struct packer {
39 };
40 
41 template <bool BIG_ENDIAN_T, uint32 SYMBOL_SIZE, typename Symbol, typename InputStream, typename IndexType>
42 struct packer<BIG_ENDIAN_T,SYMBOL_SIZE,Symbol,InputStream,IndexType,uint32>
43 {
44  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE Symbol get_symbol(InputStream stream, const IndexType sym_idx)
45  {
46  const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
47  const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
48 
49  typedef typename unsigned_type<IndexType>::type index_type;
50 
51  const uint64 bit_idx = uint64(sym_idx) * SYMBOL_SIZE;
52  const index_type word_idx = index_type( bit_idx >> 5u );
53 
54  if (is_pow2<SYMBOL_SIZE>())
55  {
56  const uint32 word = stream[ word_idx ];
57  const uint32 symbol_offset = BIG_ENDIAN_T ? (32u - SYMBOL_SIZE - uint32(bit_idx & 31u)) : uint32(bit_idx & 31u);
58  const uint32 symbol = (word >> symbol_offset) & SYMBOL_MASK;
59 
60  return Symbol( symbol );
61  }
62  else
63  {
64  const uint32 word1 = stream[ word_idx ];
65  const uint32 symbol_offset = uint32(bit_idx & 31u);
66  const uint32 symbol1 = (word1 >> symbol_offset) & SYMBOL_MASK;
67 
68  // check if we need to read a second word
69  const uint32 read_bits = nvbio::min( 32u - symbol_offset, SYMBOL_SIZE );
70  const uint32 rem_bits = SYMBOL_SIZE - read_bits;
71  if (rem_bits)
72  {
73  const uint32 rem_mask = (1u << rem_bits) - 1u;
74 
75  const uint32 word2 = stream[ word_idx+1 ];
76  const uint32 symbol2 = word2 & rem_mask;
77 
78  return Symbol( symbol1 | (symbol2 << read_bits) );
79  }
80  else
81  return Symbol( symbol1 );
82  }
83  }
84 
85  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set_symbol(InputStream stream, const IndexType sym_idx, Symbol sym)
86  {
87  const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
88  const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
89 
90  typedef typename unsigned_type<IndexType>::type index_type;
91 
92  const uint64 bit_idx = uint64(sym_idx) * SYMBOL_SIZE;
93  const index_type word_idx = index_type( bit_idx >> 5u );
94 
95  if (is_pow2<SYMBOL_SIZE>())
96  {
97  uint32 word = stream[ word_idx ];
98  const uint32 symbol_offset = BIG_ENDIAN_T ? (32u - SYMBOL_SIZE - uint32(bit_idx & 31u)) : uint32(bit_idx & 31u);
99  const uint32 symbol = uint32(sym & SYMBOL_MASK) << symbol_offset;
100 
101  // clear all bits
102  word &= ~(SYMBOL_MASK << symbol_offset);
103 
104  // set bits
105  stream[ word_idx ] = word | symbol;
106  }
107  else
108  {
109  uint32 word1 = stream[ word_idx ];
110  const uint32 symbol_offset = uint32(bit_idx & 31u);
111  const uint32 symbol1 = uint32(sym & SYMBOL_MASK) << symbol_offset;
112 
113  // clear all bits
114  word1 &= ~(SYMBOL_MASK << symbol_offset);
115 
116  // set bits
117  stream[ word_idx ] = word1 | symbol1;
118 
119  // check if we need to write a second word
120  const uint32 read_bits = nvbio::min( 32u - symbol_offset, SYMBOL_SIZE );
121  const uint32 rem_bits = SYMBOL_SIZE - read_bits;
122  if (rem_bits)
123  {
124  const uint32 rem_mask = (1u << rem_bits) - 1u;
125 
126  uint32 word2 = stream[ word_idx+1 ];
127  const uint32 symbol2 = uint32(sym & SYMBOL_MASK) >> read_bits;
128 
129  // clear all bits
130  word2 &= ~rem_mask;
131 
132  // set bits
133  stream[ word_idx+1 ] = word2 | symbol2;
134  }
135  }
136  }
137 };
138 
139 template <bool BIG_ENDIAN_T, uint32 SYMBOL_SIZE, typename Symbol, typename InputStream, typename IndexType>
140 struct packer<BIG_ENDIAN_T,SYMBOL_SIZE,Symbol,InputStream,IndexType,uint64>
141 {
142  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE Symbol get_symbol(InputStream stream, const IndexType sym_idx)
143  {
144  const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
145  const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
146 
147  typedef typename unsigned_type<IndexType>::type index_type;
148 
149  const uint64 bit_idx = uint64(sym_idx) * SYMBOL_SIZE;
150  const index_type word_idx = index_type( bit_idx >> 6u );
151 
152  if (is_pow2<SYMBOL_SIZE>())
153  {
154  const uint64 word = stream[ word_idx ];
155  const uint32 symbol_offset = BIG_ENDIAN_T ? (64u - SYMBOL_SIZE - uint32(bit_idx & 63u)) : uint32(bit_idx & 63u);
156  const uint32 symbol = uint32((word >> symbol_offset) & SYMBOL_MASK);
157 
158  return Symbol( symbol );
159  }
160  else
161  {
162  const uint64 word1 = stream[ word_idx ];
163  const uint32 symbol_offset = uint32(bit_idx & 63u);
164  const uint32 symbol1 = uint32((word1 >> symbol_offset) & SYMBOL_MASK);
165 
166  // check if we need to read a second word
167  const uint32 read_bits = nvbio::min( 64u - symbol_offset, SYMBOL_SIZE );
168  const uint32 rem_bits = SYMBOL_SIZE - read_bits;
169  if (rem_bits)
170  {
171  const uint64 rem_mask = (uint64(1u) << rem_bits) - 1u;
172 
173  const uint64 word2 = stream[ word_idx+1 ];
174  const uint32 symbol2 = uint32(word2 & rem_mask);
175 
176  return Symbol( symbol1 | (symbol2 << read_bits) );
177  }
178  else
179  return Symbol( symbol1 );
180  }
181  }
182 
183  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set_symbol(InputStream stream, const IndexType sym_idx, Symbol sym)
184  {
185  const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
186  const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
187 
188  typedef typename unsigned_type<IndexType>::type index_type;
189 
190  const uint64 bit_idx = uint64(sym_idx) * SYMBOL_SIZE;
191  const index_type word_idx = index_type( bit_idx >> 6u );
192 
193  if (is_pow2<SYMBOL_SIZE>())
194  {
195  uint64 word = stream[ word_idx ];
196  const uint32 symbol_offset = BIG_ENDIAN_T ? (64u - SYMBOL_SIZE - uint32(bit_idx & 63u)) : uint32(bit_idx & 63u);
197  const uint64 symbol = uint64(sym & SYMBOL_MASK) << symbol_offset;
198 
199  // clear all bits
200  word &= ~(uint64(SYMBOL_MASK) << symbol_offset);
201 
202  // set bits
203  stream[ word_idx ] = word | symbol;
204  }
205  else
206  {
207  uint64 word1 = stream[ word_idx ];
208  const uint32 symbol_offset = uint32(bit_idx & 63);
209  const uint64 symbol1 = uint64(sym & SYMBOL_MASK) << symbol_offset;
210 
211  // clear all bits
212  word1 &= ~(uint64(SYMBOL_MASK) << symbol_offset);
213 
214  // set bits
215  stream[ word_idx ] = word1 | symbol1;
216 
217  // check if we need to write a second word
218  const uint32 read_bits = nvbio::min( 64u - symbol_offset, SYMBOL_SIZE );
219  const uint32 rem_bits = SYMBOL_SIZE - read_bits;
220  if (rem_bits)
221  {
222  const uint64 rem_mask = (uint64(1u) << rem_bits) - 1u;
223 
224  uint64 word2 = stream[ word_idx+1 ];
225  const uint64 symbol2 = uint64(sym & SYMBOL_MASK) >> read_bits;
226 
227  // clear all bits
228  word2 &= ~rem_mask;
229 
230  // set bits
231  stream[ word_idx+1 ] = word2 | symbol2;
232  }
233  }
234  }
235 };
236 
237 template <bool BIG_ENDIAN_T, uint32 SYMBOL_SIZE, typename Symbol, typename InputStream, typename IndexType>
238 struct packer<BIG_ENDIAN_T,SYMBOL_SIZE,Symbol,InputStream,IndexType,uint8>
239 {
240  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE Symbol get_symbol(InputStream stream, const IndexType sym_idx)
241  {
242  const uint8 SYMBOL_COUNT = uint8(1u) << SYMBOL_SIZE;
243  const uint8 SYMBOL_MASK = SYMBOL_COUNT - uint8(1u);
244 
245  typedef typename unsigned_type<IndexType>::type index_type;
246 
247  const uint64 bit_idx = uint64(sym_idx) * SYMBOL_SIZE;
248  const index_type word_idx = index_type( bit_idx >> 3u );
249 
250  if (is_pow2<SYMBOL_SIZE>())
251  {
252  const uint8 word = stream[ word_idx ];
253  const uint8 symbol_offset = BIG_ENDIAN_T ? (8u - SYMBOL_SIZE - uint8(bit_idx & 7u)) : uint8(bit_idx & 7u);
254  const uint8 symbol = (word >> symbol_offset) & SYMBOL_MASK;
255 
256  return Symbol( symbol );
257  }
258  else
259  {
260  const uint8 word1 = stream[ word_idx ];
261  const uint8 symbol_offset = uint8(bit_idx & 7u);
262  const uint8 symbol1 = (word1 >> symbol_offset) & SYMBOL_MASK;
263 
264  // check if we need to read a second word
265  const uint32 read_bits = nvbio::min( 8u - symbol_offset, SYMBOL_SIZE );
266  const uint32 rem_bits = SYMBOL_SIZE - read_bits;
267  if (rem_bits)
268  {
269  const uint8 rem_mask = uint8((1u << rem_bits) - 1u);
270 
271  const uint8 word2 = stream[ word_idx+1 ];
272  const uint8 symbol2 = word2 & rem_mask;
273 
274  return Symbol( symbol1 | (symbol2 << read_bits) );
275  }
276  else
277  return Symbol( symbol1 );
278  }
279  }
280 
281  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set_symbol(InputStream stream, const IndexType sym_idx, Symbol sym)
282  {
283  const uint8 SYMBOL_COUNT = uint8(1u) << SYMBOL_SIZE;
284  const uint8 SYMBOL_MASK = SYMBOL_COUNT - uint8(1u);
285 
286  typedef typename unsigned_type<IndexType>::type index_type;
287 
288  const uint64 bit_idx = uint64(sym_idx) * SYMBOL_SIZE;
289  const index_type word_idx = index_type( bit_idx >> 3u );
290 
291  if (is_pow2<SYMBOL_SIZE>())
292  {
293  uint8 word = stream[ word_idx ];
294  const uint8 symbol_offset = BIG_ENDIAN_T ? (8u - SYMBOL_SIZE - uint8(bit_idx & 7u)) : uint8(bit_idx & 7u);
295  const uint8 symbol = uint32(sym & SYMBOL_MASK) << symbol_offset;
296 
297  // clear all bits
298  word &= ~(SYMBOL_MASK << symbol_offset);
299 
300  // set bits
301  stream[ word_idx ] = word | symbol;
302  }
303  else
304  {
305  uint8 word1 = stream[ word_idx ];
306  const uint8 symbol_offset = uint8(bit_idx & 7u);
307  const uint8 symbol1 = uint8(sym & SYMBOL_MASK) << symbol_offset;
308 
309  // clear all bits
310  word1 &= ~(SYMBOL_MASK << symbol_offset);
311 
312  // set bits
313  stream[ word_idx ] = word1 | symbol1;
314 
315  // check if we need to write a second word
316  const uint32 read_bits = nvbio::min( 8u - symbol_offset, SYMBOL_SIZE );
317  const uint32 rem_bits = SYMBOL_SIZE - read_bits;
318  if (rem_bits)
319  {
320  uint8 word2 = stream[ word_idx+1 ];
321  const uint8 symbol2 = uint32(sym & SYMBOL_MASK) >> read_bits;
322 
323  const uint8 rem_mask = uint8((1u << rem_bits) - 1u);
324 
325  // clear all bits
326  word2 &= ~rem_mask;
327 
328  // set bits
329  stream[ word_idx+1 ] = word2 | symbol2;
330  }
331  }
332  }
333 };
334 
335 
336 template <bool BIG_ENDIAN_T, typename Symbol, typename InputStream, typename IndexType>
337 struct packer<BIG_ENDIAN_T,2u,Symbol,InputStream,IndexType,uint32>
338 {
339  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE Symbol get_symbol(InputStream stream, const IndexType sym_idx)
340  {
341  const uint32 SYMBOL_MASK = 3u;
342 
343  typedef typename unsigned_type<IndexType>::type index_type;
344 
345  const index_type word_idx = sym_idx >> 4u;
346 
347  const uint32 word = stream[ word_idx ];
348  const uint32 symbol_offset = BIG_ENDIAN_T ? (30u - (uint32(sym_idx & 15u) << 1)) : uint32((sym_idx & 15u) << 1);
349  const uint32 symbol = (word >> symbol_offset) & SYMBOL_MASK;
350 
351  return Symbol( symbol );
352  }
353 
354  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set_symbol(InputStream stream, const IndexType sym_idx, Symbol sym)
355  {
356  const uint32 SYMBOL_MASK = 3u;
357 
358  typedef typename unsigned_type<IndexType>::type index_type;
359 
360  const index_type word_idx = sym_idx >> 4u;
361 
362  uint32 word = stream[ word_idx ];
363  const uint32 symbol_offset = BIG_ENDIAN_T ? (30u - (uint32(sym_idx & 15u) << 1)) : uint32((sym_idx & 15u) << 1);
364  const uint32 symbol = uint32(sym & SYMBOL_MASK) << symbol_offset;
365 
366  // clear all bits
367  word &= ~(SYMBOL_MASK << symbol_offset);
368 
369  // set bits
370  stream[ word_idx ] = word | symbol;
371  }
372 };
373 template <bool BIG_ENDIAN_T, typename Symbol, typename InputStream, typename IndexType>
374 struct packer<BIG_ENDIAN_T,4u,Symbol,InputStream,IndexType,uint32>
375 {
376  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE Symbol get_symbol(InputStream stream, const IndexType sym_idx)
377  {
378  const uint32 SYMBOL_MASK = 15u;
379 
380  typedef typename unsigned_type<IndexType>::type index_type;
381 
382  const index_type word_idx = sym_idx >> 3u;
383 
384  const uint32 word = stream[ word_idx ];
385  const uint32 symbol_offset = BIG_ENDIAN_T ? (28u - (uint32(sym_idx & 7u) << 2)) : uint32((sym_idx & 7u) << 2);
386  const uint32 symbol = (word >> symbol_offset) & SYMBOL_MASK;
387 
388  return Symbol( symbol );
389  }
390 
391  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set_symbol(InputStream stream, const IndexType sym_idx, Symbol sym)
392  {
393  const uint32 SYMBOL_MASK = 15u;
394 
395  typedef typename unsigned_type<IndexType>::type index_type;
396 
397  const index_type word_idx = sym_idx >> 3u;
398 
399  uint32 word = stream[ word_idx ];
400  const uint32 symbol_offset = BIG_ENDIAN_T ? (28u - (uint32(sym_idx & 7u) << 2)) : uint32((sym_idx & 7u) << 2);
401  const uint32 symbol = uint32(sym & SYMBOL_MASK) << symbol_offset;
402 
403  // clear all bits
404  word &= ~(SYMBOL_MASK << symbol_offset);
405 
406  // set bits
407  stream[ word_idx ] = word | symbol;
408  }
409 };
410 
411 template <bool BIG_ENDIAN_T, typename Symbol, typename InputStream, typename IndexType>
412 struct packer<BIG_ENDIAN_T,2u,Symbol,InputStream,IndexType,uint4>
413 {
414  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE Symbol get_symbol(InputStream stream, const IndexType sym_idx)
415  {
416  const uint32 SYMBOL_MASK = 3u;
417 
418  typedef typename unsigned_type<IndexType>::type index_type;
419 
420  const index_type word_idx = sym_idx >> 6u;
421 
422  const uint4 word = stream[ word_idx ];
423  const uint32 symbol_comp = (sym_idx & 63u) >> 4u;
424  const uint32 symbol_offset = BIG_ENDIAN_T ? (30u - (uint32(sym_idx & 15u) << 1)) : uint32((sym_idx & 15u) << 1);
425  const uint32 symbol = (comp( word, symbol_comp ) >> symbol_offset) & SYMBOL_MASK;
426 
427  return Symbol( symbol );
428  }
429 
430  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set_symbol(InputStream stream, const IndexType sym_idx, Symbol sym)
431  {
432  const uint32 SYMBOL_MASK = 3u;
433 
434  typedef typename unsigned_type<IndexType>::type index_type;
435 
436  const index_type word_idx = sym_idx >> 6u;
437 
438  uint4 word = stream[ word_idx ];
439  const uint32 symbol_comp = (sym_idx & 63u) >> 4u;
440  const uint32 symbol_offset = BIG_ENDIAN_T ? (30u - (uint32(sym_idx & 15u) << 1)) : uint32((sym_idx & 15u) << 1);
441  const uint32 symbol = uint32(sym & SYMBOL_MASK) << symbol_offset;
442 
443  // clear all bits
444  select( word, symbol_comp ) &= ~(SYMBOL_MASK << symbol_offset);
445  select( word, symbol_comp ) |= symbol;
446 
447  // set bits
448  stream[ word_idx ] = word;
449  }
450 };
451 template <bool BIG_ENDIAN_T, typename Symbol, typename InputStream, typename IndexType>
452 struct packer<BIG_ENDIAN_T,4u,Symbol,InputStream,IndexType,uint4>
453 {
454  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE Symbol get_symbol(InputStream stream, const IndexType sym_idx)
455  {
456  const uint32 SYMBOL_MASK = 15u;
457 
458  typedef typename unsigned_type<IndexType>::type index_type;
459 
460  const index_type word_idx = sym_idx >> 5u;
461 
462  const uint4 word = stream[ word_idx ];
463  const uint32 symbol_comp = (sym_idx & 31u) >> 3u;
464  const uint32 symbol_offset = BIG_ENDIAN_T ? (28u - (uint32(sym_idx & 7u) << 2)) : uint32((sym_idx & 7u) << 2);
465  const uint32 symbol = (comp( word, symbol_comp ) >> symbol_offset) & SYMBOL_MASK;
466 
467  return Symbol( symbol );
468  }
469 
470  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set_symbol(InputStream stream, const IndexType sym_idx, Symbol sym)
471  {
472  const uint32 SYMBOL_MASK = 15u;
473 
474  typedef typename unsigned_type<IndexType>::type index_type;
475 
476  const index_type word_idx = sym_idx >> 5u;
477 
478  uint4 word = stream[ word_idx ];
479  const uint32 symbol_comp = (sym_idx & 31u) >> 3u;
480  const uint32 symbol_offset = BIG_ENDIAN_T ? (28u - (uint32(sym_idx & 7u) << 2)) : uint32((sym_idx & 7u) << 2);
481  const uint32 symbol = uint32(sym & SYMBOL_MASK) << symbol_offset;
482 
483  // clear all bits
484  select( word, symbol_comp ) &= ~(SYMBOL_MASK << symbol_offset);
485  select( word, symbol_comp ) |= symbol;
486 
487  // set bits
488  stream[ word_idx ] = word;
489  }
490 };
491 template <bool BIG_ENDIAN_T, typename Symbol, typename InputStream, typename IndexType>
492 struct packer<BIG_ENDIAN_T,8u,Symbol,InputStream,IndexType,uint4>
493 {
494  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE Symbol get_symbol(InputStream stream, const IndexType sym_idx)
495  {
496  const uint32 SYMBOL_MASK = 255u;
497 
498  typedef typename unsigned_type<IndexType>::type index_type;
499 
500  const index_type word_idx = sym_idx >> 4u;
501 
502  const uint4 word = stream[ word_idx ];
503  const uint32 symbol_comp = (sym_idx & 15u) >> 2u;
504  const uint32 symbol_offset = BIG_ENDIAN_T ? (24u - (uint32(sym_idx & 3u) << 3)) : uint32((sym_idx & 3u) << 3);
505  const uint32 symbol = (comp( word, symbol_comp ) >> symbol_offset) & SYMBOL_MASK;
506 
507  return Symbol( symbol );
508  }
509 
510  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set_symbol(InputStream stream, const IndexType sym_idx, Symbol sym)
511  {
512  const uint32 SYMBOL_MASK = 255u;
513 
514  typedef typename unsigned_type<IndexType>::type index_type;
515 
516  const index_type word_idx = sym_idx >> 4u;
517 
518  uint4 word = stream[ word_idx ];
519  const uint32 symbol_comp = (sym_idx & 15u) >> 2u;
520  const uint32 symbol_offset = BIG_ENDIAN_T ? (24u - (uint32(sym_idx & 3u) << 3)) : uint32((sym_idx & 3u) << 3);
521  const uint32 symbol = uint32(sym & SYMBOL_MASK) << symbol_offset;
522 
523  // clear all bits
524  select( word, symbol_comp ) &= ~(SYMBOL_MASK << symbol_offset);
525  select( word, symbol_comp ) |= symbol;
526 
527  // set bits
528  stream[ word_idx ] = word;
529  }
530 };
531 
532 template <bool BIG_ENDIAN_T, typename Symbol, typename InputStream, typename IndexType>
533 struct packer<BIG_ENDIAN_T,2u,Symbol,InputStream,IndexType,uint64>
534 {
535  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE Symbol get_symbol(InputStream stream, const IndexType sym_idx)
536  {
537  const uint32 SYMBOL_MASK = 3u;
538 
539  typedef typename unsigned_type<IndexType>::type index_type;
540 
541  const index_type word_idx = sym_idx >> 5u;
542 
543  const uint64 word = stream[ word_idx ];
544  const uint32 symbol_offset = BIG_ENDIAN_T ? (62u - (uint32(sym_idx & 31u) << 1)) : uint32((sym_idx & 31u) << 1);
545  const uint64 symbol = (word >> symbol_offset) & SYMBOL_MASK;
546 
547  return Symbol( symbol );
548  }
549 
550  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set_symbol(InputStream stream, const IndexType sym_idx, Symbol sym)
551  {
552  const uint32 SYMBOL_MASK = 3u;
553 
554  typedef typename unsigned_type<IndexType>::type index_type;
555 
556  const index_type word_idx = sym_idx >> 5u;
557 
558  uint64 word = stream[ word_idx ];
559  const uint32 symbol_offset = BIG_ENDIAN_T ? (62u - (uint32(sym_idx & 31u) << 1)) : uint32((sym_idx & 31u) << 1);
560  const uint64 symbol = uint64(sym & SYMBOL_MASK) << symbol_offset;
561 
562  // clear all bits
563  word &= ~(uint64(SYMBOL_MASK) << symbol_offset);
564 
565  // set bits
566  stream[ word_idx ] = word | symbol;
567  }
568 };
569 template <bool BIG_ENDIAN_T, typename Symbol, typename InputStream, typename IndexType>
570 struct packer<BIG_ENDIAN_T,4u,Symbol,InputStream,IndexType,uint64>
571 {
572  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE Symbol get_symbol(InputStream stream, const IndexType sym_idx)
573  {
574  const uint32 SYMBOL_MASK = 15u;
575 
576  typedef typename unsigned_type<IndexType>::type index_type;
577 
578  const index_type word_idx = sym_idx >> 5u;
579 
580  const uint64 word = stream[ word_idx ];
581  const uint32 symbol_offset = BIG_ENDIAN_T ? (60u - (uint32(sym_idx & 15u) << 2)) : uint32((sym_idx & 15u) << 2);
582  const uint64 symbol = (word >> symbol_offset) & SYMBOL_MASK;
583 
584  return Symbol( symbol );
585  }
586 
587  static NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set_symbol(InputStream stream, const IndexType sym_idx, Symbol sym)
588  {
589  const uint32 SYMBOL_MASK = 15u;
590 
591  typedef typename unsigned_type<IndexType>::type index_type;
592 
593  const index_type word_idx = sym_idx >> 5u;
594 
595  uint64 word = stream[ word_idx ];
596  const uint32 symbol_offset = BIG_ENDIAN_T ? (60u - (uint32(sym_idx & 15u) << 2)) : uint32((sym_idx & 15u) << 2);
597  const uint64 symbol = uint32(sym & SYMBOL_MASK) << symbol_offset;
598 
599  // clear all bits
600  word &= ~(SYMBOL_MASK << symbol_offset);
601 
602  // set bits
603  stream[ word_idx ] = word | symbol;
604  }
605 };
606 
607 
608 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
610 {
612 }
613 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
615 {
617 }
618 
619 // pre-increment operator
620 //
621 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
625 {
626  ++m_index;
627  return *this;
628 }
629 
630 // post-increment operator
631 //
632 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
636 {
637  This r( m_stream, m_index );
638  ++m_index;
639  return r;
640 }
641 
642 // pre-decrement operator
643 //
644 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
648 {
649  --m_index;
650  return *this;
651 }
652 
653 // post-decrement operator
654 //
655 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
659 {
660  This r( m_stream, m_index );
661  --m_index;
662  return r;
663 }
664 
665 // add offset
666 //
667 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
671 {
672  m_index += distance;
673  return *this;
674 }
675 
676 // subtract offset
677 //
678 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
682 {
683  m_index -= distance;
684  return *this;
685 }
686 
687 // add offset
688 //
689 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
693 {
694  return This( m_stream, m_index + distance );
695 }
696 
697 // subtract offset
698 //
699 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
703 {
704  return This( m_stream, m_index - distance );
705 }
706 
707 // difference
708 //
709 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
713 {
714  return sindex_type( m_index - it.m_index );
715 }
716 
717 // assignment operator
718 //
719 template <typename Stream>
721 {
722  return (*this = Symbol( ref ));
723 }
724 
725 // assignment operator
726 //
727 template <typename Stream>
729 {
730  m_stream.set( s );
731  return *this;
732 }
733 
734 // conversion operator
735 //
736 template <typename Stream>
738 {
739  return m_stream.get();
740 }
741 
744 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
748 {
749  return it1.index() < it2.index();
750 }
751 
754 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
758 {
759  return it1.index() > it2.index();
760 }
761 
764 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
768 {
769  return it1.stream() == it2.stream() && it1.index() == it2.index();
770 }
771 
774 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
778 {
779  return it1.stream() != it2.stream() || it1.index() != it2.index();
780 }
781 
782 template <bool BIG_ENDIAN, uint32 SYMBOL_SIZE, typename Symbol, typename InputStream, typename IndexType, typename ValueType>
784 {
786 
787  static const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
788  static const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
789  static const uint32 WORD_SIZE = 8u * uint32( sizeof(ValueType) );
790  static const uint32 SYMBOLS_PER_WORD = WORD_SIZE / SYMBOL_SIZE;
791 
793  static void rebase(forward_stream_type& it)
794  {
795  const uint32 symbol_idx = it.m_index & (SYMBOLS_PER_WORD-1);
796 
798  it.m_word_offset = BIG_ENDIAN ? (WORD_SIZE - SYMBOL_SIZE - symbol_idx * SYMBOL_SIZE) : symbol_idx * SYMBOL_SIZE;
799 
800  it.m_word = it.m_stream[ it.m_word_index ];
801  }
802 
804  static void next(forward_stream_type& it)
805  {
806  it.m_index++;
807 
808  if (BIG_ENDIAN)
809  {
810  if (it.m_word_offset > 0)
811  it.m_word_offset -= SYMBOL_SIZE;
812  else
813  {
814  // need a new word
815  ++it.m_word_index;
816 
817  it.m_word = it.m_stream[ it.m_word_index ];
818  it.m_word_offset = WORD_SIZE - SYMBOL_SIZE;
819  }
820  }
821  else
822  {
823  if (it.m_word_offset < WORD_SIZE - SYMBOL_SIZE)
824  it.m_word_offset += SYMBOL_SIZE;
825  else
826  {
827  // need a new word
828  ++it.m_word_index;
829 
830  it.m_word = it.m_stream[ it.m_word_index ];
831  it.m_word_offset = 0;
832  }
833  }
834  }
836  static void prev(forward_stream_type& it)
837  {
838  it.m_index--;
839 
840  if (BIG_ENDIAN)
841  {
842  if (it.m_word_offset < WORD_SIZE - SYMBOL_SIZE)
843  it.m_word_offset += SYMBOL_SIZE;
844  else
845  {
846  // need a new word
847  --it.m_word_index;
848 
849  it.m_word = it.m_stream[ it.m_word_index ];
850  it.m_word_offset = 0u;
851  }
852  }
853  else
854  {
855  if (it.m_word_offset > 0)
856  it.m_word_offset -= SYMBOL_SIZE;
857  else
858  {
859  // need a new word
860  --it.m_word_index;
861 
862  it.m_word = it.m_stream[ it.m_word_index ];
863  it.m_word_offset = WORD_SIZE - SYMBOL_SIZE;
864  }
865  }
866  }
868  static Symbol fetch(const forward_stream_type& it)
869  {
870  return Symbol( (it.m_word >> it.m_word_offset) & SYMBOL_MASK );
871  }
872 };
873 
874 template <bool BIG_ENDIAN, uint32 SYMBOL_SIZE, typename Symbol, typename InputStream, typename IndexType>
875 struct forward_packer<BIG_ENDIAN, SYMBOL_SIZE, Symbol, InputStream, IndexType, uint4>
876 {
878 
879  static const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
880  static const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
881  static const uint32 WORD_SIZE = 128;
882  static const uint32 SUBWORD_SIZE = 32;
883  static const uint32 SYMBOLS_PER_WORD = WORD_SIZE / SYMBOL_SIZE;
884  static const uint32 SYMBOLS_PER_SUBWORD = SUBWORD_SIZE / SYMBOL_SIZE;
885 
887  static void rebase(forward_stream_type& it)
888  {
889  const uint32 symbol_idx = it.m_index & (SYMBOLS_PER_WORD-1);
890 
892  it.m_word_offset = it.m_index & (SYMBOLS_PER_WORD-1);
893 
894  it.m_word = it.m_stream[ it.m_word_index ];
895  }
896 
898  static void next(forward_stream_type& it)
899  {
900  it.m_index++;
901 
902  if (it.m_word_offset < SYMBOLS_PER_WORD-1)
903  it.m_word_offset++;
904  else
905  {
906  // need a new word
907  ++it.m_word_index;
908 
909  it.m_word = it.m_stream[ it.m_word_index ];
910  it.m_word_offset = 0;
911  }
912  }
914  static void prev(forward_stream_type& it)
915  {
916  it.m_index--;
917 
918  if (it.m_word_offset > 0)
919  it.m_word_offset--;
920  else
921  {
922  // need a new word
923  --it.m_word_index;
924 
925  it.m_word = it.m_stream[ it.m_word_index ];
927  }
928  }
930  static Symbol fetch(const forward_stream_type& it)
931  {
932  const uint32 word_comp = comp( it.m_word, it.m_word_offset / SYMBOLS_PER_SUBWORD );
933  const uint32 word_mod = it.m_word_offset & (SYMBOLS_PER_SUBWORD-1);
934 
935  const uint32 word_offset = BIG_ENDIAN ? (SUBWORD_SIZE - SYMBOL_SIZE - word_mod * SYMBOL_SIZE) :
936  (word_mod * SYMBOL_SIZE);
937 
938  return Symbol( (word_comp >> word_offset) & SYMBOL_MASK );
939  }
940 };
941 
942 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
945 {
947 }
948 
949 // rebase the iterator
950 //
951 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
954 {
956 }
957 
958 // pre-increment operator
959 //
960 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
964 {
966  return *this;
967 }
968 
969 // post-increment operator
970 //
971 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
975 {
976  This r( m_stream, m_index );
978  return r;
979 }
980 
981 // pre-decrement operator
982 //
983 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
987 {
989  return *this;
990 }
991 
992 // post-decrement operator
993 //
994 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
998 {
999  This r( m_stream, m_index );
1001  return r;
1002 }
1003 
1004 
1005 // add offset
1006 //
1007 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1011 {
1012  m_index += distance;
1013  rebase();
1014  return *this;
1015 }
1016 
1017 // subtract offset
1018 //
1019 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1023 {
1024  m_index -= distance;
1025  rebase();
1026  return *this;
1027 }
1028 
1029 // add offset
1030 //
1031 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1035 {
1036  return This( m_stream, m_index + distance );
1037 }
1038 
1039 // subtract offset
1040 //
1041 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1045 {
1046  return This( m_stream, m_index - distance );
1047 }
1048 
1049 // difference
1050 //
1051 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1055 {
1056  return sindex_type( m_index - it.m_index );
1057 }
1058 
1061 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1065 {
1066  return it1.index() < it2.index();
1067 }
1068 
1071 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1075 {
1076  return it1.index() > it2.index();
1077 }
1078 
1081 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1085 {
1086  return it1.stream() == it2.stream() && it1.index() == it2.index();
1087 }
1088 
1091 template <typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1095 {
1096  return it1.stream() != it2.stream() || it1.index() != it2.index();
1097 }
1098 
1099 namespace priv {
1100 
1101 // assign a sequence to a packed stream
1102 //
1103 template <typename InputIterator, typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1106  const IndexType input_len,
1107  InputIterator input_string,
1109 {
1111  typedef typename packed_stream_type::storage_type word_type;
1112 
1113  const uint32 WORD_SIZE = uint32( 8u * sizeof(word_type) );
1114 
1115  const bool BIG_ENDIAN = BIG_ENDIAN_T;
1116  const uint32 SYMBOL_SIZE = SYMBOL_SIZE_T;
1117  const uint32 SYMBOLS_PER_WORD = WORD_SIZE / SYMBOL_SIZE;
1118  const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
1119  const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
1120 
1121  InputStream words = packed_string.stream();
1122 
1123  const IndexType stream_offset = packed_string.index();
1124  const uint32 word_offset = stream_offset & (SYMBOLS_PER_WORD-1);
1125  uint32 word_rem = 0;
1126 
1127  if (word_offset)
1128  {
1129  // compute how many symbols we still need to encode to fill the current word
1130  word_rem = SYMBOLS_PER_WORD - word_offset;
1131 
1132  // fetch the word in question
1133  word_type word = words[ stream_offset / SYMBOLS_PER_WORD ];
1134 
1135  // loop through the word's bp's
1136  for (uint32 i = 0; i < word_rem; ++i)
1137  {
1138  // fetch the bp
1139  const uint8 bp = input_string[i] & SYMBOL_MASK;
1140 
1141  const uint32 bit_idx = (word_offset + i) * SYMBOL_SIZE;
1142  const uint32 symbol_offset = BIG_ENDIAN ? (WORD_SIZE - SYMBOL_SIZE - bit_idx) : bit_idx;
1143  const word_type symbol = word_type(bp) << symbol_offset;
1144 
1145  // clear all bits
1146  word &= ~(word_type(SYMBOL_MASK) << symbol_offset);
1147 
1148  // set bits
1149  word |= symbol;
1150  }
1151 
1152  // write out the word
1153  words[ stream_offset / SYMBOLS_PER_WORD ] = word;
1154  }
1155 
1156  #if defined(_OPENMP) && !defined(NVBIO_DEVICE_COMPILATION)
1157  // we use this solution because the 'if' clause in the 'pragma omp for' results in 30% slowdown
1158  // when the if is not taken and the loop is executed serially
1159  if (input_len > 1000000)
1160  {
1161  #pragma omp parallel for
1162  for (int64 i = word_rem; i < int64( input_len ); i += SYMBOLS_PER_WORD)
1163  {
1164  // encode a word's worth of characters
1165  word_type word = 0u;
1166 
1167  const uint32 n_symbols = nvbio::min( SYMBOLS_PER_WORD, uint32( input_len - IndexType(i) ) );
1168 
1169  // loop through the word's bp's
1170  for (uint32 j = 0; j < SYMBOLS_PER_WORD; ++j)
1171  {
1172  if (j < n_symbols)
1173  {
1174  // fetch the bp
1175  const uint8 bp = input_string[IndexType(i) + j] & SYMBOL_MASK;
1176 
1177  const uint32 bit_idx = j * SYMBOL_SIZE;
1178  const uint32 symbol_offset = BIG_ENDIAN ? (WORD_SIZE - SYMBOL_SIZE - bit_idx) : bit_idx;
1179  const word_type symbol = word_type(bp) << symbol_offset;
1180 
1181  // set bits
1182  word |= symbol;
1183  }
1184  }
1185 
1186  // write out the word
1187  const uint32 word_idx = uint32( (stream_offset + IndexType(i)) / SYMBOLS_PER_WORD );
1188 
1189  words[ word_idx ] = word;
1190  }
1191  }
1192  else
1193  #endif
1194  {
1195  for (IndexType i = word_rem; i < input_len; i += SYMBOLS_PER_WORD)
1196  {
1197  // encode a word's worth of characters
1198  word_type word = 0u;
1199 
1200  const uint32 n_symbols = nvbio::min( SYMBOLS_PER_WORD, uint32( input_len - IndexType(i) ) );
1201 
1202  // get the offset to the first symbol
1203  uint32 symbol_offset = BIG_ENDIAN ? WORD_SIZE - SYMBOL_SIZE : 0u;
1204 
1205  // loop through the word's bp's
1206  for (uint32 j = 0; j < SYMBOLS_PER_WORD; ++j)
1207  {
1208  if (j < n_symbols)
1209  {
1210  // fetch the bp
1211  const uint8 bp = input_string[IndexType(i) + j] & SYMBOL_MASK;
1212 
1213  //const uint32 bit_idx = j * SYMBOL_SIZE;
1214  //const uint32 symbol_offset = BIG_ENDIAN ? (WORD_SIZE - SYMBOL_SIZE - bit_idx) : bit_idx;
1215  const word_type symbol = word_type(bp) << symbol_offset;
1216 
1217  // set bits
1218  word |= symbol;
1219 
1220  // move the offset
1221  if (BIG_ENDIAN) symbol_offset -= SYMBOL_SIZE;
1222  else symbol_offset += SYMBOL_SIZE;
1223  }
1224  }
1225 
1226  // write out the word
1227  const uint32 word_idx = uint32( (stream_offset + IndexType(i)) / SYMBOLS_PER_WORD );
1228 
1229  words[ word_idx ] = word;
1230  }
1231  }
1232 }
1233 
1234 } // namespace priv
1235 
1236 #if defined(__CUDACC__)
1237 
1238 namespace priv {
1239 
1240 template <typename InputIterator, typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1241 __global__
1242 void assign_kernel(
1243  const IndexType input_len,
1244  const InputIterator input_string,
1245  PackedStream<InputStream,Symbol,SYMBOL_SIZE_T,BIG_ENDIAN_T,IndexType> packed_string)
1246 {
1247  typedef PackedStream<InputStream,Symbol,SYMBOL_SIZE_T,BIG_ENDIAN_T,IndexType> packed_stream_type;
1248  typedef typename packed_stream_type::storage_type word_type;
1249 
1250  const uint32 WORD_SIZE = uint32( 8u * sizeof(word_type) );
1251 
1252  const bool BIG_ENDIAN = BIG_ENDIAN_T;
1253  const uint32 SYMBOL_SIZE = SYMBOL_SIZE_T;
1254  const uint32 SYMBOLS_PER_WORD = WORD_SIZE / SYMBOL_SIZE;
1255  const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
1256  const uint32 SYMBOL_MASK = SYMBOL_COUNT - 1u;
1257 
1258  const IndexType stream_offset = packed_string.index(); // stream offset, in symbols
1259  const uint32 word_offset = stream_offset & (SYMBOLS_PER_WORD-1); // offset within the first word
1260  const uint32 word_rem = SYMBOLS_PER_WORD - word_offset; // # of remaining symbols to fill the first word
1261 
1262  InputStream words = packed_string.stream();
1263 
1264  const uint32 thread_id = threadIdx.x + blockIdx.x * blockDim.x;
1265 
1266  if (thread_id == 0)
1267  {
1268  // fetch the word in question
1269  word_type word = words[ stream_offset / SYMBOLS_PER_WORD ];
1270 
1271  // loop through the word's bp's
1272  for (uint32 i = 0; i < word_rem; ++i)
1273  {
1274  // fetch the bp
1275  const uint8 bp = input_string[i] & SYMBOL_MASK;
1276 
1277  const uint32 bit_idx = (word_offset + i) * SYMBOL_SIZE;
1278  const uint32 symbol_offset = BIG_ENDIAN ? (WORD_SIZE - SYMBOL_SIZE - bit_idx) : bit_idx;
1279  const word_type symbol = word_type(bp) << symbol_offset;
1280 
1281  // clear all bits
1282  word &= ~(uint64(SYMBOL_MASK) << symbol_offset);
1283 
1284  // set bits
1285  word |= symbol;
1286  }
1287 
1288  // write out the word
1289  words[ stream_offset / SYMBOLS_PER_WORD ] = word;
1290  }
1291  else
1292  {
1293  // check whether this thread should do something
1294  if (word_rem + (thread_id - 1u) * SYMBOLS_PER_WORD >= input_len)
1295  return;
1296 
1297  const uint32 i = word_rem + (thread_id - 1u) * SYMBOLS_PER_WORD;
1298 
1299  // encode a word's worth of characters
1300  word_type word = 0u;
1301 
1302  const uint32 n_symbols = nvbio::min( SYMBOLS_PER_WORD, uint32( input_len - IndexType(i) ) );
1303 
1304  // loop through the word's bp's
1305  for (uint32 j = 0; j < SYMBOLS_PER_WORD; ++j)
1306  {
1307  if (j < n_symbols)
1308  {
1309  // fetch the bp
1310  const uint8 bp = input_string[IndexType(i) + j] & SYMBOL_MASK;
1311 
1312  const uint32 bit_idx = j * SYMBOL_SIZE;
1313  const uint32 symbol_offset = BIG_ENDIAN ? (WORD_SIZE - SYMBOL_SIZE - bit_idx) : bit_idx;
1314  const word_type symbol = word_type(bp) << symbol_offset;
1315 
1316  // set bits
1317  word |= symbol;
1318  }
1319  }
1320 
1321  // write out the word
1322  const uint32 word_idx = uint32( (stream_offset + IndexType(i)) / SYMBOLS_PER_WORD );
1323 
1324  words[ word_idx ] = word;
1325  }
1326 }
1327 
1328 // assign a sequence to a packed stream
1329 // NOTE: this is a host ONLY function - marking it as host/device would cause compiler misbehaviours
1330 //
1331 template <typename InputIterator, typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1332 void device_assign(
1333  const IndexType input_len,
1334  const InputIterator input_string,
1335  PackedStream<InputStream,Symbol,SYMBOL_SIZE_T,BIG_ENDIAN_T,IndexType> packed_string)
1336 {
1337  if (input_len == 0)
1338  return;
1339 
1340  typedef PackedStream<InputStream,Symbol,SYMBOL_SIZE_T,BIG_ENDIAN_T,IndexType> packed_stream_type;
1341  typedef typename packed_stream_type::storage_type word_type;
1342 
1343  const uint32 WORD_SIZE = uint32( 8u * sizeof(word_type) );
1344 
1345  const uint32 SYMBOL_SIZE = SYMBOL_SIZE_T;
1346  const uint32 SYMBOLS_PER_WORD = WORD_SIZE / SYMBOL_SIZE;
1347 
1348  const IndexType stream_offset = packed_string.index(); // stream offset, in symbols
1349 
1350  const uint32 word_begin = util::divide_rz( stream_offset, SYMBOLS_PER_WORD );
1351  const uint32 word_end = util::divide_ri( stream_offset + input_len, SYMBOLS_PER_WORD );
1352 
1353  const uint32 n_words = word_end - word_begin;
1354 
1355  const uint32 blockdim = 128u;
1356  const uint32 n_blocks = util::divide_ri( n_words, blockdim );
1357 
1358  priv::assign_kernel<<<n_blocks,blockdim>>>( input_len, input_string, packed_string );
1359  cuda::check_error("assign_kernel()");
1360 }
1361 
1362 } // namespace priv
1363 
1364 // assign a sequence to a packed stream
1365 //
1366 template <typename InputIterator, typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1368 void assign(
1369  const device_tag tag,
1370  const IndexType input_len,
1371  const InputIterator input_string,
1372  PackedStream<InputStream,Symbol,SYMBOL_SIZE_T,BIG_ENDIAN_T,IndexType> packed_string)
1373 {
1374  #if !defined(NVBIO_DEVICE_COMPILATION)
1375  //
1376  // this function is being called on the host: spawn a kernel
1377  //
1378 
1379  priv::device_assign( input_len, input_string, packed_string );
1380  #else
1381  //
1382  // this function is being called on the device: call the serial implementation
1383  //
1384 
1385  priv::serial_assign( input_len, input_string, packed_string );
1386  #endif
1387 }
1388 
1389 #endif // defined(__CUDACC__)
1390 
1391 // assign a sequence to a packed stream
1392 //
1393 template <typename InputIterator, typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1395 void assign(
1396  const host_tag tag,
1397  const IndexType input_len,
1398  const InputIterator input_string,
1400 {
1401  priv::serial_assign( input_len, input_string, packed_string );
1402 }
1403 
1404 // assign a sequence to a packed stream
1405 //
1406 template <typename InputIterator, typename InputStream, typename Symbol, uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename IndexType>
1408 void assign(
1409  const IndexType input_len,
1410  const InputIterator input_string,
1412 {
1413  // find the system tag of the output packed stream
1414  typedef typename iterator_system<InputStream>::type system_tag;
1415 
1416  // and chose which function to call based on it
1417  assign( system_tag(), input_len, input_string, packed_string );
1418 }
1419 
1420 //
1421 // A utility function to transpose a set of packed input streams:
1422 // the symbols of the i-th input stream is supposed to be stored contiguously in the range [offset(i), offset + N(i)]
1423 // the *words* of i-th output stream will be stored in strided fashion at out_stream[tid, tid + (N(i)+symbols_per_word-1/symbols_per_word) * stride]
1424 //
1425 // \param stride output stride
1426 // \param N length of this thread's string in the input stream
1427 // \param in_offset offset of this thread's string in the input stream
1428 // \param in_stream input stream
1429 // \param out_stream output stream
1430 //
1431 template <uint32 BLOCKDIM, uint32 SYMBOL_SIZE, bool BIG_ENDIAN, typename InStreamIterator, typename OutStreamIterator>
1433 void transpose_packed_streams(const uint32 stride, const uint32 N, const uint32 in_offset, const InStreamIterator in_stream, OutStreamIterator out_stream)
1434 {
1435  typedef typename std::iterator_traits<InStreamIterator>::value_type word_type;
1436 
1437  const uint32 SYMBOLS_PER_WORD = (sizeof(word_type)*8) / SYMBOL_SIZE;
1438  uint32 word_offset = in_offset & (SYMBOLS_PER_WORD-1);
1439  uint32 begin_word = in_offset / SYMBOLS_PER_WORD;
1440  uint32 end_word = (in_offset + N + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
1441 
1442  // write out the output symbols
1443  const uint32 N_words = (N + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
1444  word_type cur_word = in_stream[begin_word+0];
1445  for (uint32 w = 0; w < N_words; ++w)
1446  {
1447  if (BIG_ENDIAN == false)
1448  {
1449  // fill the first part of the output word
1450  word_type out_word = cur_word >> (word_offset*SYMBOL_SIZE);
1451 
1452  // fetch the next word
1453  cur_word = begin_word+w+1 < end_word ? in_stream[begin_word+w+1] : 0u;
1454 
1455  // fill the second part of the output word
1456  if (word_offset)
1457  out_word |= cur_word << ((SYMBOLS_PER_WORD - word_offset)*SYMBOL_SIZE);
1458 
1459  out_stream[ stride*w ] = out_word;
1460  }
1461  else
1462  {
1463  // fill the first part of the output word
1464  word_type out_word = cur_word << (word_offset*SYMBOL_SIZE);
1465 
1466  // fetch the next word
1467  cur_word = begin_word+w+1 < end_word ? in_stream[begin_word+w+1] : 0u;
1468 
1469  // fill the second part of the output word
1470  if (word_offset)
1471  out_word |= cur_word >> ((SYMBOLS_PER_WORD - word_offset)*SYMBOL_SIZE);
1472 
1473  out_stream[ stride*w ] = out_word;
1474  }
1475  }
1476 }
1477 
1478 } // namespace nvbio