33 #if defined(__CUDACC__)
36 #include <thrust/device_vector.h>
37 #include <thrust/scan.h>
38 #if THRUST_VERSION >= 100700
39 #include <thrust/execution_policy.h>
42 #endif // defined(__CUDACC__)
46 #if defined(__CUDACC__)
58 void vector_init_kernel(
74 typename OutLengthIterator>
76 void generic_string_lengths_kernel(
78 const InStringSet in_set,
79 OutLengthIterator out_lengths)
85 out_lengths[ tid ] = tid < N_strings ? in_set[tid].size() : 0u;
94 typename OutStringIterator,
95 typename OutOffsetIterator>
97 void generic_to_concat_kernel(
100 OutStringIterator out_string,
101 OutOffsetIterator out_offsets)
104 if (tid >= N_strings)
107 typename InStringSet::string_type in_string = in_set[tid];
111 const uint32 offset = out_offsets[tid];
113 out_string[offset + j] = in_string[j];
122 typename InStringSet,
123 typename OutStringIterator,
124 typename OutOffsetIterator>
126 void contig_to_concat_kernel(
129 OutStringIterator out_string,
130 OutOffsetIterator out_offsets)
134 if (wid >= N_strings)
137 typename InStringSet::string_type in_string = in_set[wid];
139 const uint32 length = in_string.size();
141 const uint32 offset = out_offsets[wid];
144 out_string[offset + j] = in_string[j];
152 typename InputIterator,
153 typename OutputIterator>
155 void generic_vector_copy_kernel(
158 OutputIterator output)
162 output[tid] = input[tid];
174 typename InStringSet,
175 typename OutStreamIterator,
176 typename OutOffsetIterator>
178 void generic_to_packed_concat_kernel(
182 OutStreamIterator out_stream,
183 OutOffsetIterator out_offsets)
186 const uint32 WORDS_PER_THREAD = 4u;
189 if (tid * WORDS_PER_THREAD >= N_words)
195 const uint32 SYMBOLS_PER_WORD = (8u*
sizeof(
uint32)) / SYMBOL_SIZE;
200 uint32 global_symbol = SYMBOLS_PER_WORD * WORDS_PER_THREAD * tid;
205 typename InStringSet::string_type in_string = in_set[
string_id];
207 const uint32 wid = warp_id();
208 const uint32 wtid = warp_tid();
211 volatile uint32* warp_sm = sm + wid * WARP_SIZE * WORDS_PER_THREAD;
213 for (
uint32 w = 0; w < WORDS_PER_THREAD && string_id < N_strings; ++w)
218 for (
uint32 s = 0; s < SYMBOLS_PER_WORD; ++s, ++global_symbol, ++local_symbol)
221 while (local_symbol >= in_string.size())
226 if (string_id == N_strings)
230 const uint8 in_c = in_string[ local_symbol ];
233 word |= (in_c << (s*SYMBOL_SIZE));
237 warp_sm[ wtid*WORDS_PER_THREAD + w ] = word;
242 WORDS_PER_THREAD * WARP_SIZE * wid;
244 for (
uint32 t = 0; t < WARP_SIZE; ++t)
246 if (wtid < WORDS_PER_THREAD)
247 out_stream[ base_offset + t*WORDS_PER_THREAD + wtid ] =
248 warp_sm[ t*WORDS_PER_THREAD + wtid ];
259 typename InStringSet,
260 typename OutStreamIterator,
261 typename OutOffsetIterator>
263 void concat_to_packed_concat_kernel(
267 OutStreamIterator out_stream,
268 OutOffsetIterator out_offsets)
277 if (tid <= N_strings)
278 out_offsets[tid] = in_set.offsets()[tid];
280 const uint32 SYMBOLS_PER_WORD = (8u*
sizeof(
uint32)) / SYMBOL_SIZE;
281 if (tid * SYMBOLS_PER_WORD >= N_symbols)
285 typename InStringSet::symbol_iterator in_symbols = in_set.base_string();
289 for (
uint32 s = 0, in_s = tid * SYMBOLS_PER_WORD; s < SYMBOLS_PER_WORD; ++s, ++in_s)
291 const uint8 in_c = in_s < N_symbols ? in_symbols[ in_s ] : 0u;
292 word |= in_c << (s*SYMBOL_SIZE);
296 out_stream[ tid ] = word;
304 typename InStringSet,
305 typename OutStringIterator,
306 typename OutLengthIterator>
308 void generic_to_strided_kernel(
312 OutStringIterator out_stream,
313 OutLengthIterator out_lengths)
316 if (tid >= N_strings)
319 typename InStringSet::string_type in_string = in_set[tid];
321 const uint32 length = in_string.size();
323 typedef strided_iterator<OutStringIterator> strided_stream_type;
325 strided_stream_type out_string( out_stream + tid, out_stride );
328 out_string[j] = in_string[j];
331 out_lengths[tid] =
length;
341 typename InStreamIterator,
342 typename InOffsetIterator,
343 typename OutStringIterator,
344 typename OutLengthIterator>
346 void packed_concat_to_strided_kernel(
349 InStreamIterator in_stream,
350 InOffsetIterator in_offsets,
351 OutStringIterator out_stream,
352 OutLengthIterator out_lengths)
355 if (tid >= N_strings)
358 const uint32 in_offset = in_offsets[tid];
359 const uint32 N = in_offsets[tid+1] - in_offset;
362 typedef typename std::iterator_traits<InStreamIterator>::value_type word_type;
364 const uint32 SYMBOLS_PER_WORD = (
sizeof(word_type)*8) /
BITS;
365 uint32 begin_word = in_offset / SYMBOLS_PER_WORD;
366 uint32 end_word = (in_offset + N + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
367 uint32 word_offset = in_offset & (SYMBOLS_PER_WORD-1);
371 for (
uint32 word = begin_word; word < end_word; ++word)
372 lmem[word - begin_word] = in_stream[ word ];
374 typedef PackedStream<const_cached_iterator<const uint32*>,
uint8,
BITS,BIG_ENDIAN> const_stream_type;
375 const_stream_type clmem_stream( &lmem[0] );
378 for (
uint32 i = 0; i < N; ++i)
379 out_stream[ tid + out_stride*i ] = clmem_stream[i + word_offset];
382 typedef PackedStream<const_cached_iterator<InStreamIterator>,
uint8,
BITS,BIG_ENDIAN> const_stream_type;
383 const_stream_type cstream( in_stream );
385 for (
uint32 i = 0; i < N; ++i)
386 out_stream[ tid + out_stride*i ] = cstream[i + in_offset];
388 out_lengths[tid] = N;
398 typename InStreamIterator,
399 typename InRangeIterator,
400 typename OutStringIterator,
401 typename OutLengthIterator>
403 void packed_sparse_to_strided_kernel(
406 InStreamIterator in_stream,
407 InRangeIterator in_ranges,
408 OutStringIterator out_stream,
409 OutLengthIterator out_lengths)
412 if (tid >= N_strings)
415 const uint2 range = in_ranges[tid];
416 const uint32 in_offset = range.x;
417 const uint32 N = range.y - in_offset;
420 typedef typename std::iterator_traits<InStreamIterator>::value_type word_type;
422 const uint32 SYMBOLS_PER_WORD = (
sizeof(word_type)*8) /
BITS;
423 uint32 word_offset = in_offset & (SYMBOLS_PER_WORD-1);
424 uint32 begin_word = in_offset / SYMBOLS_PER_WORD;
425 uint32 end_word = (in_offset + N + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
428 for (
uint32 word = begin_word; word < end_word; ++word)
429 lmem[word - begin_word] = in_stream[ word ];
431 typedef PackedStream<const_cached_iterator<const word_type*>,
uint8,
BITS,BIG_ENDIAN> const_stream_type;
432 const_stream_type clmem_stream( &lmem[0] );
435 for (
uint32 i = 0; i < N; ++i)
436 out_stream[ tid + out_stride*i ] = clmem_stream[i + word_offset];
439 typedef PackedStream<const_cached_iterator<InStreamIterator>,
uint8,
BITS,BIG_ENDIAN> const_stream_type;
440 const_stream_type cstream( in_stream );
442 for (
uint32 i = 0; i < N; ++i)
443 out_stream[ tid + out_stride*i ] = cstream[i + in_offset];
445 out_lengths[tid] = N;
453 typename InStringSet,
454 typename OutLengthIterator>
456 void contig_to_strided_uint8_kernel(
461 OutLengthIterator out_lengths)
465 const uint32 length = tid < N_strings ? in_set[ tid ].size() : 0u;
468 const uint32 wid = warp_id();
469 const uint32 wtid = warp_tid();
471 __shared__
volatile uint8 sm[
BLOCKDIM * WARP_SIZE ];
472 volatile uint8* warp_sm = sm + wid * WARP_SIZE * WARP_SIZE;
476 for (
uint32 block = 0; __any( block < length ); block += WARP_SIZE)
478 for (
uint32 t = 0; t < WARP_SIZE; ++t)
481 const uint32 t_string_id = blockIdx.x*
BLOCKDIM + wid*WARP_SIZE + t;
482 if (t_string_id >= N_strings)
486 typename InStringSet::string_type t_string = in_set[ t_string_id ];
489 warp_sm[ wtid*WARP_SIZE + t ] = (block + wtid < t_string.size()) ? t_string[block + wtid] : 0u;
493 if (block + WARP_SIZE <= length)
495 for (
uint32 s = 0; s < WARP_SIZE; ++s)
496 out_stream[ tid + (block + s)*out_stride ] = warp_sm[ s*WARP_SIZE + wtid ];
498 else if (block < length)
500 for (
uint32 s = 0; s < WARP_SIZE; ++s)
502 if (block + s < length)
503 out_stream[ tid + (block + s)*out_stride ] = warp_sm[ s*WARP_SIZE + wtid ];
510 out_lengths[tid] =
length;
518 typename InStringSet,
519 typename OutLengthIterator>
521 void generic_to_strided_uint8_kernel(
526 OutLengthIterator out_lengths)
529 const uint32 base_id = tid*4;
530 if (base_id >= N_strings)
533 typedef strided_iterator<uint32*> strided_stream_type;
535 strided_stream_type out_string( (
uint32*)(out_stream) + tid, out_stride/4u );
537 if (base_id + 3 < N_strings)
540 typename InStringSet::string_type in_string0 = in_set[base_id + 0];
541 typename InStringSet::string_type in_string1 = in_set[base_id + 1];
542 typename InStringSet::string_type in_string2 = in_set[base_id + 2];
543 typename InStringSet::string_type in_string3 = in_set[base_id + 3];
545 const uint32 length0 = in_string0.size();
546 const uint32 length1 = in_string1.size();
547 const uint32 length2 = in_string2.size();
548 const uint32 length3 = in_string3.size();
554 for (
uint32 j = 0; j < min_length; ++j)
556 const uint32 c0 = in_string0[j];
557 const uint32 c1 = in_string1[j];
558 const uint32 c2 = in_string2[j];
559 const uint32 c3 = in_string3[j];
561 out_string[j] = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
568 for (
uint32 j = min_length; j < max_length; ++j)
570 const uint32 c0 = j < length0 ? in_string0[j] : 0xFFu;
571 const uint32 c1 = j < length1 ? in_string1[j] : 0xFFu;
572 const uint32 c2 = j < length2 ? in_string2[j] : 0xFFu;
573 const uint32 c3 = j < length3 ? in_string3[j] : 0xFFu;
575 out_string[j] = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
578 out_lengths[base_id] = length0;
579 out_lengths[base_id + 1] = length1;
580 out_lengths[base_id + 2] = length2;
581 out_lengths[base_id + 3] = length3;
583 else if (base_id + 2 < N_strings)
586 typename InStringSet::string_type in_string0 = in_set[base_id + 0];
587 typename InStringSet::string_type in_string1 = in_set[base_id + 1];
588 typename InStringSet::string_type in_string2 = in_set[base_id + 2];
590 const uint32 length0 = in_string0.size();
591 const uint32 length1 = in_string1.size();
592 const uint32 length2 = in_string2.size();
595 for (
uint32 j = 0; j < min_length; ++j)
597 const uint32 c0 = in_string0[j];
598 const uint32 c1 = in_string1[j];
599 const uint32 c2 = in_string2[j];
602 out_string[j] = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
606 for (
uint32 j = min_length; j < max_length; ++j)
608 const uint32 c0 = j < length0 ? in_string0[j] : 0xFFu;
609 const uint32 c1 = j < length1 ? in_string1[j] : 0xFFu;
610 const uint32 c2 = j < length2 ? in_string2[j] : 0xFFu;
613 out_string[j] = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
616 out_lengths[base_id] = length0;
617 out_lengths[base_id + 1] = length1;
618 out_lengths[base_id + 2] = length2;
620 else if (base_id + 1 < N_strings)
623 typename InStringSet::string_type in_string0 = in_set[base_id + 0];
624 typename InStringSet::string_type in_string1 = in_set[base_id + 1];
626 const uint32 length0 = in_string0.size();
627 const uint32 length1 = in_string1.size();
630 for (
uint32 j = 0; j < min_length; ++j)
632 const uint32 c0 = in_string0[j];
633 const uint32 c1 = in_string1[j];
637 out_string[j] = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
641 for (
uint32 j = min_length; j < max_length; ++j)
643 const uint32 c0 = j < length0 ? in_string0[j] : 0xFFu;
644 const uint32 c1 = j < length1 ? in_string1[j] : 0xFFu;
648 out_string[j] = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
651 out_lengths[base_id] = length0;
652 out_lengths[base_id + 1] = length1;
657 typename InStringSet::string_type in_string = in_set[base_id + 0];
659 const uint32 length = in_string.size();
663 const uint32 c0 = in_string[j];
668 out_string[j] = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
671 out_lengths[base_id] =
length;
680 typename InStringSet,
681 typename OutLengthIterator>
683 void strided_packed_to_strided_uint8_kernel(
688 OutLengthIterator out_lengths)
691 const uint32 id = tid * 4;
697 typedef PackedStream<
699 typename InStringSet::symbol_type,
700 InStringSet::SYMBOL_SIZE,
701 InStringSet::BIG_ENDIAN> stream_type;
703 const uint32 SYMBOLS_PER_WORD = (
sizeof(
uint32)*8u) / InStringSet::SYMBOL_SIZE;
705 const uint32 length0 = in_set.lengths()[
id + 0];
706 const uint32 length1 = in_set.lengths()[
id + 1];
707 const uint32 length2 = in_set.lengths()[
id + 2];
708 const uint32 length3 = in_set.lengths()[
id + 3];
714 const uint32 N_words = (N + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
716 const uint32* in_stream = in_set.base_stream();
717 const uint32 in_stride = in_set.stride();
719 for (uint32 i = 0; i < N_words; ++i)
722 const uint4 in_words = *
reinterpret_cast<const uint4*
>( in_stream + i * in_stride + id );
725 sm[threadIdx.x +
BLOCKDIM*0] = in_words.x;
726 sm[threadIdx.x +
BLOCKDIM*1] = in_words.y;
727 sm[threadIdx.x +
BLOCKDIM*2] = in_words.z;
728 sm[threadIdx.x +
BLOCKDIM*3] = in_words.w;
730 stream_type streams[4] =
732 stream_type( &sm[threadIdx.x +
BLOCKDIM*0] ),
733 stream_type( &sm[threadIdx.x +
BLOCKDIM*1] ),
734 stream_type( &sm[threadIdx.x +
BLOCKDIM*2] ),
735 stream_type( &sm[threadIdx.x +
BLOCKDIM*3] )
739 for (uint32 j = 0; j < SYMBOLS_PER_WORD; ++j)
743 (streams[0][j] << 0) |
744 (streams[1][j] << 8) |
745 (streams[2][j] << 16) |
746 (streams[3][j] << 24);
749 *
reinterpret_cast<uint32*
>( out_stream + (i*SYMBOLS_PER_WORD + j)*out_stride +
id ) = word;
753 out_lengths[id] = length0;
754 out_lengths[
id + 1] = length1;
755 out_lengths[
id + 2] = length2;
756 out_lengths[
id + 3] = length3;
766 typename InStringSet,
767 typename OutStreamIterator,
768 typename OutLengthIterator>
770 void generic_to_strided_packed_kernel(
771 const uint32 N_strings,
772 const uint32 out_stride,
774 OutStreamIterator out_stream,
775 OutLengthIterator out_lengths)
777 const uint32 tid = threadIdx.x + blockIdx.x *
BLOCKDIM;
778 if (tid >= N_strings)
781 typename InStringSet::string_type in_string = in_set[tid];
783 const uint32 length = in_string.size();
785 typedef strided_iterator<OutStreamIterator> strided_stream_type;
786 typedef PackedStream<strided_stream_type,uint8,SYMBOL_SIZE,BIG_ENDIAN> packed_stream_type;
788 packed_stream_type out_string( strided_stream_type( out_stream + tid, out_stride ) );
790 for (uint32 j = 0; j <
length; ++j)
791 out_string[j] = in_string[j];
794 out_lengths[tid] =
length;
804 typename InStringSet,
805 typename OutStreamIterator,
806 typename OutLengthIterator>
808 void contig_to_strided_packed_kernel(
809 const uint32 N_strings,
810 const uint32 out_stride,
812 OutStreamIterator out_stream,
813 OutLengthIterator out_lengths)
815 const uint32 tid = threadIdx.x + blockIdx.x *
BLOCKDIM;
817 const uint32 length = tid < N_strings ? in_set[ tid ].size() : 0u;
820 const uint32 wid = warp_id();
821 const uint32 wtid = warp_tid();
823 __shared__
volatile uint8 sm[
BLOCKDIM * WARP_SIZE ];
824 volatile uint8* warp_sm = sm + wid * WARP_SIZE * WARP_SIZE;
826 typedef strided_iterator<OutStreamIterator> strided_stream_type;
827 typedef PackedStream<strided_stream_type,uint8,SYMBOL_SIZE,BIG_ENDIAN> packed_stream_type;
829 packed_stream_type out_string( strided_stream_type( out_stream + tid, out_stride ) );
833 for (uint32 block = 0; __any( block < length ); block += WARP_SIZE)
835 for (uint32 t = 0; t < WARP_SIZE; ++t)
838 const uint32 t_string_id = blockIdx.x*
BLOCKDIM + wid*WARP_SIZE + t;
839 if (t_string_id >= N_strings)
843 typename InStringSet::string_type t_string = in_set[ t_string_id ];
846 warp_sm[ t*WARP_SIZE + wtid ] = (block + wtid < t_string.size()) ? t_string[block + wtid] : 0u;
860 PackedStream<uint32*,uint8,SYMBOL_SIZE,BIG_ENDIAN> packed_word( &word );
862 const uint32 SYMBOLS_PER_WORD = (8u*
sizeof(
uint32))/SYMBOL_SIZE;
863 for (uint32 s = 0; s < WARP_SIZE; s += SYMBOLS_PER_WORD)
865 if (block + s < length)
867 for (uint32 b = 0; b < SYMBOLS_PER_WORD; ++b)
868 packed_word[b] = warp_sm[ wtid*WARP_SIZE + s + b ];
870 const uint32 word_idx = (block + s) / SYMBOLS_PER_WORD;
871 out_stream[ tid + word_idx*out_stride ] = word;
879 out_lengths[tid] =
length;
889 typename InStreamIterator,
890 typename InOffsetIterator,
891 typename OutStreamIterator,
892 typename OutLengthIterator>
894 void packed_concatenated_to_strided_packed_kernel(
895 const uint32 N_strings,
896 const uint32 out_stride,
897 InStreamIterator in_stream,
898 InOffsetIterator in_offsets,
899 OutStreamIterator out_stream,
900 OutLengthIterator out_lengths)
902 const uint32 tid = threadIdx.x + blockIdx.x *
BLOCKDIM;
903 if (tid >= N_strings)
906 const uint32 offset = in_offsets[tid];
907 const uint32 length = in_offsets[tid+1] - offset;
909 transpose_packed_streams<BLOCKDIM,SYMBOL_SIZE,BIG_ENDIAN>(
916 out_lengths[tid] =
length;
926 typename InStreamIterator,
927 typename InRangeIterator,
928 typename OutStreamIterator,
929 typename OutLengthIterator>
931 void packed_sparse_to_strided_packed_kernel(
932 const uint32 N_strings,
933 const uint32 out_stride,
934 InStreamIterator in_stream,
935 InRangeIterator in_ranges,
936 OutStreamIterator out_stream,
937 OutLengthIterator out_lengths)
939 const uint32 tid = threadIdx.x + blockIdx.x *
BLOCKDIM;
940 if (tid >= N_strings)
943 const uint2 range = in_ranges[tid];
944 const uint32 offset = range.x;
945 const uint32 length = range.y - range.x;
947 transpose_packed_streams<BLOCKDIM,SYMBOL_SIZE,BIG_ENDIAN>(
954 out_lengths[tid] =
length;
957 template <
typename OutStringSet>
960 template <
typename InStringSet>
961 struct source_dispatch
964 const InStringSet& in_string_set,
965 OutStringSet& out_string_set)
975 typename OutStringIterator,
976 typename OutOffsetIterator>
977 struct copy_dispatch<
978 ConcatenatedStringSet<OutStringIterator,OutOffsetIterator>
981 typedef ConcatenatedStringSet<OutStringIterator,OutOffsetIterator> out_string_set_type;
984 template <
typename in_
string_set_type>
985 struct source_dispatch
988 const in_string_set_type& in_string_set,
989 out_string_set_type& out_string_set)
993 if (out_string_set.size() != in_string_set.size())
996 const uint32 n_blocks = (in_string_set.size()+1 + BLOCKDIM-1)/BLOCKDIM;
999 generic_string_lengths_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1000 in_string_set.size(),
1002 out_string_set.offsets() );
1004 cudaThreadSynchronize();
1006 #if THRUST_VERSION <= 100503
1009 out_string_set.offsets(),
1010 out_string_set.offsets() + in_string_set.size()+1,
1011 out_string_set.offsets(),
1018 out_string_set.offsets(),
1019 out_string_set.offsets() + in_string_set.size()+1,
1020 out_string_set.offsets(),
1025 generic_to_concat_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1026 in_string_set.size(),
1028 out_string_set.base_string(),
1029 out_string_set.offsets() );
1031 cudaThreadSynchronize();
1037 typename InStringIterator,
1038 typename InRangeIterator>
1039 struct source_dispatch< SparseStringSet<InStringIterator,InRangeIterator> >
1041 typedef SparseStringSet<InStringIterator,InRangeIterator> in_string_set_type;
1044 const in_string_set_type& in_string_set,
1045 out_string_set_type& out_string_set)
1047 const uint32 BLOCKDIM = 128u;
1049 if (out_string_set.size() != in_string_set.size())
1052 const uint32 n_blocks = (in_string_set.size()+1 + BLOCKDIM-1)/BLOCKDIM;
1055 generic_string_lengths_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1056 in_string_set.size(),
1058 out_string_set.offsets() );
1060 cudaThreadSynchronize();
1062 #if THRUST_VERSION <= 100503
1065 out_string_set.offsets(),
1066 out_string_set.offsets() + in_string_set.size()+1,
1067 out_string_set.offsets(),
1074 out_string_set.offsets(),
1075 out_string_set.offsets() + in_string_set.size()+1,
1076 out_string_set.offsets(),
1083 const uint32 n_blocks = (in_string_set.size()+1 + WARPS_PER_BLOCK-1)/WARPS_PER_BLOCK;
1085 contig_to_concat_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1086 in_string_set.size(),
1088 out_string_set.base_string(),
1089 out_string_set.offsets() );
1091 cudaThreadSynchronize();
1096 template <
typename in_
string_set_type>
1098 const in_string_set_type& in_string_set,
1099 out_string_set_type& out_string_set)
1101 return source_dispatch<in_string_set_type>::enact( in_string_set, out_string_set );
1109 typename SymbolType,
1110 uint32 SYMBOL_SIZE_T,
1112 typename OutStreamIterator,
1113 typename OutOffsetIterator>
1114 struct copy_dispatch<
1115 ConcatenatedStringSet<
1116 PackedStream<OutStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
1120 typedef ConcatenatedStringSet<
1121 PackedStream<OutStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
1123 out_string_set_type;
1126 template <
typename in_
string_set_type>
1127 struct source_dispatch
1130 const in_string_set_type& in_string_set,
1131 out_string_set_type& out_string_set)
1133 const uint32 BLOCKDIM = 64u;
1135 if (out_string_set.size() != in_string_set.size())
1139 const uint32 n_blocks = (in_string_set.size()+1 + BLOCKDIM-1)/BLOCKDIM;
1142 generic_string_lengths_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1143 in_string_set.size(),
1145 out_string_set.offsets() );
1147 cudaThreadSynchronize();
1149 #if THRUST_VERSION <= 100503
1152 out_string_set.offsets(),
1153 out_string_set.offsets() + in_string_set.size()+1,
1154 out_string_set.offsets(),
1161 out_string_set.offsets(),
1162 out_string_set.offsets() + in_string_set.size()+1,
1163 out_string_set.offsets(),
1168 thrust::device_vector<uint32> d_total_length(1);
1170 generic_vector_copy_kernel<BLOCKDIM> <<<1,1>>> (
1172 out_string_set.offsets() + in_string_set.size(),
1173 thrust::raw_pointer_cast( &d_total_length.front() ) );
1175 cudaThreadSynchronize();
1177 const uint32 SYMBOLS_PER_WORD = (8u*
sizeof(
uint32)) / SYMBOL_SIZE_T;
1178 const uint32 N_symbols = d_total_length[0];
1179 const uint32 N_words = (N_symbols + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
1181 const uint32 n_blocks = (N_words + BLOCKDIM-1)/BLOCKDIM;
1183 generic_to_packed_concat_kernel<BLOCKDIM,SYMBOL_SIZE_T,BIG_ENDIAN_T> <<<n_blocks,BLOCKDIM>>>(
1184 in_string_set.size(),
1187 out_string_set.base_string().stream(),
1188 out_string_set.offsets() );
1190 cudaThreadSynchronize();
1197 typename InStringIterator,
1198 typename InOffsetIterator>
1199 struct source_dispatch<
1200 ConcatenatedStringSet<
1205 typedef ConcatenatedStringSet<
1211 const in_string_set_type& in_string_set,
1212 out_string_set_type& out_string_set)
1214 const uint32 BLOCKDIM = 64u;
1216 if (out_string_set.size() != in_string_set.size())
1220 thrust::device_vector<uint32> d_total_length(1);
1222 generic_vector_copy_kernel<BLOCKDIM> <<<1,1>>> (
1224 in_string_set.offsets() + in_string_set.size(),
1225 thrust::raw_pointer_cast( &d_total_length.front() ) );
1227 cudaThreadSynchronize();
1229 const uint32 SYMBOLS_PER_WORD = (8u*
sizeof(
uint32)) / SYMBOL_SIZE_T;
1230 const uint32 N_symbols = d_total_length[0];
1231 const uint32 N_words = (N_symbols + SYMBOLS_PER_WORD-1) / SYMBOLS_PER_WORD;
1233 const uint32 n_blocks = (N_words + BLOCKDIM-1)/BLOCKDIM;
1235 concat_to_packed_concat_kernel<BLOCKDIM,SYMBOL_SIZE_T,BIG_ENDIAN_T> <<<n_blocks,BLOCKDIM>>>(
1236 in_string_set.size(),
1239 out_string_set.base_string().stream(),
1240 out_string_set.offsets() );
1242 cudaThreadSynchronize();
1247 template <
typename in_
string_set_type>
1249 const in_string_set_type& in_string_set,
1250 out_string_set_type& out_string_set)
1252 return source_dispatch<in_string_set_type>::enact( in_string_set, out_string_set );
1260 typename OutStreamIterator,
1261 typename OutLengthIterator>
1262 struct copy_dispatch<
1268 typedef StridedStringSet<OutStreamIterator, OutLengthIterator> out_string_set_type;
1271 template <
typename in_
string_set_type>
1272 struct source_dispatch
1275 const in_string_set_type& in_string_set,
1276 out_string_set_type& out_string_set)
1278 const uint32 BLOCKDIM = 64u;
1280 if (out_string_set.size() != in_string_set.size() ||
1281 out_string_set.stride() < out_string_set.size())
1284 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
1286 generic_to_strided_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1287 in_string_set.size(),
1288 out_string_set.stride(),
1290 out_string_set.base_string(),
1291 out_string_set.lengths() );
1293 cudaThreadSynchronize();
1299 typename InStreamIterator,
1300 typename SymbolType,
1301 uint32 SYMBOL_SIZE_T,
1303 typename InOffsetIterator>
1304 struct source_dispatch<
1305 ConcatenatedStringSet<
1306 PackedStream<InStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
1310 typedef ConcatenatedStringSet<
1311 PackedStream<InStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
1312 InOffsetIterator> in_string_set_type;
1315 const in_string_set_type& in_string_set,
1316 const out_string_set_type& out_string_set)
1318 const uint32 BLOCKDIM = 64u;
1320 if (out_string_set.size() != in_string_set.size() ||
1321 out_string_set.stride() < out_string_set.size())
1324 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
1327 const InStreamIterator in_stream = in_string_set.base_string().stream();
1329 packed_concat_to_strided_kernel<
1333 <<<n_blocks,BLOCKDIM>>>(
1334 in_string_set.size(),
1335 out_string_set.stride(),
1337 in_string_set.offsets(),
1338 out_string_set.base_string(),
1339 out_string_set.lengths() );
1341 cudaThreadSynchronize();
1347 typename InStreamIterator,
1348 typename SymbolType,
1349 uint32 SYMBOL_SIZE_T,
1351 typename InOffsetIterator>
1352 struct source_dispatch<
1354 PackedStream<InStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
1358 typedef SparseStringSet<
1359 PackedStream<InStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
1360 InOffsetIterator> in_string_set_type;
1363 const in_string_set_type& in_string_set,
1364 const out_string_set_type& out_string_set)
1366 const uint32 BLOCKDIM = 64u;
1368 if (out_string_set.size() != in_string_set.size() ||
1369 out_string_set.stride() < out_string_set.size())
1372 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
1375 const InStreamIterator in_stream = in_string_set.base_string().stream();
1377 packed_sparse_to_strided_kernel<
1381 <<<n_blocks,BLOCKDIM>>>(
1382 in_string_set.size(),
1383 out_string_set.stride(),
1385 in_string_set.ranges(),
1386 out_string_set.base_string(),
1387 out_string_set.lengths() );
1389 cudaThreadSynchronize();
1393 template <
typename in_
string_set_type>
1395 const in_string_set_type& in_string_set,
1396 out_string_set_type& out_string_set)
1398 return source_dispatch<in_string_set_type>::enact( in_string_set, out_string_set );
1406 typename OutLengthIterator>
1407 struct copy_dispatch<
1413 typedef StridedStringSet<uint8*, OutLengthIterator> out_string_set_type;
1416 template <
typename in_
string_set_type>
1417 struct source_dispatch
1420 const in_string_set_type& in_string_set,
1421 out_string_set_type& out_string_set)
1423 const uint32 BLOCKDIM = 64u;
1425 if (out_string_set.size() != in_string_set.size() ||
1426 out_string_set.stride() < out_string_set.size())
1430 if ((out_string_set.stride() & 3) == 0)
1432 const uint32 n_quads = (in_string_set.size()+3) / 4u;
1433 const uint32 n_blocks = (n_quads + BLOCKDIM-1)/BLOCKDIM;
1435 generic_to_strided_uint8_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1436 in_string_set.size(),
1437 out_string_set.stride(),
1439 out_string_set.base_string(),
1440 out_string_set.lengths() );
1445 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
1447 generic_to_strided_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1448 in_string_set.size(),
1449 out_string_set.stride(),
1451 out_string_set.base_string(),
1452 out_string_set.lengths() );
1455 cudaThreadSynchronize();
1461 typename InStringIterator,
1462 typename InRangeIterator>
1463 struct source_dispatch<
1469 typedef SparseStringSet<
1475 const in_string_set_type& in_string_set,
1476 out_string_set_type& out_string_set)
1478 const uint32 BLOCKDIM = 64u;
1480 if (out_string_set.size() != in_string_set.size() ||
1481 out_string_set.stride() < out_string_set.size())
1484 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
1486 contig_to_strided_uint8_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1487 in_string_set.size(),
1488 out_string_set.stride(),
1490 out_string_set.base_string(),
1491 out_string_set.lengths() );
1493 cudaThreadSynchronize();
1499 typename InStringIterator,
1500 typename InOffsetIterator>
1501 struct source_dispatch<
1502 ConcatenatedStringSet<
1507 typedef ConcatenatedStringSet<
1513 const in_string_set_type& in_string_set,
1514 out_string_set_type& out_string_set)
1516 const uint32 BLOCKDIM = 64u;
1518 if (out_string_set.size() != in_string_set.size() ||
1519 out_string_set.stride() < out_string_set.size())
1522 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
1524 contig_to_strided_uint8_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1525 in_string_set.size(),
1526 out_string_set.stride(),
1528 out_string_set.base_string(),
1529 out_string_set.lengths() );
1531 cudaThreadSynchronize();
1537 typename SymbolType,
1538 uint32 SYMBOL_SIZE_T,
1539 uint32 BIG_ENDIAN_T,
1540 typename InLengthIterator>
1541 struct source_dispatch<
1542 StridedPackedStringSet<
1550 typedef StridedPackedStringSet<
1559 const in_string_set_type& in_string_set,
1560 out_string_set_type& out_string_set)
1562 const uint32 BLOCKDIM = 64u;
1564 if (out_string_set.size() != in_string_set.size() ||
1565 out_string_set.stride() < out_string_set.size())
1569 if ((out_string_set.stride() & 3) == 0)
1571 const uint32 n_quads = (in_string_set.size()+3u) / 4u;
1572 const uint32 n_blocks = (n_quads + BLOCKDIM-1)/BLOCKDIM;
1574 strided_packed_to_strided_uint8_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1575 in_string_set.size(),
1576 out_string_set.stride(),
1578 out_string_set.base_string(),
1579 out_string_set.lengths() );
1583 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
1585 generic_to_strided_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1586 in_string_set.size(),
1587 out_string_set.stride(),
1589 out_string_set.base_string(),
1590 out_string_set.lengths() );
1592 cudaThreadSynchronize();
1597 typename SymbolType,
1598 uint32 SYMBOL_SIZE_T,
1599 uint32 BIG_ENDIAN_T,
1600 typename InLengthIterator>
1601 struct source_dispatch<
1602 StridedPackedStringSet<
1610 typedef StridedPackedStringSet<
1619 const in_string_set_type& in_string_set,
1620 out_string_set_type& out_string_set)
1622 const uint32 BLOCKDIM = 64u;
1624 if (out_string_set.size() != in_string_set.size() ||
1625 out_string_set.stride() < out_string_set.size())
1629 if ((out_string_set.stride() & 3) == 0)
1631 const uint32 n_quads = (in_string_set.size()+3u) / 4u;
1632 const uint32 n_blocks = (n_quads + BLOCKDIM-1)/BLOCKDIM;
1634 strided_packed_to_strided_uint8_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1635 in_string_set.size(),
1636 out_string_set.stride(),
1638 out_string_set.base_string(),
1639 out_string_set.lengths() );
1643 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
1645 generic_to_strided_kernel<BLOCKDIM> <<<n_blocks,BLOCKDIM>>>(
1646 in_string_set.size(),
1647 out_string_set.stride(),
1649 out_string_set.base_string(),
1650 out_string_set.lengths() );
1652 cudaThreadSynchronize();
1658 typename InStreamIterator,
1659 typename SymbolType,
1660 uint32 SYMBOL_SIZE_T,
1662 typename InOffsetIterator>
1663 struct source_dispatch<
1664 ConcatenatedStringSet<
1665 PackedStream<InStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
1669 typedef ConcatenatedStringSet<
1670 PackedStream<InStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
1671 InOffsetIterator> in_string_set_type;
1674 const in_string_set_type& in_string_set,
1675 const out_string_set_type& out_string_set)
1677 const uint32 BLOCKDIM = 64u;
1679 if (out_string_set.size() != in_string_set.size() ||
1680 out_string_set.stride() < out_string_set.size())
1683 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
1686 const InStreamIterator in_stream = in_string_set.base_string().stream();
1688 packed_concat_to_strided_kernel<
1692 <<<n_blocks,BLOCKDIM>>>(
1693 in_string_set.size(),
1694 out_string_set.stride(),
1696 in_string_set.offsets(),
1697 out_string_set.base_string(),
1698 out_string_set.lengths() );
1700 cudaThreadSynchronize();
// Specialization of source_dispatch selected when the input is a
// SparseStringSet whose base string is a PackedStream.
// It launches packed_sparse_to_strided_kernel, handing it the input ranges
// together with the output stride, base string and lengths.
1706 typename InStreamIterator,
1707 typename SymbolType,
1708 uint32 SYMBOL_SIZE_T,
1710 typename InOffsetIterator>
1711 struct source_dispatch<
1713 PackedStream<InStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
// Alias for the concrete input string-set type handled by this specialization.
1717 typedef SparseStringSet<
1718 PackedStream<InStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
1719 InOffsetIterator> in_string_set_type;
// in_string_set  : source string set (read only)
// out_string_set : destination string set handle (passed by const reference)
1722 const in_string_set_type& in_string_set,
1723 const out_string_set_type& out_string_set)
// Number of threads per block used in the launch configuration below.
1725 const uint32 BLOCKDIM = 64u;
// Mismatch condition: the two sets hold a different number of strings, or the
// output stride is smaller than the number of strings.
1727 if (out_string_set.size() != in_string_set.size() ||
1728 out_string_set.stride() < out_string_set.size())
// Round up so that n_blocks * BLOCKDIM covers every input string.
1731 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
// Fetch the stream iterator underlying the packed input base string.
1734 const InStreamIterator in_stream = in_string_set.base_string().stream();
1736 packed_sparse_to_strided_kernel<
1740 <<<n_blocks,BLOCKDIM>>>(
1741 in_string_set.size(),
1742 out_string_set.stride(),
// A sparse set is addressed through ranges() rather than offsets().
1744 in_string_set.ranges(),
1745 out_string_set.base_string(),
1746 out_string_set.lengths() );
// Wait on the host for the kernel launched above.
// NOTE(review): cudaThreadSynchronize() is deprecated in the CUDA runtime API;
// cudaDeviceSynchronize() is the documented replacement - consider migrating.
1748 cudaThreadSynchronize();
// Forwarding entry point templated on the input string-set type: it hands both
// sets to the source_dispatch specialization that matches in_string_set_type
// and returns whatever that specialization's enact() returns.
1752 template <
typename in_
string_set_type>
1754 const in_string_set_type& in_string_set,
1755 out_string_set_type& out_string_set)
1757 return source_dispatch<in_string_set_type>::enact( in_string_set, out_string_set );
// Specialization of copy_dispatch for destinations of type
// StridedPackedStringSet, parameterized on the symbol type, symbol size,
// output stream iterator and output length iterator.
1766 typename SymbolType,
1767 uint32 SYMBOL_SIZE_T,
1769 typename OutStreamIterator,
1770 typename OutLengthIterator>
1771 struct copy_dispatch<
1772 StridedPackedStringSet<
1773 OutStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T,
// Alias for the concrete destination type; the source_dispatch members below
// refer to it as out_string_set_type.
1777 typedef StridedPackedStringSet<
1778 OutStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T,
1779 OutLengthIterator> out_string_set_type;
// Primary (unspecialized) source_dispatch template: used for any input
// string-set type without a dedicated specialization.
// It launches generic_to_strided_packed_kernel, handing it the output stride,
// base stream and lengths.
1782 template <
typename in_
string_set_type>
1783 struct source_dispatch
// in_string_set  : source string set (read only)
// out_string_set : destination string set handle (passed by const reference)
1786 const in_string_set_type& in_string_set,
1787 const out_string_set_type& out_string_set)
// Number of threads per block used in the launch configuration below.
1789 const uint32 BLOCKDIM = 64u;
// Mismatch condition: the two sets hold a different number of strings, or the
// output stride is smaller than the number of strings.
1791 if (out_string_set.size() != in_string_set.size() ||
1792 out_string_set.stride() < out_string_set.size())
// Round up so that n_blocks * BLOCKDIM covers every input string.
1795 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
1797 generic_to_strided_packed_kernel<
1801 <<<n_blocks,BLOCKDIM>>>(
1802 in_string_set.size(),
1803 out_string_set.stride(),
1805 out_string_set.base_stream(),
1806 out_string_set.lengths() );
// Wait on the host for the kernel launched above.
// NOTE(review): cudaThreadSynchronize() is deprecated in the CUDA runtime API;
// cudaDeviceSynchronize() is the documented replacement - consider migrating.
1808 cudaThreadSynchronize();
// Specialization of source_dispatch selected when the input is a
// ConcatenatedStringSet. It launches contig_to_strided_packed_kernel, handing
// it the output stride, base stream and lengths.
1814 typename InStreamIterator,
1815 typename InOffsetIterator>
1816 struct source_dispatch<
1817 ConcatenatedStringSet<
// Alias for the concrete input string-set type handled by this specialization.
1822 typedef ConcatenatedStringSet<
1824 InOffsetIterator> in_string_set_type;
// in_string_set  : source string set (read only)
// out_string_set : destination string set handle (passed by const reference)
1827 const in_string_set_type& in_string_set,
1828 const out_string_set_type& out_string_set)
// Number of threads per block used in the launch configuration below
// (this specialization uses 128).
1830 const uint32 BLOCKDIM = 128u;
// Mismatch condition: the two sets hold a different number of strings, or the
// output stride is smaller than the number of strings.
1832 if (out_string_set.size() != in_string_set.size() ||
1833 out_string_set.stride() < out_string_set.size())
// Round up so that n_blocks * BLOCKDIM covers every input string.
1836 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
1838 contig_to_strided_packed_kernel<
1842 <<<n_blocks,BLOCKDIM>>>(
1843 in_string_set.size(),
1844 out_string_set.stride(),
1846 out_string_set.base_stream(),
1847 out_string_set.lengths() );
// Wait on the host for the kernel launched above.
// NOTE(review): cudaThreadSynchronize() is deprecated in the CUDA runtime API;
// cudaDeviceSynchronize() is the documented replacement - consider migrating.
1849 cudaThreadSynchronize();
// Specialization of source_dispatch selected when the input is a
// SparseStringSet. It launches contig_to_strided_packed_kernel, handing it the
// output stride, base stream and lengths.
// NOTE(review): this is the same kernel name the ConcatenatedStringSet
// specialization launches - presumably intentional, confirm.
1855 typename InStreamIterator,
1856 typename InOffsetIterator>
1857 struct source_dispatch<
// Alias for the concrete input string-set type handled by this specialization.
1863 typedef SparseStringSet<
1865 InOffsetIterator> in_string_set_type;
// in_string_set  : source string set (read only)
// out_string_set : destination string set handle (passed by const reference)
1868 const in_string_set_type& in_string_set,
1869 const out_string_set_type& out_string_set)
// Number of threads per block used in the launch configuration below.
1871 const uint32 BLOCKDIM = 64u;
// Mismatch condition: the two sets hold a different number of strings, or the
// output stride is smaller than the number of strings.
1873 if (out_string_set.size() != in_string_set.size() ||
1874 out_string_set.stride() < out_string_set.size())
// Round up so that n_blocks * BLOCKDIM covers every input string.
1877 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
1879 contig_to_strided_packed_kernel<
1883 <<<n_blocks,BLOCKDIM>>>(
1884 in_string_set.size(),
1885 out_string_set.stride(),
1887 out_string_set.base_stream(),
1888 out_string_set.lengths() );
// Wait on the host for the kernel launched above.
// NOTE(review): cudaThreadSynchronize() is deprecated in the CUDA runtime API;
// cudaDeviceSynchronize() is the documented replacement - consider migrating.
1890 cudaThreadSynchronize();
// Specialization of source_dispatch selected when the input is a
// ConcatenatedStringSet whose base string is a PackedStream.
// It launches packed_concatenated_to_strided_packed_kernel, handing it the
// input offsets together with the output stride, base stream and lengths.
1896 typename InStreamIterator,
1897 typename InOffsetIterator>
1898 struct source_dispatch<
1899 ConcatenatedStringSet<
1900 PackedStream<InStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
// Alias for the concrete input string-set type handled by this specialization.
1904 typedef ConcatenatedStringSet<
1905 PackedStream<InStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
1906 InOffsetIterator> in_string_set_type;
// in_string_set  : source string set (read only)
// out_string_set : destination string set handle (passed by const reference)
1909 const in_string_set_type& in_string_set,
1910 const out_string_set_type& out_string_set)
// Number of threads per block used in the launch configuration below.
1912 const uint32 BLOCKDIM = 64;
// Mismatch condition: the two sets hold a different number of strings, or the
// output stride is smaller than the number of strings.
1914 if (out_string_set.size() != in_string_set.size() ||
1915 out_string_set.stride() < out_string_set.size())
// Round up so that n_blocks * BLOCKDIM covers every input string.
1918 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
// Fetch the stream iterator underlying the packed input base string.
1921 const InStreamIterator in_stream = in_string_set.base_string().stream();
1923 packed_concatenated_to_strided_packed_kernel<
1927 <<<n_blocks,BLOCKDIM>>>(
1928 in_string_set.size(),
1929 out_string_set.stride(),
1931 in_string_set.offsets(),
1932 out_string_set.base_stream(),
1933 out_string_set.lengths() );
// Wait on the host for the kernel launched above.
// NOTE(review): cudaThreadSynchronize() is deprecated in the CUDA runtime API;
// cudaDeviceSynchronize() is the documented replacement - consider migrating.
1935 cudaThreadSynchronize();
// Specialization of source_dispatch selected when the input is a
// SparseStringSet whose base string is a PackedStream.
// It launches packed_sparse_to_strided_packed_kernel, handing it the input
// ranges together with the output stride, base stream and lengths.
1941 typename InStreamIterator,
1942 typename InOffsetIterator>
1943 struct source_dispatch<
1945 PackedStream<InStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
// Alias for the concrete input string-set type handled by this specialization.
1949 typedef SparseStringSet<
1950 PackedStream<InStreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T>,
1951 InOffsetIterator> in_string_set_type;
// in_string_set  : source string set (read only)
// out_string_set : destination string set handle (passed by const reference)
1954 const in_string_set_type& in_string_set,
1955 const out_string_set_type& out_string_set)
// Number of threads per block used in the launch configuration below.
1957 const uint32 BLOCKDIM = 64u;
// Mismatch condition: the two sets hold a different number of strings, or the
// output stride is smaller than the number of strings.
1959 if (out_string_set.size() != in_string_set.size() ||
1960 out_string_set.stride() < out_string_set.size())
// Round up so that n_blocks * BLOCKDIM covers every input string.
1963 const uint32 n_blocks = (in_string_set.size() + BLOCKDIM-1)/BLOCKDIM;
// Fetch the stream iterator underlying the packed input base string.
1966 const InStreamIterator in_stream = in_string_set.base_string().stream();
1968 packed_sparse_to_strided_packed_kernel<
1972 <<<n_blocks,BLOCKDIM>>>(
1973 in_string_set.size(),
1974 out_string_set.stride(),
// A sparse set is addressed through ranges() rather than offsets().
1976 in_string_set.ranges(),
1977 out_string_set.base_stream(),
1978 out_string_set.lengths() );
// Wait on the host for the kernel launched above.
// NOTE(review): cudaThreadSynchronize() is deprecated in the CUDA runtime API;
// cudaDeviceSynchronize() is the documented replacement - consider migrating.
1980 cudaThreadSynchronize();
// Forwarding entry point templated on the input string-set type: it hands both
// sets to the source_dispatch specialization that matches in_string_set_type
// and returns whatever that specialization's enact() returns.
1984 template <
typename in_
string_set_type>
1986 const in_string_set_type& in_string_set,
1987 out_string_set_type& out_string_set)
1989 return source_dispatch<in_string_set_type>::enact( in_string_set, out_string_set );
// Function template whose destination is a ConcatenatedStringSet.
// in_string_set  : source string set of arbitrary type (read only)
// out_string_set : destination, taken by mutable reference
1999 typename InStringSet,
2000 typename StringIterator,
2001 typename OffsetIterator>
2003 const InStringSet& in_string_set,
2004 ConcatenatedStringSet<StringIterator,OffsetIterator>& out_string_set)
// Short local alias for the destination string-set type.
2006 typedef ConcatenatedStringSet<StringIterator,OffsetIterator> OutStringSet;
// Function template whose destination is a StridedStringSet.
// in_string_set  : source string set of arbitrary type (read only)
// out_string_set : destination, taken by mutable reference
2017 typename InStringSet,
2018 typename StringIterator,
2019 typename LengthIterator>
2021 const InStringSet& in_string_set,
2022 StridedStringSet<StringIterator,LengthIterator>& out_string_set)
// Short local alias for the destination string-set type.
2024 typedef StridedStringSet<StringIterator,LengthIterator> OutStringSet;
// Function template whose destination is a StridedPackedStringSet.
// in_string_set  : source string set of arbitrary type (read only)
// out_string_set : destination, taken by mutable reference
2035 typename InStringSet,
2036 typename StreamIterator,
2037 typename SymbolType,
2038 uint32 SYMBOL_SIZE_T,
2040 typename LengthIterator>
2042 const InStringSet& in_string_set,
2043 StridedPackedStringSet<StreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T,LengthIterator>& out_string_set)
// Short local alias for the destination string-set type.
2045 typedef StridedPackedStringSet<StreamIterator,SymbolType,SYMBOL_SIZE_T,BIG_ENDIAN_T,LengthIterator> OutStringSet;
2052 #endif // defined(__CUDACC__)
// Dispatcher templated first on the output string-set type and then on the
// input string-set type. This code sits after the __CUDACC__ guard is closed
// and copies symbols with ordinary loops instead of kernel launches.
2054 template <
typename out_
string_set_type>
2058 template <
typename in_
string_set_type>
// in_string_set  : source string set (read only)
// out_string_set : destination string set handle (passed by const reference)
2062 const in_string_set_type& in_string_set,
2063 const out_string_set_type& out_string_set)
// Mismatch condition: the two sets hold a different number of strings, or the
// output stride is smaller than the number of strings.
2065 if (out_string_set.size() != in_string_set.size() ||
2066 out_string_set.stride() < out_string_set.size())
2069 const uint32 n_strings = in_string_set.size();
// Tiled copy: strings are visited in groups of BLOCK_SIZE ...
2073 for (uint32 i_block = 0; i_block < n_strings; i_block +=
BLOCK_SIZE)
// ... with the last group clamped to the number of strings.
2075 const uint32 i_block_end =
std::min( i_block + BLOCK_SIZE, n_strings );
// Pass over the strings of the current group.
// NOTE(review): presumably this is where max_len is computed - TODO confirm.
2078 for (uint32 i = i_block; i < i_block_end; ++i)
// Symbols are visited in chunks of BLOCK_SIZE, up to max_len.
// NOTE(review): max_len is expected to be the longest string length within the
// current group - TODO confirm against its definition.
2081 for (uint32 j_block = 0; j_block < max_len; j_block +=
BLOCK_SIZE)
2083 for (uint32 i = i_block; i < i_block_end; ++i)
2085 typename in_string_set_type::string_type in_string = in_string_set[i];
2086 typename out_string_set_type::string_type out_string = out_string_set[i];
2088 const uint32 m = in_string.length();
// Clamp the chunk to this string's length; when the string is shorter than
// j_block the end falls below the start and the copy loop runs zero times.
2089 const uint32 j_block_end =
std::min( j_block + BLOCK_SIZE, m );
2091 for (uint32 j = j_block; j < j_block_end; ++j)
2092 out_string[j] = in_string[j];
// Plain copy: every string in turn, symbol by symbol.
2097 for (uint32 i = 0; i < n_strings; ++i)
2099 typename in_string_set_type::string_type in_string = in_string_set[i];
2100 typename out_string_set_type::string_type out_string = out_string_set[i];
2102 const uint32 m = in_string.length();
2103 for (uint32 j = 0; j < m; ++j)
2104 out_string[j] = in_string[j];
// Function template parameterized on the input string-set type.
// in_string_set  : source string set (read only)
// out_string_set : destination, taken by mutable reference
2110 template <
typename in_
string_set_type>
2112 const in_string_set_type& in_string_set,
2113 out_string_set_type& out_string_set)
// Function template over an input string-set type plus a string iterator and
// an offset iterator type.
// NOTE(review): the iterator types presumably describe the destination set -
// TODO confirm against the full signature.
// in_string_set : source string set (read only)
2125 typename InStringSet,
2126 typename StringIterator,
2127 typename OffsetIterator>
2129 const InStringSet& in_string_set,
// Function template over an input string-set type plus a string iterator and
// a length iterator type.
// NOTE(review): the iterator types presumably describe the destination set -
// TODO confirm against the full signature.
// in_string_set : source string set (read only)
2143 typename InStringSet,
2144 typename StringIterator,
2145 typename LengthIterator>
2147 const InStringSet& in_string_set,
2161 typename InStringSet,
2162 typename StreamIterator,
2163 typename SymbolType,
2164 uint32 SYMBOL_SIZE_T,
2166 typename LengthIterator>
2168 const InStringSet& in_string_set,