36 template <
typename word_type>
59 template <
typename input_storage,
typename output_storage, u
int32 SYMBOL_SIZE,
typename index_type>
66 typedef typename std::iterator_traits<input_storage>::value_type word_type;
68 const uint32 WORD_SIZE = 8u *
sizeof(word_type);
69 const uint32 SYMBOLS_PER_WORD = WORD_SIZE / SYMBOL_SIZE;
71 input_storage in_words = in.
stream();
72 output_storage out_words = out.
stream();
74 const index_type k_start = in.
index();
75 const index_type n_start = out.
index();
77 index_type k = k_start;
78 index_type n = n_start;
80 const index_type k_end = k + len;
82 const index_type out_word_begin =
util::divide_ri( k, SYMBOLS_PER_WORD );
83 const index_type out_word_end =
util::divide_rz( k_end, SYMBOLS_PER_WORD );
86 if (out_word_end <= out_word_begin)
89 out[k++ - k_start] = in[n++ - n_start];
95 while (k < out_word_begin*SYMBOLS_PER_WORD)
96 out[k++ - k_start] = in[n++ - n_start];
98 for (index_type out_word = out_word_begin; out_word < out_word_end; ++out_word)
101 const uint32 n_word = n / SYMBOLS_PER_WORD;
102 const uint32 n_mod = n & (SYMBOLS_PER_WORD-1);
103 const uint32 n_syms = SYMBOLS_PER_WORD - n_mod;
106 word_type in_word = in_words[n_word] << (n_mod * SYMBOL_SIZE);
108 if (n_syms < SYMBOLS_PER_WORD)
111 in_word |= (in_words[n_word+1] >> (n_syms * SYMBOL_SIZE));
115 out_words[ out_word ] = in_word;
118 k += SYMBOLS_PER_WORD;
119 n += SYMBOLS_PER_WORD;
124 out[k++ - k_start] = in[n++ - n_start];
128 template <u
int32 SYMBOL_COUNT>
133 if ((k & (occ_intv-1)) == 0)
135 const uint32 block_idx = k >> occ_intv_log;
136 for (
uint32 q = 0; q < SYMBOL_COUNT; ++q)
137 occ[ SYMBOL_COUNT * block_idx + q ] = partials[q];
141 template <
typename input_storage,
typename output_storage, u
int32 SYMBOL_SIZE,
typename index_type>
147 const uint32 occ_intv_log,
151 const uint32* count_table)
153 typedef typename std::iterator_traits<input_storage>::value_type word_type;
155 const uint32 WORD_SIZE = 8u *
sizeof(word_type);
156 const uint32 SYMBOLS_PER_WORD = WORD_SIZE / SYMBOL_SIZE;
157 const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
159 input_storage in_words = in.
stream();
160 output_storage out_words = out.
stream();
162 const index_type k_start = out.
index();
163 const index_type n_start = in.
index();
165 index_type k = k_start;
166 index_type n = n_start;
168 const index_type k_end = k + len;
170 const index_type out_word_begin =
util::divide_ri( k, SYMBOLS_PER_WORD );
171 const index_type out_word_end =
util::divide_rz( k_end, SYMBOLS_PER_WORD );
174 if (out_word_end <= out_word_begin)
179 save_occurrences<SYMBOL_COUNT>( k, occ_intv_log, occ_intv, partials, occ );
181 const uint8 c = in[n++ - n_start];
183 out[k++ - k_start] = c;
193 while (k < out_word_begin*SYMBOLS_PER_WORD)
196 save_occurrences<SYMBOL_COUNT>( k, occ_intv_log, occ_intv, partials, occ );
198 const uint8 c = in[n++ - n_start];
200 out[k++ - k_start] = c;
205 for (index_type out_word = out_word_begin; out_word < out_word_end; ++out_word)
208 const uint32 n_word = n / SYMBOLS_PER_WORD;
209 const uint32 n_mod = n & (SYMBOLS_PER_WORD-1);
210 const uint32 n_syms = SYMBOLS_PER_WORD - n_mod;
213 word_type in_word = in_words[n_word] << (n_mod * SYMBOL_SIZE);
215 if (n_syms < SYMBOLS_PER_WORD)
218 in_word |= (in_words[n_word+1] >> (n_syms * SYMBOL_SIZE));
222 save_occurrences<SYMBOL_COUNT>( out_word * SYMBOLS_PER_WORD, occ_intv_log, occ_intv, partials, occ );
224 if (SYMBOL_SIZE == 2)
228 partials[0] += (cnts >> 0) & 0xFF;
229 partials[1] += (cnts >> 8) & 0xFF;
230 partials[2] += (cnts >> 16) & 0xFF;
231 partials[3] += (cnts >> 24) & 0xFF;
236 for (
uint32 i = 0; i < SYMBOLS_PER_WORD; ++i)
238 const uint8 c = (in_word >> (WORD_SIZE - SYMBOL_SIZE - i * SYMBOL_SIZE)) & (SYMBOL_COUNT-1);
245 out_words[ out_word ] = in_word;
248 k += SYMBOLS_PER_WORD;
249 n += SYMBOLS_PER_WORD;
256 save_occurrences<SYMBOL_COUNT>( k, occ_intv_log, occ_intv, partials, occ );
258 const uint8 c = in[n++ - n_start];
260 out[k++ - k_start] = c;
268 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
271 const uint32 segment_size,
273 m_page_size( page_size / sizeof(
word_type) ),
274 m_segment_size( segment_size / sizeof(
word_type) ),
275 m_occ_intv( occ_intv ),
276 m_occ_intv_w( occ_intv / SYMBOLS_PER_WORD ),
277 m_occ_intv_log( nvbio::
log2( occ_intv ) ),
288 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
291 for (
uint32 i = 0; i < m_segments.size(); ++i)
292 free( m_segments[i] );
297 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
301 (m_occ_intv / SYMBOLS_PER_WORD) / SYMBOL_COUNT;
303 const uint32 n_pages = m_segment_size / m_page_size;
304 const uint32 ext_page_size = m_page_size + m_page_size / occ_freq;
305 const uint32 ext_segment_size = n_pages * ext_page_size;
311 log_error(stderr,
"PagedText: failed allocating segment\n");
318 if (m_pool.size() < m_page_count + n_pages)
319 m_pool.resize( m_page_count + n_pages );
321 m_segments.push_back( segment );
323 for (
uint32 i = 0; i < n_pages; ++i)
324 m_pool[ m_pool_size++ ] = segment + ext_page_size * (n_pages - i - 1u);
326 m_page_count += n_pages;
332 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
336 omp_set_lock( &m_lock );
338 if (m_pool_size == 0)
340 omp_unset_lock( &m_lock );
342 log_error(stderr,
"PagedText: exhausted page pool\n");
347 word_type* page = m_pool[ --m_pool_size ];
348 assert( page != NULL );
350 omp_unset_lock( &m_lock );
356 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
359 assert( page != NULL );
360 omp_set_lock( &m_lock );
362 if (m_pool_size >= m_page_count)
364 log_error(stderr,
"exceeded pool size %u - released more pages than have been allocated\n", m_page_count);
368 m_pool[ m_pool_size++ ] = page;
370 omp_unset_lock( &m_lock );
375 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
379 const uint32 page_idx = find_page( i );
380 const word_type* page = get_page( page_idx );
382 const uint32 local_i =
uint32( i - m_offsets[ page_idx ] );
383 assert( local_i < m_page_size * SYMBOLS_PER_WORD );
386 return packed_page[ local_i ];
391 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
398 return symbol_frequency(c);
401 const uint32 page_idx = find_page( i );
403 return rank( page_idx, i, c );
408 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
415 return symbol_frequency(c);
418 const word_type* page = get_page( page_idx );
421 const uint32 local_i =
uint32( i - m_offsets[ page_idx ] );
422 assert( local_i < m_page_size * SYMBOLS_PER_WORD );
425 const uint32 block_idx = local_i >> m_occ_intv_log;
426 const uint32 block_offset = local_i & (m_occ_intv-1);
430 m_counters[ SYMBOL_COUNT * page_idx + c ] +
431 occ[ SYMBOL_COUNT * block_idx + c ];
434 const uint32 word_idx = block_offset / SYMBOLS_PER_WORD;
437 const uint32 word_begin = block_idx*m_occ_intv_w;
439 return out +
popc_2bit( page + word_begin, word_idx, word_mod, c );
444 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
448 m_pool.reserve( n_pages );
449 while (m_page_count < n_pages)
452 m_pages.reserve( n_pages );
453 m_new_pages.reserve( n_pages );
454 m_counters.reserve( (n_pages+1) * SYMBOL_COUNT );
455 m_new_counters.reserve( (n_pages+1) * SYMBOL_COUNT );
456 m_offsets.reserve( n_pages + 1 );
457 m_new_offsets.reserve( n_pages + 1 );
462 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
466 m_pool.reserve( n_pages );
467 while (m_pool_size < n_pages)
473 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
481 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
485 (m_occ_intv / SYMBOLS_PER_WORD) / SYMBOL_COUNT;
487 const uint32 ext_page_size = m_page_size + m_page_size / occ_freq;
490 return n_pages * ext_page_size *
sizeof(
word_type);
495 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
498 const uint32 PAGE_SYMBOLS = m_page_size * SYMBOLS_PER_WORD;
503 reserve_pages( n_pages );
506 m_pages.resize( n_pages );
507 for (
uint32 i = 0; i < n_pages; ++i)
508 m_pages[i] = alloc_page();
511 m_offsets.resize( n_pages + 1 );
513 #pragma omp parallel for
515 m_offsets[i] =
uint64(i) * PAGE_SYMBOLS;
517 m_offsets[ n_pages ] = n;
520 m_counters.resize( (n_pages+1) * SYMBOL_COUNT,
uint64(0) );
524 #pragma omp parallel for
525 for (int32 i = 0; i <
int32(n_pages); ++i)
539 uint64* cnts = &m_counters[ i * SYMBOL_COUNT ];
545 if ((j & (m_occ_intv-1)) == 0)
547 for (uint32 q = 0; q < SYMBOL_COUNT; ++q)
553 const uint8 cc = c[ begin + j ] & (SYMBOL_COUNT-1);
561 for (
uint32 j = 0; j < SYMBOL_COUNT; ++j)
567 thrust::plus<uint64>(),
573 const uint64* cnts = symbol_frequencies();
575 for (
uint32 j = 0; j < SYMBOL_COUNT; ++j)
580 log_error(stderr,
"mismatching occurrence counters: expected %llu symbols, got %llu\n", n, n_occ );
585 build_buckets( m_offsets.back(), (
uint32)m_offsets.size(), &m_offsets[0], BUCKET_SIZE, m_buckets );
589 template <u
int32 SYMBOL_SIZE,
bool BIG_ENDIAN>
629 const uint32 cnt = std::accumulate( partials, partials + SYMBOL_COUNT, 0u );
633 log_error(stderr,
"alloc_page(%u) : expected %u occurrences, got %u\n", *out_leaf, k_out, cnt);
656 void operator() (
const uint32 in_leaf)
const
660 uint32 out_leaf = leaf_ids[ in_leaf ];
661 const uint32 out_leaf_begin = leaf_ids[ in_leaf ];
662 const uint32 out_leaf_end = leaf_ids[ in_leaf+1u ];
663 const uint64 in_leaf_begin =
text->m_offsets[ in_leaf ];
664 const uint64 in_leaf_end =
text->m_offsets[ in_leaf + 1u ];
665 const uint32 in_leaf_size = in_leaf_end - in_leaf_begin;
668 const uint32 g_end = in_leaf < in_leaves-1 ?
671 if (g_begin == g_end)
680 text->m_new_offsets[ out_leaf ] = in_leaf_begin + g_begin;
683 text->m_new_pages[ out_leaf ] = in_page;
686 for (
uint32 q = 0; q < SYMBOL_COUNT; ++q)
688 text->m_new_counters[ SYMBOL_COUNT * out_leaf + q ] =
689 text->m_counters[ SYMBOL_COUNT * (in_leaf+1u) + q ] -
690 text->m_counters[ SYMBOL_COUNT * (in_leaf+0u) + q ];
703 const uint32 elements_per_page =
util::divide_ri( in_leaf_size + g_end - g_begin, out_leaf_end - out_leaf_begin );
706 text->m_new_offsets[ out_leaf ] = in_leaf_begin + g_begin;
709 text->m_new_pages[ out_leaf ] = out_page;
714 uint32 partials[SYMBOL_COUNT];
715 for (
uint32 q = 0; q < SYMBOL_COUNT; ++q)
718 for (
uint32 j = g_begin; j < g_end; ++j)
722 const uint8 cc = c[j] & (SYMBOL_COUNT-1);
728 assert( m <= LEAF_SYMBOLS );
733 copy( r, in_stream + k_in, out_stream + k_out,
text->m_occ_intv_log,
text->m_occ_intv, partials, occ,
text->m_count_table );
749 copy( m - r, in_stream + k_in + r, out_stream,
text->m_occ_intv_log,
text->m_occ_intv, partials, occ,
text->m_count_table );
757 if (k_out < elements_per_page)
760 save_occurrences<SYMBOL_COUNT>( k_out,
text->m_occ_intv_log,
text->m_occ_intv, partials, occ );
762 out_stream[ k_out++ ] = cc;
777 save_occurrences<SYMBOL_COUNT>( k_out,
text->m_occ_intv_log,
text->m_occ_intv, partials, occ );
779 out_stream[ k_out++ ] = cc;
785 if (in_leaf_size > k_in)
787 const uint32 m = in_leaf_size - k_in;
788 assert( m <= LEAF_SYMBOLS );
793 copy( r, in_stream + k_in, out_stream + k_out,
text->m_occ_intv_log,
text->m_occ_intv, partials, occ,
text->m_count_table );
809 copy( m - r, in_stream + k_in + r, out_stream,
text->m_occ_intv_log,
text->m_occ_intv, partials, occ,
text->m_count_table );
814 for (
uint32 q = 0; q < SYMBOL_COUNT; ++q)
815 text->m_new_counters[ SYMBOL_COUNT * out_leaf + q ] = partials[q];
818 text->release_page( in_page );
820 if (out_leaf+1 != out_leaf_end)
822 log_error(stderr,
"mismatching number of output leaves: leaf[%u/%u] : expected %u, got %u\n",
824 out_leaf_end - out_leaf_begin,
825 out_leaf - out_leaf_begin);
826 log_error(stderr,
" in-size : %u\n", in_leaf_size);
845 template <u
int32 SYMBOL_SIZE,
bool BIG_ENDIAN>
848 static const uint32 SYMBOL_COUNT = 1u << SYMBOL_SIZE;
861 out_leaves ( _out_leaves ),
862 in_leaves ( _in_leaves ),
866 void operator() (
const uint32 out_leaf)
const
870 const uint64 out_leaf_begin =
uint64( out_leaf ) * LEAF_SYMBOLS;
872 const uint32 out_leaf_size =
uint32( out_leaf_end - out_leaf_begin );
880 text->m_new_offsets[ out_leaf ] = out_leaf_begin;
883 text->m_new_pages[ out_leaf ] = out_page;
885 uint32 partials[SYMBOL_COUNT];
886 for (
uint32 q = 0; q < SYMBOL_COUNT; ++q)
893 for (; k_out < out_leaf_size && in_leaf < in_leaves; ++in_leaf)
895 const uint64 in_leaf_begin =
text->m_offsets[ in_leaf ];
896 const uint64 in_leaf_end =
text->m_offsets[ in_leaf+1 ];
897 const uint32 in_leaf_size =
uint32( in_leaf_end - in_leaf_begin );
901 assert( in_page != NULL );
903 const uint32 k_in = in_leaf_begin >= out_leaf_begin ? 0u :
uint32( out_leaf_begin - in_leaf_begin );
906 copy( r, in_stream + k_in, out_stream + k_out,
text->m_occ_intv_log,
text->m_occ_intv, partials, occ,
text->m_count_table );
912 for (
uint32 q = 0; q < SYMBOL_COUNT; ++q)
913 text->m_new_counters[ SYMBOL_COUNT * out_leaf + q ] = partials[q];
915 const uint32 cnt = std::accumulate( partials, partials + SYMBOL_COUNT, 0u );
918 log_error(stderr,
"merge_pages(%u) : expected %u occurrences, got %u\n", out_leaf, k_out, cnt);
932 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
935 const uint32 LEAF_SYMBOLS = m_page_size * SYMBOLS_PER_WORD;
937 const uint32 n_leaves = m_offsets.
size() - 1u;
952 thrust::adjacent_difference(
954 m_offsets.begin() + n_leaves + 1u,
955 leaf_sizes.begin() );
958 const uint64 old_size = m_offsets.back();
959 m_offsets.back() =
uint64(-1);
968 nvbio::upper_bound<host_tag>(
983 log_debug(stderr,
" touched leaves %u, (%.2f%% - %.1fMB)\n", n_touched, 100.0f *
float(n_touched) /
float(n_leaves),
float(n_touched)*(m_page_size*
sizeof(
word_type)) /
float(1024*1024));
986 nvbio::transform<host_tag>(
988 thrust::make_permutation_iterator( leaf_sizes.begin(), ins_leaves.begin() ),
990 thrust::make_permutation_iterator( leaf_sizes.begin(), ins_leaves.begin() ),
991 thrust::plus<uint32>() );
999 nvbio::lower_bound<host_tag>(
1007 g_leaves[ n_leaves ] = n;
1010 thrust::adjacent_difference(
1012 g_leaves.begin() + n_leaves + 1u,
1013 ins_counts.begin() );
1016 nvbio::transform<host_tag>(
1021 thrust::plus<uint32>() );
1045 m_offsets.back() = old_size;
1056 new_leaf_ids.begin(),
1057 thrust::plus<uint32>(),
1060 const uint32 out_leaves = new_leaf_ids[ n_leaves ];
1063 m_new_pages.resize( out_leaves );
1064 m_new_offsets.resize( out_leaves+1 );
1065 m_new_counters.resize( (out_leaves+1) * SYMBOL_COUNT,
uint64(0) );
1072 const uint32 BATCH_SIZE = 4*1024;
1074 reserve_pages( out_leaves +
nvbio::min( n_leaves, BATCH_SIZE ) );
1079 const float utilization = (float( size() + n ) / float(LEAF_SYMBOLS)) /
float( out_leaves );
1081 log_debug(stderr,
" copy pages %u -> %u (utilization : %.1f%%)\n",
1082 n_leaves, out_leaves,
1083 100.0f * utilization );
1095 for (
uint32 batch_begin = 0; batch_begin < n_leaves; batch_begin += BATCH_SIZE)
1102 nvbio::for_each<host_tag>(
1103 batch_end - batch_begin,
1104 thrust::make_counting_iterator<uint32>( batch_begin ),
1118 m_new_offsets[ out_leaves ] = m_offsets[ n_leaves ] + n;
1121 m_pages.swap( m_new_pages );
1122 m_offsets.swap( m_new_offsets );
1123 m_counters.swap( m_new_counters );
1126 for (
uint32 j = 0; j < SYMBOL_COUNT; ++j)
1132 thrust::plus<uint64>(),
1138 const uint64* cnts = symbol_frequencies();
1140 for (
uint32 j = 0; j < SYMBOL_COUNT; ++j)
1143 if (n_occ != m_offsets[ out_leaves ])
1145 log_error(stderr,
"mismatching occurrence counters: expected %llu symbols, got %llu\n", m_offsets[ out_leaves ], n_occ );
1153 build_buckets( m_offsets.back(), (
uint32)m_offsets.size(), &m_offsets[0], BUCKET_SIZE, m_buckets );
1158 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
1161 const uint32 LEAF_SYMBOLS = m_page_size * SYMBOLS_PER_WORD;
1163 const uint32 in_leaves = page_count();
1164 const uint64 n_symbols = size();
1167 log_debug(stderr,
" defrag %u -> %u\n", in_leaves, out_leaves );
1172 m_new_pages.resize( out_leaves );
1173 m_new_offsets.resize( out_leaves+1 );
1174 m_new_counters.resize( (out_leaves+1) * SYMBOL_COUNT,
uint64(0) );
1176 const uint32 BATCH_SIZE = 4*1024;
1178 uint32 in_leaf_begin = 0;
1187 for (
uint32 batch_begin = 0; batch_begin < out_leaves; batch_begin += BATCH_SIZE)
1189 const uint32 batch_end =
nvbio::min( out_leaves, batch_begin + BATCH_SIZE );
1192 reserve_free_pages( batch_end - batch_begin );
1195 nvbio::for_each<host_tag>(
1196 batch_end - batch_begin,
1197 thrust::make_counting_iterator<uint32>( batch_begin ),
1203 for (
uint32 i = in_leaf_begin; i < in_leaf_end; ++i)
1205 release_page( m_pages[i] );
1209 in_leaf_begin = in_leaf_end;
1213 for (
uint32 i = in_leaf_begin; i < in_leaves; ++i)
1215 release_page( m_pages[i] );
1220 m_new_offsets[ out_leaves ] = n_symbols;
1223 m_pages.swap( m_new_pages );
1224 m_offsets.swap( m_new_offsets );
1225 m_counters.swap( m_new_counters );
1227 for (
uint32 i = 0; i < page_count()-1; ++i)
1229 const uint32 cnt = std::accumulate(
1230 &m_counters[i*SYMBOL_COUNT],
1231 &m_counters[i*SYMBOL_COUNT] + SYMBOL_COUNT, 0u );
1233 if (cnt != LEAF_SYMBOLS)
1234 log_error(stderr,
"mismatching occurrence counters: at page[%u], expected %llu symbols, got %llu\n", i, LEAF_SYMBOLS, cnt );
1238 for (
uint32 j = 0; j < SYMBOL_COUNT; ++j)
1244 thrust::plus<uint64>(),
1250 const uint64* cnts = symbol_frequencies();
1252 for (
uint32 j = 0; j < SYMBOL_COUNT; ++j)
1255 if (n_occ != n_symbols)
1257 log_error(stderr,
"mismatching occurrence counters: expected %llu symbols, got %llu\n", n_symbols, n_occ );
1265 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
1268 return &m_counters[ page_count() * SYMBOL_COUNT ];
1273 template <u
int32 SYMBOL_SIZE_T,
bool BIG_ENDIAN_T>
1276 const uint32 b = i >> LOG_BUCKET_SIZE;
1277 const uint32 lo = m_buckets[b];
1278 const uint32 hi = m_buckets[b+1];