39 const uint32 n_words = (n+1+31) >> 5;
40 const uint32 n_blocks = (n+1+63) >> 6;
43 for (
uint32 i = 0; i < n_words; ++i)
50 for (
uint32 i = 0; i <= n; ++i)
57 if ((sa[i] & (K-1)) == 0)
68 for (
uint32 i = 0; i <= n; ++i)
70 if ((sa[i] & (K-1)) == 0)
79 template <
typename FMIndexType>
81 const FMIndexType& fmi,
84 const uint32 n = fmi.length();
88 const uint32 n_words = (n+1+31) >> 5;
89 const uint32 n_blocks = (n+1+63) >> 6;
92 for (
uint32 i = 0; i < n_words; ++i)
103 for (
uint32 i = 0; i < n; ++i)
105 if ((sa & (K-1)) == 0)
108 m_bitmask[ isa >> 5 ] |= (1u << (isa & 31u));
115 if ((sa & (K-1)) == 0)
118 m_bitmask[ isa >> 5 ] |= (1u << (isa & 31u));
124 for (
uint32 i = 1; i < n_blocks; ++i)
138 for (
uint32 i = 0; i < n; ++i)
140 if ((sa & (K-1)) == 0)
141 m_ssa[ index( isa ) ] = sa;
147 if ((sa & (K-1)) == 0)
148 m_ssa[ index( isa ) ] = sa;
156 inline uint32 SSA_value_multiple::index(
const uint32 i)
const
159 const uint32 word = i >> 5;
167 const uint32 block = i >> 6;
171 const uint32 shifted_mask = word_mask & ((1u << (i&31u)) - 1u);
172 pos +=
popc( shifted_mask );
175 for (
uint32 j = block*2; j < word; ++j)
185 m_stored( ssa.m_stored )
219 template <
typename SSAIterator,
typename BitmaskIterator,
typename BlockIterator>
223 const uint32 word = i >> 5;
224 const uint32 word_mask = m_bitmask[ word ];
225 const uint32 is_there = word_mask & (1u << (i&31u));
234 const uint32 block = i >> 6;
235 uint32 pos = m_blocks[ block ];
238 const uint32 shifted_mask = word_mask & ((1u << (i&31u)) - 1u);
239 pos +=
popc( shifted_mask );
242 for (
uint32 j = block*2; j < word; ++j)
243 pos +=
popc( m_bitmask[j] );
251 template <
typename SSAIterator,
typename BitmaskIterator,
typename BlockIterator>
255 const uint32 word = i >> 5;
256 const uint32 word_mask = m_bitmask[ word ];
257 const uint32 is_there = word_mask & (1u << (i&31u));
263 template <u
int32 K,
typename index_type>
266 const index_type* sa)
268 const uint32 n_items = (n+1+
K-1) /
K;
271 m_ssa.resize( n_items );
274 for (
uint32 i = 0; i < n_items; ++i)
282 template <u
int32 K,
typename index_type>
283 template <
typename FMIndexType>
285 const FMIndexType& fmi)
287 const uint32 n = fmi.length();
288 const uint32 n_items = (n+1+
K-1) /
K;
291 m_ssa.resize( n_items );
294 index_type isa = 0, sa = n;
296 for (index_type i = 0; i < n; ++i)
298 if ((isa & (
K-1)) == 0)
305 if ((isa & (
K-1)) == 0)
308 m_ssa[0] = index_type(-1);
314 template <u
int32 K,
typename index_type>
319 m_ssa.resize( ssa.
m_ssa.size() );
321 const uint32 n_items = (m_n+1+
K-1) /
K;
323 cudaMemcpy( &m_ssa[0], thrust::raw_pointer_cast(&ssa.
m_ssa[0]),
sizeof(index_type)*n_items, cudaMemcpyDeviceToHost );
329 template <u
int32 K,
typename index_type>
334 m_ssa.resize( ssa.
m_ssa.size() );
336 const uint32 n_items = (m_n+1+
K-1) /
K;
338 cudaMemcpy( &m_ssa[0], thrust::raw_pointer_cast(&ssa.
m_ssa[0]),
sizeof(index_type)*n_items, cudaMemcpyDeviceToHost );
344 template <u
int32 K,
typename index_type>
350 m_ssa.resize( n_items );
352 cudaMemcpy( thrust::raw_pointer_cast(&
m_ssa[0]), &ssa.
m_ssa[0],
sizeof(index_type)*n_items, cudaMemcpyHostToDevice );
357 template <u
int32 K,
typename index_type,
typename FMIndexType>
358 __global__
void SSA_index_multiple_setup_kernel(
361 const FMIndexType fmi,
365 const uint32 grid_size = blockDim.x * gridDim.x;
368 for (
uint32 idx = range_begin + thread_id; idx < range_end; idx += grid_size)
374 index_type isa = idx *
K;
384 while ((isa & (K-1)) != 0);
386 ssa[ isa/
K ] = steps;
394 template <u
int32 K,
typename index_type>
395 template <
typename FMIndexType>
397 const FMIndexType& fmi)
405 template <u
int32 K,
typename index_type>
406 template <
typename FMIndexType>
407 void SSA_index_multiple_device<K,index_type>::init(
408 const FMIndexType& fmi)
411 const uint32 n_items = (m_n+1+K-1) / K;
413 m_ssa.resize( n_items );
416 cudaMalloc( &d_link,
sizeof(index_type)*n_items );
426 SSA_index_multiple_setup_kernel<K,index_type,FMIndexType>, BLOCK_SIZE, 0);
428 const uint32 BATCH_SIZE = 128*1024;
429 for (
uint32 range_begin = 0; range_begin < n_items; range_begin += BATCH_SIZE)
432 const uint32 n_blocks =
std::min( max_blocks, (range_end - range_begin + BLOCK_SIZE-1) / BLOCK_SIZE );
434 SSA_index_multiple_setup_kernel<K> <<<n_blocks,BLOCK_SIZE>>>(
438 thrust::raw_pointer_cast(&m_ssa[0]),
441 cudaThreadSynchronize();
446 std::vector<index_type> h_link( n_items );
447 std::vector<index_type> h_ssa( n_items );
449 cudaMemcpy( &h_ssa[0], thrust::raw_pointer_cast(&m_ssa[0]),
sizeof(index_type)*n_items, cudaMemcpyDeviceToHost );
450 cudaMemcpy( &h_link[0], d_link,
sizeof(index_type)*n_items, cudaMemcpyDeviceToHost );
460 typedef typename signed_type<index_type>::type sindex_type;
462 index_type isa_div_k = 0;
463 sindex_type sa = m_n;
466 if (isa_div_k >= n_items)
467 throw std::runtime_error(
"SSA_index_multiple_device: index out of bounds\n");
469 isa_div_k = h_link[ isa_div_k ];
470 sa -= h_ssa[ isa_div_k ];
472 h_ssa[ isa_div_k ] = index_type( sa );
474 h_ssa[0] = index_type(-1);
477 cudaMemcpy( thrust::raw_pointer_cast(&m_ssa[0]), &h_ssa[0],
sizeof(index_type)*n_items, cudaMemcpyHostToDevice );
486 template <u
int32 K,
typename Iterator>
489 if ((i & (K-1)) == 0)
500 template <u
int32 K,
typename Iterator>
503 return ((i & (K-1)) == 0);