|
void | device_arch (uint32 &major, uint32 &minor) |
|
uint32 | max_grid_size () |
|
size_t | multiprocessor_count () |
|
size_t | smem_allocation_unit (const cudaDeviceProp &properties) |
|
size_t | reg_allocation_unit (const cudaDeviceProp &properties, const size_t regsPerThread) |
|
size_t | warp_allocation_multiple (const cudaDeviceProp &properties) |
|
size_t | num_sides_per_multiprocessor (const cudaDeviceProp &properties) |
|
size_t | max_blocks_per_multiprocessor (const cudaDeviceProp &properties) |
|
size_t | num_regs_per_block (const cudaDeviceProp &properties, const cudaFuncAttributes &attributes, const size_t CTA_SIZE) |
|
template<typename KernelFunction > |
cudaFuncAttributes | function_attributes (KernelFunction kernel) |
|
template<typename KernelFunction > |
size_t | max_active_blocks_per_multiprocessor (KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes) |
|
template<typename KernelFunction > |
size_t | max_active_blocks (KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes) |
|
template<typename KernelFunction > |
size_t | num_registers (KernelFunction kernel) |
|
template<typename KernelFunction > |
size_t | max_blocksize_with_highest_occupancy (KernelFunction kernel, size_t dynamic_smem_bytes_per_thread) |
|
bool | is_tcc_enabled () |
|
void | check_error (const char *message) |
|
template<uint32 N> |
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void | syncthreads () |
|
size_t | max_active_blocks_per_multiprocessor (const cudaDeviceProp &properties, const cudaFuncAttributes &attributes, size_t CTA_SIZE, size_t dynamic_smem_bytes) |
|
size_t | max_blocksize_with_highest_occupancy (const cudaDeviceProp &properties, const cudaFuncAttributes &attributes, size_t dynamic_smem_bytes_per_thread) |
|
template<typename T > |
void | copy (const thrust::device_vector< T > &dvec, thrust::host_vector< T > &hvec) |
|
template<typename T > |
void | copy (host_device_buffer< T > &dvec, thrust::host_vector< T > &hvec) |
|
template<typename T > |
const T * | device_pointer (const thrust::device_vector< T > &dvec) |
|
template<typename T > |
T * | device_pointer (thrust::device_vector< T > &dvec) |
|
template<typename T > |
const T * | device_pointer (const host_device_buffer< T > &dvec) |
|
template<typename T > |
T * | device_pointer (host_device_buffer< T > &dvec) |
|
template<typename T > |
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE ldg_pointer< T > | make_ldg_pointer (const T *it) |
|
template<CacheLoadModifier MOD, typename T > |
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE load_pointer< T, MOD > | make_load_pointer (const T *it) |
|
template<typename VectorType > |
void | alloc_temp_storage (VectorType &vec, const uint64 size) |
|
template<typename PredicateIterator > |
bool | any (const uint32 n, const PredicateIterator pred) |
|
template<typename PredicateIterator > |
bool | all (const uint32 n, const PredicateIterator pred) |
|
template<typename Iterator > |
bool | is_sorted (const uint32 n, const Iterator values) |
|
template<typename Iterator , typename Headflags > |
bool | is_segment_sorted (const uint32 n, const Iterator values, const Headflags flags) |
|
template<typename InputIterator , typename BinaryOp > |
std::iterator_traits< InputIterator >::value_type | reduce (const uint32 n, InputIterator d_in, BinaryOp op, thrust::device_vector< uint8 > &d_temp_storage) |
|
template<typename InputIterator , typename OutputIterator , typename BinaryOp > |
void | inclusive_scan (const uint32 n, InputIterator d_in, OutputIterator d_out, BinaryOp op, thrust::device_vector< uint8 > &d_temp_storage) |
|
template<typename InputIterator , typename OutputIterator , typename BinaryOp , typename Identity > |
void | exclusive_scan (const uint32 n, InputIterator d_in, OutputIterator d_out, BinaryOp op, Identity identity, thrust::device_vector< uint8 > &d_temp_storage) |
|
template<typename InputIterator , typename FlagsIterator , typename OutputIterator > |
uint32 | copy_flagged (const uint32 n, InputIterator d_in, FlagsIterator d_flags, OutputIterator d_out, thrust::device_vector< uint8 > &d_temp_storage) |
|
template<typename InputIterator , typename OutputIterator , typename Predicate > |
uint32 | copy_if (const uint32 n, InputIterator d_in, OutputIterator d_out, const Predicate pred, thrust::device_vector< uint8 > &d_temp_storage) |
|
template<typename InputIterator , typename OutputIterator , typename CountIterator > |
uint32 | runlength_encode (const uint32 n, InputIterator d_in, OutputIterator d_out, CountIterator d_counts, thrust::device_vector< uint8 > &d_temp_storage) |
|
template<typename KeyIterator , typename ValueIterator , typename OutputKeyIterator , typename OutputValueIterator , typename ReductionOp > |
uint32 | reduce_by_key (const uint32 n, KeyIterator d_keys_in, ValueIterator d_values_in, OutputKeyIterator d_keys_out, OutputValueIterator d_values_out, ReductionOp reduction_op, thrust::device_vector< uint8 > &d_temp_storage) |
|
template<typename PredicateIterator > |
__global__ void | any_kernel (const uint32 n, const PredicateIterator pred, uint32 *r) |
|
template<typename PredicateIterator > |
__global__ void | all_kernel (const uint32 n, const PredicateIterator pred, uint32 *r) |
|
template<typename T > |
__device__ __forceinline__ T | bit_scan (bool p) |
|
template<typename T > |
__device__ __forceinline__ T | scan_warp (T val, const int32 tidx, volatile T *red) |
|
template<typename T > |
__device__ __forceinline__ T | scan_warp_total (volatile T *red) |
|
template<uint32 COUNT, typename T , typename Op > |
__device__ __forceinline__ T | scan (T val, const Op op, const T init, volatile T *red) |
|
template<uint32 COUNT, typename T > |
__device__ __forceinline__ T | scan (T val, volatile T *red) |
|
template<uint32 COUNT, typename T > |
__device__ __forceinline__ T | scan_total (volatile T *red) |
|
__device__ __forceinline__ uint32 | alloc (uint32 n, uint32 *pool, const int32 warp_tid, volatile uint32 *warp_red, volatile uint32 *warp_broadcast) |
|
template<uint32 N> |
__device__ __forceinline__ uint32 | alloc (bool pred, uint32 *pool, const int32 warp_tid, volatile uint32 *warp_broadcast) |
|
template<uint32 COUNT> |
__device__ __forceinline__ bool | all (const bool p, volatile uint8 *sm=NULL) |
|
template<uint32 COUNT> |
__device__ __forceinline__ bool | any (const bool p, volatile uint8 *sm=NULL) |
|
WorkQueueStatsView | view (WorkQueueStats *stats) |
|
template<typename InStringSet , typename StringIterator , typename OffsetIterator > |
void | copy (const InStringSet &in_string_set, ConcatenatedStringSet< StringIterator, OffsetIterator > &out_string_set) |
|
template<typename InStringSet , typename StringIterator , typename LengthIterator > |
void | copy (const InStringSet &in_string_set, StridedStringSet< StringIterator, LengthIterator > &out_string_set) |
|
template<typename InStringSet , typename StreamIterator , typename SymbolType , uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename LengthIterator > |
void | copy (const InStringSet &in_string_set, StridedPackedStringSet< StreamIterator, SymbolType, SYMBOL_SIZE_T, BIG_ENDIAN_T, LengthIterator > &out_string_set) |
|
template<typename string_type , typename suffix_iterator , typename output_handler > |
void | blockwise_suffix_sort (const typename string_type::index_type string_len, string_type string, const typename string_type::index_type n_suffixes, suffix_iterator suffixes, output_handler &output, const DCS *dcs, BWTParams *params) |
|
template<typename string_type > |
void | blockwise_build (DCS &dcs, const typename string_type::index_type string_len, string_type string, BWTParams *params) |
|
__global__ void | prefix_doubling_kernel (const uint32 n_slots, const uint32 n_suffixes, const uint32 j, const uint32 *suffixes, const uint32 *inv_keys, uint32 *out_keys) |
|
void | prefix_doubling (const uint32 n_slots, const uint32 n_suffixes, const uint32 j, const uint32 *suffixes, const uint32 *inv_keys, uint32 *out_keys) |
|
template<uint32 BLOCKDIM> |
__global__ void | build_head_flags_kernel (const uint32 n_flags, const uint32 *keys, uint8 *flags, uint32 *blocks) |
|
template<uint32 BLOCKDIM> |
__global__ void | extract_segments_kernel (const uint32 n_flags, const uint8 *flags, const uint32 *blocks, const uint32 *slots, uint32 *keys, uint32 *segments) |
|
uint32 | extract_segments (const uint32 n_flags, const uint32 *in_keys, uint8 *flags, uint32 *blocks, const uint32 *slots, uint32 *keys, uint32 *segments) |
|
__global__ void | compact_kernel (const uint32 n, const uint8 *stencil, const uint32 *keys, const uint8 *flags, const uint32 *slots, const uint32 *indices, uint8 *out_flags, uint32 *out_slots, uint32 *out_indices) |
|
void | compact (const uint32 n, const uint8 *stencil, const uint32 *keys, const uint8 *flags, const uint32 *slots, const uint32 *indices, uint8 *out_flags, uint32 *out_slots, uint32 *out_indices) |
|
template<typename string_type > |
string_type::index_type | find_primary (const typename string_type::index_type string_len, const string_type string) |
|
template<typename string_type , typename output_iterator > |
void | suffix_sort (const typename stream_traits< string_type >::index_type string_len, const string_type string, output_iterator output, BWTParams *params) |
|
template<typename string_type , typename output_handler > |
void | blockwise_suffix_sort (const typename string_type::index_type string_len, string_type string, output_handler &output, BWTParams *params) |
|
template<typename string_type , typename output_iterator > |
string_type::index_type | bwt (const typename string_type::index_type string_len, string_type string, output_iterator output, BWTParams *params) |
|
template<typename string_set_type , typename output_handler > |
void | suffix_sort (const string_set_type &string_set, output_handler &output, BWTParams *params=NULL) |
|
template<uint32 SYMBOL_SIZE, bool BIG_ENDIAN, typename storage_type , typename output_handler > |
void | bwt (const ConcatenatedStringSet< PackedStream< storage_type, uint8, SYMBOL_SIZE, BIG_ENDIAN, uint64 >, uint64 * > string_set, output_handler &output, BWTParams *params=NULL) |
|
void | scan_test () |
|