Namespaces
	wq

Classes
struct	Arch

struct	condition

struct	condition_set_view

struct	condition_set_storage

struct	host_device_buffer

struct	host_device_buffer_zero_copy

struct	host_device_buffer_sync

struct	ldg_pointer

struct	load_pointer

struct	PingPongQueuesView

struct	PingPongQueues

struct	is_sorted_iterator

struct	is_segment_sorted_iterator

struct	scan_dispatch

struct	scan_dispatch< T, Op, 32 >

struct	scan_dispatch< T, Op, 16 >

struct	scan_dispatch< T, Op, 8 >

struct	scan_dispatch< T, Op, 4 >

struct	scan_dispatch< T, Op, 2 >

struct	all_dispatch

struct	all_dispatch< 32 >

struct	all_dispatch< 2 >

struct	all_dispatch< 4 >

struct	all_dispatch< 8 >

struct	all_dispatch< 16 >

struct	all_dispatch< 64 >

struct	all_dispatch< 128 >

struct	all_dispatch< 256 >

struct	any_dispatch

struct	any_dispatch< 32 >

struct	any_dispatch< 2 >

struct	any_dispatch< 4 >

struct	any_dispatch< 8 >

struct	any_dispatch< 16 >

struct	any_dispatch< 64 >

struct	any_dispatch< 128 >

struct	any_dispatch< 256 >

struct	SortBuffers

struct	SortEnactor

struct	syncblocks

struct	syncblocks_storage

struct	Timer

struct	ScopedTimer

struct	InplaceQueueTag

struct	WorkQueueStatsView

struct	DefaultMover

struct	WorkQueue

struct	WorkQueueStats

struct	MultiPassQueueTag

struct	WorkQueue< MultiPassQueueTag, WorkUnitT, BLOCKDIM >

struct	OrderedQueueTag

struct	WorkQueue< OrderedQueueTag, WorkUnitT, BLOCKDIM >

struct	PersistentWarpsQueueTag

struct	PersistentThreadsQueueTag

struct	WorkQueue< PersistentWarpsQueueTag, WorkUnitT, BLOCKDIM >

struct	WorkQueue< PersistentThreadsQueueTag, WorkUnitT, BLOCKDIM >

struct	DelayList

struct	DiscardDelayList

struct	CompressionSort

struct	PrefixDoublingSufSort

struct	HostBWTConfigGPUBucketer

struct	HostBWTConfigCPUBucketer

struct	DeviceBWTConfig

struct	LargeBWTStatus

struct	LargeBWTSkeleton

Enumerations
enum	CacheLoadModifier { LOAD_DEFAULT, LOAD_CA, LOAD_CG, LOAD_CS, LOAD_CV, LOAD_LDG, LOAD_VOLATILE }
	Enumeration of cache modifiers for memory load operations. More...

enum	WorkQueueStatsEvent { STREAM_EVENT = 0, RUN_EVENT = 1 }

Functions
void	device_arch (uint32 &major, uint32 &minor)

uint32	max_grid_size ()

size_t	multiprocessor_count ()

size_t	smem_allocation_unit (const cudaDeviceProp &properties)

size_t	reg_allocation_unit (const cudaDeviceProp &properties, const size_t regsPerThread)

size_t	warp_allocation_multiple (const cudaDeviceProp &properties)

size_t	num_sides_per_multiprocessor (const cudaDeviceProp &properties)

size_t	max_blocks_per_multiprocessor (const cudaDeviceProp &properties)

size_t	num_regs_per_block (const cudaDeviceProp &properties, const cudaFuncAttributes &attributes, const size_t CTA_SIZE)

template<typename KernelFunction >
cudaFuncAttributes	function_attributes (KernelFunction kernel)

template<typename KernelFunction >
size_t	max_active_blocks_per_multiprocessor (KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes)

template<typename KernelFunction >
size_t	max_active_blocks (KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes)

template<typename KernelFunction >
size_t	num_registers (KernelFunction kernel)

template<typename KernelFunction >
size_t	max_blocksize_with_highest_occupancy (KernelFunction kernel, size_t dynamic_smem_bytes_per_thread)

bool	is_tcc_enabled ()

void	check_error (const char *message)

template<uint32 N>
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void	syncthreads ()

size_t	max_active_blocks_per_multiprocessor (const cudaDeviceProp &properties, const cudaFuncAttributes &attributes, size_t CTA_SIZE, size_t dynamic_smem_bytes)

size_t	max_blocksize_with_highest_occupancy (const cudaDeviceProp &properties, const cudaFuncAttributes &attributes, size_t dynamic_smem_bytes_per_thread)

template<typename T >
void	copy (const thrust::device_vector< T > &dvec, thrust::host_vector< T > &hvec)

template<typename T >
void	copy (host_device_buffer< T > &dvec, thrust::host_vector< T > &hvec)

template<typename T >
const T *	device_pointer (const thrust::device_vector< T > &dvec)

template<typename T >
T *	device_pointer (thrust::device_vector< T > &dvec)

template<typename T >
const T *	device_pointer (const host_device_buffer< T > &dvec)

template<typename T >
T *	device_pointer (host_device_buffer< T > &dvec)

template<typename T >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE ldg_pointer < T >	make_ldg_pointer (const T *it)

template<CacheLoadModifier MOD, typename T >
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE load_pointer < T, MOD >	make_load_pointer (const T *it)

template<typename VectorType >
void	alloc_temp_storage (VectorType &vec, const uint64 size)

template<typename PredicateIterator >
bool	any (const uint32 n, const PredicateIterator pred)

template<typename PredicateIterator >
bool	all (const uint32 n, const PredicateIterator pred)

template<typename Iterator >
bool	is_sorted (const uint32 n, const Iterator values)

template<typename Iterator , typename Headflags >
bool	is_segment_sorted (const uint32 n, const Iterator values, const Headflags flags)

template<typename InputIterator , typename BinaryOp >
std::iterator_traits < InputIterator >::value_type	reduce (const uint32 n, InputIterator d_in, BinaryOp op, thrust::device_vector< uint8 > &d_temp_storage)

template<typename InputIterator , typename OutputIterator , typename BinaryOp >
void	inclusive_scan (const uint32 n, InputIterator d_in, OutputIterator d_out, BinaryOp op, thrust::device_vector< uint8 > &d_temp_storage)

template<typename InputIterator , typename OutputIterator , typename BinaryOp , typename Identity >
void	exclusive_scan (const uint32 n, InputIterator d_in, OutputIterator d_out, BinaryOp op, Identity identity, thrust::device_vector< uint8 > &d_temp_storage)

template<typename InputIterator , typename FlagsIterator , typename OutputIterator >
uint32	copy_flagged (const uint32 n, InputIterator d_in, FlagsIterator d_flags, OutputIterator d_out, thrust::device_vector< uint8 > &d_temp_storage)

template<typename InputIterator , typename OutputIterator , typename Predicate >
uint32	copy_if (const uint32 n, InputIterator d_in, OutputIterator d_out, const Predicate pred, thrust::device_vector< uint8 > &d_temp_storage)

template<typename InputIterator , typename OutputIterator , typename CountIterator >
uint32	runlength_encode (const uint32 n, InputIterator d_in, OutputIterator d_out, CountIterator d_counts, thrust::device_vector< uint8 > &d_temp_storage)

template<typename KeyIterator , typename ValueIterator , typename OutputKeyIterator , typename OutputValueIterator , typename ReductionOp >
uint32	reduce_by_key (const uint32 n, KeyIterator d_keys_in, ValueIterator d_values_in, OutputKeyIterator d_keys_out, OutputValueIterator d_values_out, ReductionOp reduction_op, thrust::device_vector< uint8 > &d_temp_storage)

template<typename PredicateIterator >
__global__ void	any_kernel (const uint32 n, const PredicateIterator pred, uint32 *r)

template<typename PredicateIterator >
__global__ void	all_kernel (const uint32 n, const PredicateIterator pred, uint32 *r)

template<typename T >
__device__ __forceinline__ T	bit_scan (bool p)

template<typename T >
__device__ __forceinline__ T	scan_warp (T val, const int32 tidx, volatile T *red)

template<typename T >
__device__ __forceinline__ T	scan_warp_total (volatile T *red)

template<uint32 COUNT, typename T , typename Op >
__device__ __forceinline__ T	scan (T val, const Op op, const T init, volatile T *red)

template<uint32 COUNT, typename T >
__device__ __forceinline__ T	scan (T val, volatile T *red)

template<uint32 COUNT, typename T >
__device__ __forceinline__ T	scan_total (volatile T *red)

__device__ __forceinline__ uint32	alloc (uint32 n, uint32 pool, const int32 warp_tid, volatile uint32 warp_red, volatile uint32 *warp_broadcast)

template<uint32 N>
__device__ __forceinline__ uint32	alloc (bool pred, uint32 pool, const int32 warp_tid, volatile uint32 warp_broadcast)

template<uint32 COUNT>
__device__ __forceinline__ bool	all (const bool p, volatile uint8 *sm=NULL)

template<uint32 COUNT>
__device__ __forceinline__ bool	any (const bool p, volatile uint8 *sm=NULL)

WorkQueueStatsView	view (WorkQueueStats *stats)

template<typename InStringSet , typename StringIterator , typename OffsetIterator >
void	copy (const InStringSet &in_string_set, ConcatenatedStringSet< StringIterator, OffsetIterator > &out_string_set)

template<typename InStringSet , typename StringIterator , typename LengthIterator >
void	copy (const InStringSet &in_string_set, StridedStringSet< StringIterator, LengthIterator > &out_string_set)

template<typename InStringSet , typename StreamIterator , typename SymbolType , uint32 SYMBOL_SIZE_T, bool BIG_ENDIAN_T, typename LengthIterator >
void	copy (const InStringSet &in_string_set, StridedPackedStringSet< StreamIterator, SymbolType, SYMBOL_SIZE_T, BIG_ENDIAN_T, LengthIterator > &out_string_set)

template<typename string_type , typename suffix_iterator , typename output_handler >
void	blockwise_suffix_sort (const typename string_type::index_type string_len, string_type string, const typename string_type::index_type n_suffixes, suffix_iterator suffixes, output_handler &output, const DCS dcs, BWTParams params)

template<typename string_type >
void	blockwise_build (DCS &dcs, const typename string_type::index_type string_len, string_type string, BWTParams *params)

__global__ void	prefix_doubling_kernel (const uint32 n_slots, const uint32 n_suffixes, const uint32 j, const uint32 suffixes, const uint32 inv_keys, uint32 *out_keys)

void	prefix_doubling (const uint32 n_slots, const uint32 n_suffixes, const uint32 j, const uint32 suffixes, const uint32 inv_keys, uint32 *out_keys)

template<uint32 BLOCKDIM>
__global__ void	build_head_flags_kernel (const uint32 n_flags, const uint32 keys, uint8 flags, uint32 *blocks)

template<uint32 BLOCKDIM>
__global__ void	extract_segments_kernel (const uint32 n_flags, const uint8 flags, const uint32 blocks, const uint32 slots, uint32 keys, uint32 *segments)

uint32	extract_segments (const uint32 n_flags, const uint32 in_keys, uint8 flags, uint32 blocks, const uint32 slots, uint32 keys, uint32 segments)

__global__ void	compact_kernel (const uint32 n, const uint8 stencil, const uint32 keys, const uint8 flags, const uint32 slots, const uint32 indices, uint8 out_flags, uint32 out_slots, uint32 out_indices)

void	compact (const uint32 n, const uint8 stencil, const uint32 keys, const uint8 flags, const uint32 slots, const uint32 indices, uint8 out_flags, uint32 out_slots, uint32 out_indices)

template<typename string_type >
string_type::index_type	find_primary (const typename string_type::index_type string_len, const string_type string)

template<typename string_type , typename output_iterator >
void	suffix_sort (const typename stream_traits< string_type >::index_type string_len, const string_type string, output_iterator output, BWTParams *params)

template<typename string_type , typename output_handler >
void	blockwise_suffix_sort (const typename string_type::index_type string_len, string_type string, output_handler &output, BWTParams *params)

template<typename string_type , typename output_iterator >
string_type::index_type	bwt (const typename string_type::index_type string_len, string_type string, output_iterator output, BWTParams *params)

template<typename string_set_type , typename output_handler >
void	suffix_sort (const string_set_type &string_set, output_handler &output, BWTParams *params=NULL)

template<uint32 SYMBOL_SIZE, bool BIG_ENDIAN, typename storage_type , typename output_handler >
void	bwt (const ConcatenatedStringSet< PackedStream< storage_type, uint8, SYMBOL_SIZE, BIG_ENDIAN, uint64 >, uint64 * > string_set, output_handler &output, BWTParams *params=NULL)

void	scan_test ()

Detailed Description

Strings

Enumeration Type Documentation

enum nvbio::cuda::CacheLoadModifier

Enumeration of cache modifiers for memory load operations.

Enumerator
LOAD_DEFAULT	Default (no modifier)
LOAD_CA	Cache at all levels.
LOAD_CG	Cache at global level.
LOAD_CS	Cache streaming (likely to be accessed once)
LOAD_CV	Cache as volatile (including cached system lines)
LOAD_LDG	Cache as texture.
LOAD_VOLATILE	Volatile (any memory space)

Definition at line 197 of file ldg.h.

Function Documentation

template<uint32 COUNT>

__device__ __forceinline__ bool nvbio::cuda::all	(	const bool	p,
		volatile uint8 *	sm = `NULL`
	)

generalized all primitive for arbitrarily (power of 2) sized thread groups

Template Parameters

COUNT thread-group size

Parameters

p	per-thread predicate
sm	shared-memory array needed when COUNT is larger than 32

Returns: true iff the predicate is true for all threads in the thread-group

Definition at line 394 of file scan.h.

template<typename PredicateIterator >

__global__ void nvbio::cuda::all_kernel	(	const uint32	n,
		const PredicateIterator	pred,
		uint32 *	r
	)

Definition at line 79 of file primitives_inl.h.

__device__ __forceinline__ uint32 nvbio::cuda::alloc	(	uint32	n,
		uint32 *	pool,
		const int32	warp_tid,
		volatile uint32 *	warp_red,
		volatile uint32 *	warp_broadcast
	)

alloc n elements per thread from a common pool, using a synchronous warp scan

Parameters

n	number of elements to alloc
warp_tid	warp thread index
warp_red	temporary warp scan storage (2*WARP_SIZE elements)
warp_broadcast	temporary warp broadcasting storage

Definition at line 245 of file scan.h.

template<uint32 N>

__device__ __forceinline__ uint32 nvbio::cuda::alloc	(	bool	pred,
		uint32 *	pool,
		const int32	warp_tid,
		volatile uint32 *	warp_broadcast
	)

alloc zero or exactly N elements per thread from a common pool

Parameters

p	allocation predicate
warp_tid	warp thread id
warp_broadcast	temporary warp broadcasting storage

Definition at line 262 of file scan.h.

template<uint32 COUNT>

__device__ __forceinline__ bool nvbio::cuda::any	(	const bool	p,
		volatile uint8 *	sm = `NULL`
	)

generalized any primitive for arbitrarily (power of 2) sized thread groups

Template Parameters

COUNT thread-group size

Parameters

p	per-thread predicate
sm	shared-memory array needed when COUNT is larger than 32

Returns: true iff the predicate is true for any threads in the thread-group

Definition at line 517 of file scan.h.

template<typename PredicateIterator >

__global__ void nvbio::cuda::any_kernel	(	const uint32	n,
		const PredicateIterator	pred,
		uint32 *	r
	)

Definition at line 59 of file primitives_inl.h.

template<typename T >

__device__ __forceinline__ T nvbio::cuda::bit_scan ( bool p)

intra-warp inclusive scan

Parameters

val	per-threrad input value

Definition at line 43 of file scan.h.

template<uint32 BLOCKDIM>

__global__ void nvbio::cuda::build_head_flags_kernel	(	const uint32	n_flags,
		const uint32 *	keys,
		uint8 *	flags,
		uint32 *	blocks
	)

build a set of head flags looking at adjacent keys and compute their block sums

Definition at line 210 of file prefix_doubling_sufsort.h.

void nvbio::cuda::check_error ( const char * message)

inline

Definition at line 263 of file arch_inl.h.

void nvbio::cuda::compact	(	const uint32	n,
		const uint8 *	stencil,
		const uint32 *	keys,
		const uint8 *	flags,
		const uint32 *	slots,
		const uint32 *	indices,
		uint8 *	out_flags,
		uint32 *	out_slots,
		uint32 *	out_indices
	)

inline

scatter flags/slots/indices to the position specified by keys (-1) only if the given stencil is true

Definition at line 429 of file prefix_doubling_sufsort.h.

__global__ void nvbio::cuda::compact_kernel	(	const uint32	n,
		const uint8 *	stencil,
		const uint32 *	keys,
		const uint8 *	flags,
		const uint32 *	slots,
		const uint32 *	indices,
		uint8 *	out_flags,
		uint32 *	out_slots,
		uint32 *	out_indices
	)

scatter flags/slots/indices to the position specified by keys (-1) only if the given stencil is true

Definition at line 375 of file prefix_doubling_sufsort.h.

template<typename T >

void nvbio::cuda::copy	(	const thrust::device_vector< T > &	dvec,
		thrust::host_vector< T > &	hvec
	)

Definition at line 155 of file host_device_buffer.h.

template<typename T >

void nvbio::cuda::copy	(	host_device_buffer< T > &	dvec,
		thrust::host_vector< T > &	hvec
	)

Definition at line 163 of file host_device_buffer.h.

void nvbio::cuda::device_arch	(	uint32 &	major,
		uint32 &	minor
	)

inline

Definition at line 32 of file arch_inl.h.

template<typename T >

const T* nvbio::cuda::device_pointer ( const thrust::device_vector< T > & dvec)

Definition at line 176 of file host_device_buffer.h.

template<typename T >

T* nvbio::cuda::device_pointer ( thrust::device_vector< T > & dvec)

Definition at line 182 of file host_device_buffer.h.

template<typename T >

const T* nvbio::cuda::device_pointer ( const host_device_buffer< T > & dvec)

Definition at line 188 of file host_device_buffer.h.

template<typename T >

T* nvbio::cuda::device_pointer ( host_device_buffer< T > & dvec)

Definition at line 194 of file host_device_buffer.h.

uint32 nvbio::cuda::extract_segments	(	const uint32	n_flags,
		const uint32 *	in_keys,
		uint8 *	flags,
		uint32 *	blocks,
		const uint32 *	slots,
		uint32 *	keys,
		uint32 *	segments
	)

inline

given a sorted set of n input keys, flags and slots, this function:

updates the set of flags or'ing them with the head flags of the segments of equal keys
computes a new "compact" set of keys in the range [1,#distinct-keys]
computes a set of segment heads segments[1,...,#distinct-keys]: segments[i] := slot[m] + 1, where m is the smallest integer such that keys[m] = i.

Definition at line 330 of file prefix_doubling_sufsort.h.

template<uint32 BLOCKDIM>

__global__ void nvbio::cuda::extract_segments_kernel	(	const uint32	n_flags,
		const uint8 *	flags,
		const uint32 *	blocks,
		const uint32 *	slots,
		uint32 *	keys,
		uint32 *	segments
	)

extract the slots corresponding to the first key in each segment

Definition at line 261 of file prefix_doubling_sufsort.h.

template<typename KernelFunction >

cudaFuncAttributes nvbio::cuda::function_attributes ( KernelFunction kernel)

inline

Definition at line 101 of file arch_inl.h.

bool nvbio::cuda::is_tcc_enabled ( )

inline

Definition at line 254 of file arch_inl.h.

template<typename T >

NVBIO_FORCEINLINE NVBIO_HOST_DEVICE ldg_pointer<T> nvbio::cuda::make_ldg_pointer ( const T * it)

make a ldg_pointer

Definition at line 188 of file ldg.h.

template<CacheLoadModifier MOD, typename T >

NVBIO_FORCEINLINE NVBIO_HOST_DEVICE load_pointer<T,MOD> nvbio::cuda::make_load_pointer ( const T * it)

make a load_pointer

Definition at line 367 of file ldg.h.

template<typename KernelFunction >

size_t nvbio::cuda::max_active_blocks	(	KernelFunction	kernel,
		const size_t	CTA_SIZE,
		const size_t	dynamic_smem_bytes
	)

Definition at line 193 of file arch_inl.h.

template<typename KernelFunction >

size_t nvbio::cuda::max_active_blocks_per_multiprocessor	(	KernelFunction	kernel,
		const size_t	CTA_SIZE,
		const size_t	dynamic_smem_bytes
	)

Definition at line 179 of file arch_inl.h.

size_t nvbio::cuda::max_active_blocks_per_multiprocessor	(	const cudaDeviceProp &	properties,
		const cudaFuncAttributes &	attributes,
		size_t	CTA_SIZE,
		size_t	dynamic_smem_bytes
	)

inline

Definition at line 154 of file arch_inl.h.

size_t nvbio::cuda::max_blocks_per_multiprocessor ( const cudaDeviceProp & properties)

inline

Definition at line 116 of file arch_inl.h.

template<typename KernelFunction >

size_t nvbio::cuda::max_blocksize_with_highest_occupancy	(	KernelFunction	kernel,
		size_t	dynamic_smem_bytes_per_thread
	)

Definition at line 242 of file arch_inl.h.

size_t nvbio::cuda::max_blocksize_with_highest_occupancy	(	const cudaDeviceProp &	properties,
		const cudaFuncAttributes &	attributes,
		size_t	dynamic_smem_bytes_per_thread
	)

inline

Definition at line 213 of file arch_inl.h.

uint32 nvbio::cuda::max_grid_size ( )

inline

Definition at line 44 of file arch_inl.h.

size_t nvbio::cuda::multiprocessor_count ( )

inline

Definition at line 52 of file arch_inl.h.

template<typename KernelFunction >

size_t nvbio::cuda::num_registers ( KernelFunction kernel)

Definition at line 207 of file arch_inl.h.

size_t nvbio::cuda::num_regs_per_block	(	const cudaDeviceProp &	properties,
		const cudaFuncAttributes &	attributes,
		const size_t	CTA_SIZE
	)

inline

Definition at line 134 of file arch_inl.h.

size_t nvbio::cuda::num_sides_per_multiprocessor ( const cudaDeviceProp & properties)

inline

Definition at line 122 of file arch_inl.h.

void nvbio::cuda::prefix_doubling	(	const uint32	n_slots,
		const uint32	n_suffixes,
		const uint32	j,
		const uint32 *	suffixes,
		const uint32 *	inv_keys,
		uint32 *	out_keys
	)

inline

perform prefix-doubling on the selected suffixes

Definition at line 188 of file prefix_doubling_sufsort.h.

__global__ void nvbio::cuda::prefix_doubling_kernel	(	const uint32	n_slots,
		const uint32	n_suffixes,
		const uint32	j,
		const uint32 *	suffixes,
		const uint32 *	inv_keys,
		uint32 *	out_keys
	)

Definition at line 143 of file prefix_doubling_sufsort.h.

size_t nvbio::cuda::reg_allocation_unit	(	const cudaDeviceProp &	properties,
		const size_t	regsPerThread
	)

inline

Definition at line 70 of file arch_inl.h.

template<uint32 COUNT, typename T , typename Op >

__device__ __forceinline__ T nvbio::cuda::scan	(	T	val,
		const Op	op,
		const T	init,
		volatile T *	red
	)

intra-warp inclusive scan

Parameters

val	per-threrad input value
tidx	warp thread index
red	scan result storage (2*WARP_SIZE elements)

Definition at line 216 of file scan.h.

template<uint32 COUNT, typename T >

__device__ __forceinline__ T nvbio::cuda::scan	(	T	val,
		volatile T *	red
	)

intra-warp inclusive scan

Parameters

val	per-threrad input value
tidx	warp thread index
red	scan result storage (2*WARP_SIZE elements)

Definition at line 225 of file scan.h.

void nvbio::cuda::scan_test ( )

template<uint32 COUNT, typename T >

__device__ __forceinline__ T nvbio::cuda::scan_total ( volatile T * red)

return the total from a scan_warp

Parameters

red	scan result storage

Definition at line 232 of file scan.h.

template<typename T >

__device__ __forceinline__ T nvbio::cuda::scan_warp	(	T	val,
		const int32	tidx,
		volatile T *	red
	)

intra-warp inclusive scan

Parameters

val	per-threrad input value
tidx	warp thread index
red	scan result storage (2*WARP_SIZE elements)

Definition at line 55 of file scan.h.

template<typename T >

__device__ __forceinline__ T nvbio::cuda::scan_warp_total ( volatile T * red)

return the total from a scan_warp

Parameters

red	scan result storage

Definition at line 73 of file scan.h.

size_t nvbio::cuda::smem_allocation_unit ( const cudaDeviceProp & properties)

inline

Definition at line 64 of file arch_inl.h.

template<uint32 N>

NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void nvbio::cuda::syncthreads ( )

a generic syncthreads() implementation to synchronize contiguous blocks of N threads at a time

Definition at line 279 of file arch_inl.h.

size_t nvbio::cuda::warp_allocation_multiple ( const cudaDeviceProp & properties)

inline

Definition at line 95 of file arch_inl.h.

Namespaces

Classes

Enumerations

Functions

Detailed Description

Enumeration Type Documentation

Function Documentation