CUB
|
#include "specializations/block_reduce_raking.cuh"
#include "specializations/block_reduce_raking_commutative_only.cuh"
#include "specializations/block_reduce_warp_reductions.cuh"
#include "../config.cuh"
#include "../util_ptx.cuh"
#include "../util_type.cuh"
#include "../thread/thread_operators.cuh"
Classes | |
class | cub::BlockReduce< T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH > |
The BlockReduce class provides collective methods for computing a parallel reduction of items partitioned across a CUDA thread block. | |
struct | cub::BlockReduce< T, BLOCK_DIM_X, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage |
The operations exposed by BlockReduce require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the `__shared__` keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or `union`'d with other storage allocation types to facilitate memory reuse. More... | |
Namespaces | |
cub | |
Enumerations | |
enum | cub::BlockReduceAlgorithm { cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY, cub::BLOCK_REDUCE_RAKING, cub::BLOCK_REDUCE_WARP_REDUCTIONS } |
The cub::BlockReduce class provides collective methods for computing a parallel reduction of items partitioned across a CUDA thread block.