CUB
|
#include <iterator>
#include <type_traits>
#include <cub/block/block_store.cuh>
#include <cub/config.cuh>
#include <cub/util_ptx.cuh>
#include <cub/util_type.cuh>
#include <cub/warp/warp_exchange.cuh>
Classes | |
class | WarpStore< T, ITEMS_PER_THREAD, ALGORITHM, LOGICAL_WARP_THREADS, PTX_ARCH > |
The WarpStore class provides collective data movement methods for writing a blocked arrangement of items partitioned across a CUDA warp to a linear segment of memory. More... | |
struct | WarpStore< T, ITEMS_PER_THREAD, ALGORITHM, LOGICAL_WARP_THREADS, PTX_ARCH >::StoreInternal< WARP_STORE_TRANSPOSE, DUMMY >::_TempStorage |
struct | WarpStore< T, ITEMS_PER_THREAD, ALGORITHM, LOGICAL_WARP_THREADS, PTX_ARCH >::StoreInternal< WARP_STORE_TRANSPOSE, DUMMY >::TempStorage |
struct | WarpStore< T, ITEMS_PER_THREAD, ALGORITHM, LOGICAL_WARP_THREADS, PTX_ARCH >::TempStorage |
Enumerations | |
enum | WarpStoreAlgorithm { WARP_STORE_DIRECT, WARP_STORE_STRIPED, WARP_STORE_VECTORIZE, WARP_STORE_TRANSPOSE } |
cub::WarpStoreAlgorithm enumerates alternative algorithms for cub::WarpStore to write a blocked arrangement of items across a CUDA warp to a linear segment of memory. More... | |
Operations for writing linear segments of data from the CUDA warp
enum WarpStoreAlgorithm |
cub::WarpStoreAlgorithm enumerates alternative algorithms for cub::WarpStore to write a blocked arrangement of items across a CUDA warp to a linear segment of memory.
Enumerator | |
---|---|
WARP_STORE_DIRECT |
A blocked arrangement of data is written directly to memory.
|
WARP_STORE_STRIPED |
A striped arrangement of data is written directly to memory.
|
WARP_STORE_VECTORIZE |
A blocked arrangement of data is written directly to memory using CUDA's built-in vectorized stores as a coalescing optimization. For example, `st.global.v4.s32` instructions will be generated when `T = int` and `ITEMS_PER_THREAD % 4 == 0`.
|
WARP_STORE_TRANSPOSE |
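A blocked arrangement is locally transposed and then efficiently written to memory as a warp-striped arrangement.
|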