CUB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups
Classes | Namespaces | Enumerations
block_store.cuh File Reference
#include <iterator>
#include "block_exchange.cuh"
#include "../util_ptx.cuh"
#include "../util_macro.cuh"
#include "../util_type.cuh"
#include "../util_namespace.cuh"

Classes

class  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >
 The BlockStore class provides collective data movement methods for writing a blocked arrangement of items partitioned across a CUDA thread block to a linear segment of memory.

block_store_logo.png
.
More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_TRANSPOSE, DUMMY >::_TempStorage
 Shared memory storage layout type. More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_TRANSPOSE, DUMMY >::TempStorage
 Alias wrapper allowing storage to be unioned. More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE, DUMMY >::_TempStorage
 Shared memory storage layout type. More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE, DUMMY >::TempStorage
 Alias wrapper allowing storage to be unioned. More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY >::_TempStorage
 Shared memory storage layout type. More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::StoreInternal< BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, DUMMY >::TempStorage
 Alias wrapper allowing storage to be unioned. More...
 
struct  cub::BlockStore< T, BLOCK_DIM_X, ITEMS_PER_THREAD, ALGORITHM, BLOCK_DIM_Y, BLOCK_DIM_Z, PTX_ARCH >::TempStorage
 The operations exposed by BlockStore require a temporary memory allocation of this nested type for thread communication. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated memory (shared or global) or union'd with other storage allocation types to facilitate memory reuse. More...
 

Namespaces

 cub
 Optional outer namespace(s)
 

Enumerations

enum  cub::BlockStoreAlgorithm {
  cub::BLOCK_STORE_DIRECT, cub::BLOCK_STORE_VECTORIZE, cub::BLOCK_STORE_TRANSPOSE, cub::BLOCK_STORE_WARP_TRANSPOSE,
  cub::BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED
}
 cub::BlockStoreAlgorithm enumerates alternative algorithms for cub::BlockStore to write a blocked arrangement of items across a CUDA thread block to a linear segment of memory. More...
 

Functions

Blocked arrangement I/O (direct)
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void cub::StoreDirectBlocked (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD])
 Store a blocked arrangement of items across a thread block into a linear segment of items. More...
 
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void cub::StoreDirectBlocked (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD], int valid_items)
 Store a blocked arrangement of items across a thread block into a linear segment of items, guarded by range. More...
 
template<typename T , int ITEMS_PER_THREAD>
__device__ __forceinline__ void cub::StoreDirectBlockedVectorized (int linear_tid, T *block_ptr, T(&items)[ITEMS_PER_THREAD])
 Store a blocked arrangement of items across a thread block into a linear segment of items. More...
 
Striped arrangement I/O (direct)
template<int BLOCK_THREADS, typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void cub::StoreDirectStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD])
 Store a striped arrangement of data across the thread block into a linear segment of items. More...
 
template<int BLOCK_THREADS, typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void cub::StoreDirectStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD], int valid_items)
 Store a striped arrangement of data across the thread block into a linear segment of items, guarded by range. More...
 
Warp-striped arrangement I/O (direct)
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void cub::StoreDirectWarpStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD])
 Store a warp-striped arrangement of data across the thread block into a linear segment of items. More...
 
template<typename T , int ITEMS_PER_THREAD, typename OutputIteratorT >
__device__ __forceinline__ void cub::StoreDirectWarpStriped (int linear_tid, OutputIteratorT block_itr, T(&items)[ITEMS_PER_THREAD], int valid_items)
 Store a warp-striped arrangement of data across the thread block into a linear segment of items, guarded by range. More...
 

Detailed Description

Operations for writing linear segments of data from the CUDA thread block