CUB
|
Classes | |
struct | cub::CachingDeviceAllocator |
A simple caching allocator for device memory allocations. More... | |
struct | cub::SwitchDevice |
RAII helper which saves the current device and switches to the specified device on construction and switches to the saved device on destruction. More... | |
struct | cub::KernelConfig |
struct | cub::ChainedPolicy< PTX_VERSION, PolicyT, PrevPolicyT > |
Helper for dispatching into a policy chain. More... | |
struct | cub::ChainedPolicy< PTX_VERSION, PolicyT, PolicyT > |
Helper for dispatching into a policy chain (end-of-chain specialization) More... | |
Macros | |
#define | CubDebug(e) CUB_NS_QUALIFIER::Debug((cudaError_t) (e), __FILE__, __LINE__) |
Debug macro. More... | |
#define | CubDebugExit(e) if (CUB_NS_QUALIFIER::Debug((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); } |
Debug macro with exit. More... | |
#define | _CubLog(format,...) printf(format,__VA_ARGS__); |
Log macro for printf statements. More... | |
Functions | |
__host__ __device__ __forceinline__ cudaError_t | cub::Debug (cudaError_t error, const char *filename, int line) |
CUB error reporting function (prints error messages to stderr) More... | |
CUB_RUNTIME_FUNCTION int | cub::CurrentDevice () |
Returns the current device or -1 if an error occurred. More... | |
CUB_RUNTIME_FUNCTION int | cub::DeviceCountUncached () |
Returns the number of CUDA devices available or -1 if an error occurred. More... | |
CUB_RUNTIME_FUNCTION int | cub::DeviceCount () |
Returns the number of CUDA devices available. More... | |
CUB_RUNTIME_FUNCTION cudaError_t | cub::PtxVersionUncached (int &ptx_version) |
Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10). More... | |
__host__ cudaError_t | cub::PtxVersionUncached (int &ptx_version, int device) |
Retrieves the PTX version that will be used on device (major * 100 + minor * 10). More... | |
__host__ cudaError_t | cub::PtxVersion (int &ptx_version, int device) |
Retrieves the PTX version that will be used on device (major * 100 + minor * 10). More... | |
CUB_RUNTIME_FUNCTION cudaError_t | cub::PtxVersion (int &ptx_version) |
Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10). More... | |
CUB_RUNTIME_FUNCTION cudaError_t | cub::SmVersionUncached (int &sm_version, int device=CurrentDevice()) |
Retrieves the SM version of device (major * 100 + minor * 10) More... | |
CUB_RUNTIME_FUNCTION cudaError_t | cub::SmVersion (int &sm_version, int device=CurrentDevice()) |
Retrieves the SM version of device (major * 100 + minor * 10) More... | |
CUB_RUNTIME_FUNCTION cudaError_t | cub::SyncStream (cudaStream_t stream) |
template<typename KernelPtr > | |
CUB_RUNTIME_FUNCTION cudaError_t | cub::MaxSmOccupancy (int &max_sm_occupancy, KernelPtr kernel_ptr, int block_threads, int dynamic_smem_bytes=0) |
Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer kernel_ptr on the current device with block_threads per thread block. More... | |
#define CubDebug | ( | e) | CUB_NS_QUALIFIER::Debug((cudaError_t) (e), __FILE__, __LINE__) |
Debug macro.
#define CubDebugExit | ( | e) | if (CUB_NS_QUALIFIER::Debug((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); } |
Debug macro with exit.
#define _CubLog | ( | format, | |
... | |||
) | printf(format,__VA_ARGS__); |
Log macro for printf statements.
__host__ __device__ __forceinline__ cudaError_t cub::Debug | ( | cudaError_t | error, |
const char * | filename, | ||
int | line | ||
) |
CUB error reporting function (prints error messages to stderr)
If `CUB_STDERR` is defined and `error` is not `cudaSuccess`, the corresponding error message is printed to `stderr` (or `stdout` in device code) along with the supplied source context.
|
inline |
Returns the current device or -1 if an error occurred.
|
inline |
Returns the number of CUDA devices available or -1 if an error occurred.
|
inline |
Returns the number of CUDA devices available.
|
inline |
Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10).
|
inline |
Retrieves the PTX version that will be used on `device` (major * 100 + minor * 10).
|
inline |
Retrieves the PTX version that will be used on `device` (major * 100 + minor * 10).
|
inline |
Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10).
|
inline |
Retrieves the SM version of `device` (major * 100 + minor * 10).
|
inline |
Retrieves the SM version of `device` (major * 100 + minor * 10).
|
inline |
Synchronizes the specified `stream`.
|
inline |
Computes the maximum SM occupancy, in thread blocks, for executing the given kernel function pointer `kernel_ptr` on the current device with `block_threads` threads per thread block.
[out] | max_sm_occupancy | maximum number of thread blocks that can reside on a single SM |
[in] | kernel_ptr | Kernel pointer for which to compute SM occupancy |
[in] | block_threads | Number of threads per thread block |
[in] | dynamic_smem_bytes | Dynamically allocated shared memory in bytes. Default is 0. |