Fermat
|
This module implements CUDA-specific data structures and functions.
Modules | |
CUDA Allocators | |
Hash Maps | |
Parallel Primitives | |
Sort Enactors | |
CUDA Atomics | |
Classes | |
struct | cugar::cuda::arch |
struct | cugar::cuda::cuda_devices |
struct | cugar::cuda::load_pointer< T, MOD > |
struct | cugar::cuda::store_reference< T, STORE_MOD, LOAD_MOD > |
struct | cugar::cuda::store_pointer< T, STORE_MOD, LOAD_MOD > |
struct | cugar::cuda::Timer |
struct | cugar::cuda::ScopedTimer< T > |
Enumerations | |
enum | cugar::cuda::CacheLoadModifier { cugar::cuda::LOAD_DEFAULT, cugar::cuda::LOAD_CA, cugar::cuda::LOAD_CG, cugar::cuda::LOAD_CS, cugar::cuda::LOAD_CV, cugar::cuda::LOAD_LDG, cugar::cuda::LOAD_VOLATILE } |
Enumeration of cache modifiers for memory load operations. More... | |
enum | cugar::cuda::CacheStoreModifier { cugar::cuda::STORE_DEFAULT, cugar::cuda::STORE_WB, cugar::cuda::STORE_CG, cugar::cuda::STORE_CS, cugar::cuda::STORE_WT, cugar::cuda::STORE_VOLATILE } |
Enumeration of cache modifiers for memory store operations. More... | |
Functions | |
cudaDeviceProp | cugar::cuda::get_device_properties () |
void | cugar::cuda::device_arch (uint32 &major, uint32 &minor) |
uint32 | cugar::cuda::max_grid_size () |
size_t | cugar::cuda::multiprocessor_count () |
size_t | cugar::cuda::smem_allocation_unit (const cudaDeviceProp &properties) |
size_t | cugar::cuda::reg_allocation_unit (const cudaDeviceProp &properties, const size_t regsPerThread) |
size_t | cugar::cuda::warp_allocation_multiple (const cudaDeviceProp &properties) |
size_t | cugar::cuda::num_sides_per_multiprocessor (const cudaDeviceProp &properties) |
size_t | cugar::cuda::max_blocks_per_multiprocessor (const cudaDeviceProp &properties) |
size_t | cugar::cuda::num_regs_per_block (const cudaDeviceProp &properties, const cudaFuncAttributes &attributes, const size_t CTA_SIZE) |
template<typename KernelFunction > | |
cudaFuncAttributes | cugar::cuda::function_attributes (KernelFunction kernel) |
template<typename KernelFunction > | |
size_t | cugar::cuda::max_active_blocks_per_multiprocessor (KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes) |
template<typename KernelFunction > | |
size_t | cugar::cuda::max_active_blocks (KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes) |
template<typename KernelFunction > | |
size_t | cugar::cuda::num_registers (KernelFunction kernel) |
template<typename KernelFunction > | |
size_t | cugar::cuda::max_blocksize_with_highest_occupancy (KernelFunction kernel, size_t dynamic_smem_bytes_per_thread) |
bool | cugar::cuda::is_tcc_enabled () |
void | cugar::cuda::check_error (const char *message) |
void | cugar::cuda::check_error (cudaError_t, const char *message) |
template<uint32 N> | |
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void | cugar::cuda::syncthreads () |
template<CacheLoadModifier LOAD_MOD, typename T > | |
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE T | cugar::cuda::load (const T *ptr) |
template<CacheStoreModifier STORE_MOD, typename T > | |
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void | cugar::cuda::store (T *ptr, const T &value) |
template<CacheLoadModifier MOD, typename T > | |
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE load_pointer< T, MOD > | cugar::cuda::make_load_pointer (const T *it) |
template<CacheStoreModifier STORE_MOD, CacheLoadModifier LOAD_MOD, typename T > | |
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE store_pointer< T, STORE_MOD, LOAD_MOD > | cugar::cuda::make_store_pointer (const T *it) |
cugar::cuda::Timer::Timer () | |
cugar::cuda::Timer::~Timer () | |
void | cugar::cuda::Timer::start () |
void | cugar::cuda::Timer::stop () |
float | cugar::cuda::Timer::seconds () const |
Enumeration of cache modifiers for memory load operations.
Enumeration of cache modifiers for memory store operations.
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE T cugar::cuda::load | ( | const T * | ptr | ) |
issue a load
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE load_pointer<T,MOD> cugar::cuda::make_load_pointer | ( | const T * | it | ) |
make a load_pointer
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE store_pointer<T,STORE_MOD,LOAD_MOD> cugar::cuda::make_store_pointer | ( | const T * | it | ) |
make a store_pointer
|
inline |
elapsed seconds
|
inline |
start timing
|
inline |
stop timing
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void cugar::cuda::store | ( | T * | ptr, |
const T & | value | ||
) |
issue a store
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void cugar::cuda::syncthreads | ( | ) |
a generic syncthreads() implementation to synchronize contiguous blocks of N threads at a time
|
inline |
constructor
|
inline |
destructor