Fermat
Modules | Classes | Enumerations | Functions
CUDA

Detailed Description

This module implements CUDA-specific data structures and functions.

Modules

 CUDA Allocators
 
 Hash Maps
 
 Parallel Primitives
 
 Sort Enactors
 
 CUDA Atomics
 

Classes

struct  cugar::cuda::arch
 
struct  cugar::cuda::cuda_devices
 
struct  cugar::cuda::load_pointer< T, MOD >
 
struct  cugar::cuda::store_reference< T, STORE_MOD, LOAD_MOD >
 
struct  cugar::cuda::store_pointer< T, STORE_MOD, LOAD_MOD >
 
struct  cugar::cuda::Timer
 
struct  cugar::cuda::ScopedTimer< T >
 

Enumerations

enum  cugar::cuda::CacheLoadModifier {
  cugar::cuda::LOAD_DEFAULT, cugar::cuda::LOAD_CA, cugar::cuda::LOAD_CG, cugar::cuda::LOAD_CS,
  cugar::cuda::LOAD_CV, cugar::cuda::LOAD_LDG, cugar::cuda::LOAD_VOLATILE
}
 Enumeration of cache modifiers for memory load operations. More...
 
enum  cugar::cuda::CacheStoreModifier {
  cugar::cuda::STORE_DEFAULT, cugar::cuda::STORE_WB, cugar::cuda::STORE_CG, cugar::cuda::STORE_CS,
  cugar::cuda::STORE_WT, cugar::cuda::STORE_VOLATILE
}
 Enumeration of cache modifiers for memory store operations. More...
 

Functions

cudaDeviceProp cugar::cuda::get_device_properties ()
 
void cugar::cuda::device_arch (uint32 &major, uint32 &minor)
 
uint32 cugar::cuda::max_grid_size ()
 
size_t cugar::cuda::multiprocessor_count ()
 
size_t cugar::cuda::smem_allocation_unit (const cudaDeviceProp &properties)
 
size_t cugar::cuda::reg_allocation_unit (const cudaDeviceProp &properties, const size_t regsPerThread)
 
size_t cugar::cuda::warp_allocation_multiple (const cudaDeviceProp &properties)
 
size_t cugar::cuda::num_sides_per_multiprocessor (const cudaDeviceProp &properties)
 
size_t cugar::cuda::max_blocks_per_multiprocessor (const cudaDeviceProp &properties)
 
size_t cugar::cuda::num_regs_per_block (const cudaDeviceProp &properties, const cudaFuncAttributes &attributes, const size_t CTA_SIZE)
 
template<typename KernelFunction >
cudaFuncAttributes cugar::cuda::function_attributes (KernelFunction kernel)
 
template<typename KernelFunction >
size_t cugar::cuda::max_active_blocks_per_multiprocessor (KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes)
 
template<typename KernelFunction >
size_t cugar::cuda::max_active_blocks (KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes)
 
template<typename KernelFunction >
size_t cugar::cuda::num_registers (KernelFunction kernel)
 
template<typename KernelFunction >
size_t cugar::cuda::max_blocksize_with_highest_occupancy (KernelFunction kernel, size_t dynamic_smem_bytes_per_thread)
 
bool cugar::cuda::is_tcc_enabled ()
 
void cugar::cuda::check_error (const char *message)
 
void cugar::cuda::check_error (cudaError_t, const char *message)
 
template<uint32 N>
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void cugar::cuda::syncthreads ()
 
template<CacheLoadModifier LOAD_MOD, typename T >
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE T cugar::cuda::load (const T *ptr)
 
template<CacheStoreModifier STORE_MOD, typename T >
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void cugar::cuda::store (T *ptr, const T &value)
 
template<CacheLoadModifier MOD, typename T >
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE load_pointer< T, MOD > cugar::cuda::make_load_pointer (const T *it)
 
template<CacheStoreModifier STORE_MOD, CacheLoadModifier LOAD_MOD, typename T >
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE store_pointer< T, STORE_MOD, LOAD_MOD > cugar::cuda::make_store_pointer (const T *it)
 
 cugar::cuda::Timer::Timer ()
 
 cugar::cuda::Timer::~Timer ()
 
void cugar::cuda::Timer::start ()
 
void cugar::cuda::Timer::stop ()
 
float cugar::cuda::Timer::seconds () const
 

Enumeration Type Documentation

◆ CacheLoadModifier

Enumeration of cache modifiers for memory load operations.

Enumerator
LOAD_DEFAULT 

Default (no modifier)

LOAD_CA 

Cache at all levels.

LOAD_CG 

Cache at global level.

LOAD_CS 

Cache streaming (likely to be accessed once)

LOAD_CV 

Cache as volatile (including cached system lines)

LOAD_LDG 

Cache as texture.

LOAD_VOLATILE 

Volatile (any memory space)

◆ CacheStoreModifier

Enumeration of cache modifiers for memory store operations.

Enumerator
STORE_DEFAULT 

Default (no modifier)

STORE_WB 

Cache write-back all coherent levels.

STORE_CG 

Cache at global level.

STORE_CS 

Cache streaming (likely to be accessed once)

STORE_WT 

Cache write-through (to system memory)

STORE_VOLATILE 

Volatile shared (any memory space)

Function Documentation

◆ load()

template<CacheLoadModifier LOAD_MOD, typename T >
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE T cugar::cuda::load ( const T *  ptr)

issue a load

◆ make_load_pointer()

template<CacheLoadModifier MOD, typename T >
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE load_pointer<T,MOD> cugar::cuda::make_load_pointer ( const T *  it)

make a load_pointer

◆ make_store_pointer()

template<CacheStoreModifier STORE_MOD, CacheLoadModifier LOAD_MOD, typename T >
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE store_pointer<T,STORE_MOD,LOAD_MOD> cugar::cuda::make_store_pointer ( const T *  it)

make a store_pointer

◆ seconds()

float cugar::cuda::Timer::seconds ( ) const
inline

elapsed seconds

◆ start()

void cugar::cuda::Timer::start ( )
inline

start timing

◆ stop()

void cugar::cuda::Timer::stop ( )
inline

stop timing

◆ store()

template<CacheStoreModifier STORE_MOD, typename T >
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void cugar::cuda::store ( T *  ptr,
const T &  value 
)

issue a store

◆ syncthreads()

template<uint32 N>
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void cugar::cuda::syncthreads ( )

a generic syncthreads() implementation to synchronize contiguous blocks of N threads at a time

◆ Timer()

cugar::cuda::Timer::Timer ( )
inline

constructor

◆ ~Timer()

cugar::cuda::Timer::~Timer ( )
inline

destructor