30 #include <cugar/basic/types.h> 31 #include <cugar/basic/numbers.h> 32 #include <cugar/basic/exceptions.h> 33 #include <cugar/basic/threads.h> 34 #include <cuda_runtime.h> 35 #include <thrust/version.h> 38 #include <thrust/device_vector.h> 39 #include <thrust/host_vector.h> 53 static const uint32 LOG_WARP_SIZE = 5;
// Warp size expressed as a power of two: 1 shifted left by LOG_WARP_SIZE.
// With LOG_WARP_SIZE defined as 5 just above, this evaluates to 32.
54 static const uint32 WARP_SIZE = 1u << LOG_WARP_SIZE;
// Returns a cudaDeviceProp structure by value; takes no arguments.
// Declaration only: the body is not part of this listing.
// NOTE(review): presumably queries the currently active CUDA device — confirm against the definition.
77 inline cudaDeviceProp get_device_properties();
// Takes two non-const uint32 references, `major` and `minor`, and returns void,
// so any result is presumably delivered through those references.
// NOTE(review): the names suggest the compute-capability major/minor version — confirm against the definition.
80 inline void device_arch(uint32& major, uint32& minor);
// Returns a uint32; takes no arguments. Declaration only.
// NOTE(review): presumably an upper bound on the launch grid size for the current device;
// which grid dimension it refers to cannot be told from here — confirm.
83 inline uint32 max_grid_size();
// Returns a size_t; takes no arguments. Declaration only.
// NOTE(review): presumably the number of multiprocessors on the current device — confirm.
86 inline size_t multiprocessor_count();
// Computes a size_t from the given device properties (passed by const reference).
// Declaration only.
// NOTE(review): the name suggests the shared-memory allocation granularity;
// the unit (bytes?) is not visible here — confirm.
89 inline size_t smem_allocation_unit(
const cudaDeviceProp& properties);
// Computes a size_t from the device properties and a per-thread register count.
// Declaration only.
//   properties    - device properties, passed by const reference
//   regsPerThread - number of registers per thread (by name)
// NOTE(review): the name suggests the register allocation granularity — confirm against the definition.
92 inline size_t reg_allocation_unit(
const cudaDeviceProp& properties,
const size_t regsPerThread);
// Computes a size_t from the given device properties (passed by const reference).
// Declaration only.
// NOTE(review): presumably the multiple to which per-block warp counts are rounded
// for allocation purposes — confirm.
95 inline size_t warp_allocation_multiple(
const cudaDeviceProp& properties);
// Computes a size_t from the given device properties (passed by const reference).
// Declaration only.
// NOTE(review): what a "side" of a multiprocessor denotes is not established
// anywhere in this listing — check the definition before relying on it.
98 inline size_t num_sides_per_multiprocessor(
const cudaDeviceProp& properties);
// Computes a size_t from the given device properties (passed by const reference).
// Declaration only.
// NOTE(review): presumably the hardware limit on resident blocks per multiprocessor,
// independent of any particular kernel — confirm.
101 inline size_t max_blocks_per_multiprocessor(
const cudaDeviceProp& properties);
// Computes a size_t from device properties, a kernel's function attributes and a block size.
// Declaration only.
//   properties - device properties, passed by const reference
//   attributes - cudaFuncAttributes of the kernel of interest, passed by const reference
//   CTA_SIZE   - NOTE(review): presumably the number of threads per block (CTA) — confirm
// NOTE(review): presumably the number of registers a block of that size consumes,
// after allocation rounding — confirm.
104 inline size_t num_regs_per_block(
const cudaDeviceProp& properties,
const cudaFuncAttributes& attributes,
const size_t CTA_SIZE);
// Function template, generic over the kernel type: takes `kernel` by value and
// returns a cudaFuncAttributes structure by value. Declaration only.
// NOTE(review): presumably a wrapper around the runtime's function-attribute query;
// how errors are handled is not visible here — confirm.
106 template <
typename KernelFunction>
107 inline cudaFuncAttributes function_attributes(KernelFunction kernel);
// Function template, generic over the kernel type; returns a size_t. Declaration only.
//   kernel             - the kernel to analyse, passed by value
//   CTA_SIZE           - NOTE(review): presumably threads per block — confirm
//   dynamic_smem_bytes - dynamic shared-memory amount in bytes (by name);
//                        whether it is per block is an assumption to verify
// NOTE(review): presumably the number of blocks of this kernel that can be resident
// on one multiprocessor at once — confirm.
109 template <
typename KernelFunction>
110 size_t max_active_blocks_per_multiprocessor(KernelFunction kernel,
const size_t CTA_SIZE,
const size_t dynamic_smem_bytes);
// Function template, generic over the kernel type; returns a size_t. Declaration only.
// Takes the same parameter list as max_active_blocks_per_multiprocessor declared above.
//   kernel             - the kernel to analyse, passed by value
//   CTA_SIZE           - NOTE(review): presumably threads per block — confirm
//   dynamic_smem_bytes - dynamic shared-memory amount in bytes (by name)
// NOTE(review): presumably the device-wide count of simultaneously resident blocks — confirm.
112 template <
typename KernelFunction>
113 size_t max_active_blocks(KernelFunction kernel,
const size_t CTA_SIZE,
const size_t dynamic_smem_bytes);
// Function template, generic over the kernel type: takes `kernel` by value and
// returns a size_t. Declaration only.
// NOTE(review): presumably the per-thread register count of the kernel — confirm.
115 template <
typename KernelFunction>
116 size_t num_registers(KernelFunction kernel);
// Function template, generic over the kernel type; returns a size_t. Declaration only.
//   kernel                        - the kernel to analyse, passed by value
//   dynamic_smem_bytes_per_thread - dynamic shared memory in bytes per thread (by name);
//                                   note this differs from the per-call byte count taken
//                                   by the max_active_blocks* declarations above
// NOTE(review): presumably the largest block size that achieves the best occupancy
// for this kernel — confirm the tie-breaking rule against the definition.
118 template <
typename KernelFunction>
119 size_t max_blocksize_with_highest_occupancy(KernelFunction kernel,
size_t dynamic_smem_bytes_per_thread);
// Returns a bool; takes no arguments. Declaration only.
// NOTE(review): presumably reports whether the current device runs under the TCC
// driver model — confirm against the definition.
121 inline bool is_tcc_enabled();
// Overload taking only a message string; returns void. Declaration only.
//   message - C string supplied by the caller
// NOTE(review): presumably inspects the pending CUDA error state and reports `message`
// on failure; whether it throws, logs, or aborts is not visible here — confirm.
123 inline void check_error(
const char *message);
// Overload taking an explicit cudaError_t (the parameter is unnamed in this declaration)
// plus a message string; returns void. Declaration only.
//   message - C string supplied by the caller
// NOTE(review): presumably reports `message` when the given error code is not success;
// the failure mechanism (exception, log, abort) is not visible here — confirm.
124 inline void check_error(cudaError_t,
const char *message);
130 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
139 #include <cugar/basic/cuda/arch_inl.h> int device_count
device count
Definition: arch.h:61
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void syncthreads()
Definition: arch_inl.h:344
Definition: threads.h:145
Define a vector_view POD type and plain_view() for std::vector.
Definition: diff.h:38
cudaDeviceProp * properties
device properties
Definition: arch.h:62