Fermat
arch.h
1 /*
2  * cugar
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <cugar/basic/types.h>
31 #include <cugar/basic/numbers.h>
32 #include <cugar/basic/exceptions.h>
33 #include <cugar/basic/threads.h>
34 #include <cuda_runtime.h>
35 #include <thrust/version.h>
36 
37 // used for thrust_copy_dtoh only
38 #include <thrust/device_vector.h>
39 #include <thrust/host_vector.h>
40 
41 namespace cugar {
42 namespace cuda {
43 
46 
50 
51 struct CUGAR_API arch // compile-time warp-size constants
52 {
53  static const uint32 LOG_WARP_SIZE = 5; // base-2 logarithm of the warp size
54  static const uint32 WARP_SIZE = 1u << LOG_WARP_SIZE; // warp size, derived from the log: 1 << 5 = 32
55 };
56 
59 struct CUGAR_API cuda_devices // holder of CUDA device properties, accessed through the static get() below
60 {
// NOTE(review): the symbol index at the end of this listing places `int device_count` at arch.h:61, but that line is absent here -- confirm against the original header.
62  cudaDeviceProp* properties; // device properties
63 
66  static cuda_devices* get(); // static accessor returning a cuda_devices pointer (presumably the singleton stored in s_cuda_devices -- confirm in the implementation)
67 
68 private:
69  // private constructor: instances cannot be created directly by outside code
70  cuda_devices();
71 
72  static Mutex s_mutex; // internal mutex
73  static cuda_devices* volatile s_cuda_devices; // internal pointer to the singleton object
74 };
75 
76 // get device properties (for the current device)
77 inline cudaDeviceProp get_device_properties();
78 
79 // device architecture version, returned as major and minor numbers (for the current device)
80 inline void device_arch(uint32& major, uint32& minor);
81 
82 // maximum grid size (for the current device)
83 inline uint32 max_grid_size();
84 
85 // number of multiprocessors (for the current device)
86 inline size_t multiprocessor_count();
87 
88 // granularity of shared memory allocation
89 inline size_t smem_allocation_unit(const cudaDeviceProp& properties);
90 
91 // granularity of register allocation
92 inline size_t reg_allocation_unit(const cudaDeviceProp& properties, const size_t regsPerThread);
93 
94 // granularity of warp allocation
95 inline size_t warp_allocation_multiple(const cudaDeviceProp& properties);
96 
97 // number of "sides" into which the multiprocessor is partitioned
98 inline size_t num_sides_per_multiprocessor(const cudaDeviceProp& properties);
99 
100 // maximum number of blocks per multiprocessor
101 inline size_t max_blocks_per_multiprocessor(const cudaDeviceProp& properties);
102 
103 // number of registers allocated per block
104 inline size_t num_regs_per_block(const cudaDeviceProp& properties, const cudaFuncAttributes& attributes, const size_t CTA_SIZE);
105 
106 template <typename KernelFunction>
107 inline cudaFuncAttributes function_attributes(KernelFunction kernel); // returns the cudaFuncAttributes associated with the given kernel
108 
109 template <typename KernelFunction>
110 size_t max_active_blocks_per_multiprocessor(KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes); // per the names: max concurrently active blocks of `kernel` on one multiprocessor, given a block size and a dynamic shared memory size -- TODO confirm units in arch_inl.h
111 
112 template <typename KernelFunction>
113 size_t max_active_blocks(KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes); // same inputs as above; presumably the device-wide count of active blocks -- confirm in arch_inl.h
114 
115 template <typename KernelFunction>
116 size_t num_registers(KernelFunction kernel); // presumably the register count reported for `kernel` -- confirm whether per thread in arch_inl.h
117 
118 template <typename KernelFunction>
119 size_t max_blocksize_with_highest_occupancy(KernelFunction kernel, size_t dynamic_smem_bytes_per_thread); // per the name: largest block size attaining the highest occupancy; note the shared memory argument is expressed per thread here, unlike the per-call byte count above
120 
121 inline bool is_tcc_enabled(); // NOTE(review): presumably reports whether the TCC driver mode is enabled for the current device -- confirm in arch_inl.h
122 
123 inline void check_error(const char *message); // error check taking only a caller-supplied message; how failure is reported is not visible in this header
124 inline void check_error(cudaError_t, const char *message); // overload that additionally takes an explicit cudaError_t value
125 
129 template <uint32 N>
130 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
131 void syncthreads(); // defined in arch_inl.h; the meaning of the template parameter N is not visible here -- TODO confirm
132 
135 
136 } // namespace cuda
137 } // namespace cugar
138 
139 #include <cugar/basic/cuda/arch_inl.h>
int device_count
device count
Definition: arch.h:61
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void syncthreads()
Definition: arch_inl.h:344
Definition: threads.h:145
Definition: arch.h:51
Define a vector_view POD type and plain_view() for std::vector.
Definition: diff.h:38
cudaDeviceProp * properties
device properties
Definition: arch.h:62
Definition: arch.h:59