NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
arch.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <nvbio/basic/types.h>
31 #include <nvbio/basic/numbers.h>
32 #include <nvbio/basic/console.h>
33 #include <nvbio/basic/exceptions.h>
34 #include <cuda_runtime.h>
35 #include <thrust/version.h>
36 
37 // used for thrust_copy_dtoh only
38 #include <thrust/device_vector.h>
39 #include <thrust/host_vector.h>
40 
41 namespace nvbio {
42 namespace cuda {
43 
44 struct Arch // compile-time warp-size constants
45 {
46  static const uint32 LOG_WARP_SIZE = 5; // base-2 logarithm of WARP_SIZE
47  static const uint32 WARP_SIZE = 1u << LOG_WARP_SIZE; // 1 << 5 = 32
48 };
49 
50 // architecture version, as major and minor numbers (for the current device)
51 inline void device_arch(uint32& major, uint32& minor);
52 
53 // maximum grid size (for the current device)
54 inline uint32 max_grid_size();
55 
56 // number of multiprocessors (for the current device)
57 inline size_t multiprocessor_count();
58 
59 // granularity of shared memory allocation
60 inline size_t smem_allocation_unit(const cudaDeviceProp& properties);
61 
62 // granularity of register allocation
63 inline size_t reg_allocation_unit(const cudaDeviceProp& properties, const size_t regsPerThread);
64 
65 // granularity of warp allocation
66 inline size_t warp_allocation_multiple(const cudaDeviceProp& properties);
67 
68 // number of "sides" into which the multiprocessor is partitioned
69 inline size_t num_sides_per_multiprocessor(const cudaDeviceProp& properties);
70 
71 // maximum number of blocks per multiprocessor
72 inline size_t max_blocks_per_multiprocessor(const cudaDeviceProp& properties);
73 
74 // number of registers allocated per block
75 inline size_t num_regs_per_block(const cudaDeviceProp& properties, const cudaFuncAttributes& attributes, const size_t CTA_SIZE);
76 
77 template <typename KernelFunction>
78 inline cudaFuncAttributes function_attributes(KernelFunction kernel);
79 
80 template <typename KernelFunction>
81 size_t max_active_blocks_per_multiprocessor(KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes);
82 
83 template <typename KernelFunction>
84 size_t max_active_blocks(KernelFunction kernel, const size_t CTA_SIZE, const size_t dynamic_smem_bytes);
85 
86 template <typename KernelFunction>
87 size_t num_registers(KernelFunction kernel);
88 
89 template <typename KernelFunction>
90 size_t max_blocksize_with_highest_occupancy(KernelFunction kernel, size_t dynamic_smem_bytes_per_thread);
91 
92 inline bool is_tcc_enabled();
93 
94 inline void check_error(const char *message);
95 
99 template <uint32 N>
101 void syncthreads();
102 
103 } // namespace cuda
104 } // namespace nvbio
105