NVBIO
syncblocks_inl.h
/*
 * nvbio
 * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *    * Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    * Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *    * Neither the name of the NVIDIA CORPORATION nor the
 *      names of its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#pragma once

#include <nvbio/basic/types.h>
#include <nvbio/basic/numbers.h>
#include <thrust/device_vector.h>
#include <thrust/iterator/constant_iterator.h>

namespace nvbio {
namespace cuda {
// constructor
//
NVBIO_FORCEINLINE NVBIO_HOST_DEVICE
syncblocks::syncblocks(int32* counter) : m_counter( counter ) {}

// implements an inter-CTA synchronization primitive
//
NVBIO_FORCEINLINE NVBIO_DEVICE
bool syncblocks::enact(const uint32 max_iter)
{
    __threadfence();
    __syncthreads();

    // Each block does an atomicAdd on an integer, waiting for all CTAs to be
    // counted; when this happens, a global semaphore is released.
    // The CTA counter always increases across multiple calls to syncblocks,
    // so that its value indicates which syncblocks instance each CTA is
    // participating in.
    // Similarly, the semaphore is monotonically increasing: as soon as the
    // semaphore is higher than the syncblocks instance a CTA has just entered,
    // the semaphore is considered 'released' for that instance.
    __shared__ volatile bool ret;
    if (threadIdx.x == 0)
    {
        const uint32 grid_size = gridDim.x * gridDim.y * gridDim.z;

        int32* semaphore = (m_counter + 1);

        // add 1 atomically to the shared counter
        const uint32 slot = atomicAdd( m_counter, 1 );

        // compute which syncblocks instance we are participating in, based on
        // the result of the atomicAdd
        const uint32 iteration = slot / grid_size;

        const bool is_last_block = (slot - iteration*grid_size) == (grid_size-1);
        if (is_last_block)
        {
            // release the semaphore
            atomicAdd( semaphore, 1 );
        }

        // wait for the semaphore write to become public
        __threadfence();

        // spin until the semaphore is released
        for (uint32 iter = 0; iter < max_iter && *(volatile int32*)semaphore <= iteration; ++iter) {}

        ret = (*(volatile int32*)semaphore > iteration);
    }

    // synchronize all threads in this CTA
    __syncthreads();
    return ret;
}
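
// ------------------------------------------------------------------------
// Usage sketch: a minimal, hypothetical kernel that uses syncblocks as a
// grid-wide barrier between two phases. The kernel name, its parameters
// and the max_iter value below are illustrative assumptions, not part of
// the nvbio API. Note that enact() can only succeed if all CTAs of the
// launch are resident on the device at the same time, so the grid must be
// sized accordingly.
//
__global__ void two_phase_sketch(syncblocks barrier, const int32* in, int32* tmp, int32* out, const uint32 n)
{
    const uint32 i = blockIdx.x * blockDim.x + threadIdx.x;

    // phase 1: each thread writes a partial result to global memory
    if (i < n)
        tmp[i] = in[i] * 2;

    // grid-wide barrier: false means the spin-wait exceeded max_iter
    if (!barrier.enact( 1000000u ))
        return;

    // phase 2: all the phase-1 writes are now visible to every CTA
    if (i < n)
        out[i] = tmp[n - 1 - i];
}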

// constructor
//
inline syncblocks_storage::syncblocks_storage()
{
    // alloc a counter and a semaphore
    m_counter.resize( 2, 0 );
}

// return a syncblocks object
//
inline syncblocks syncblocks_storage::get()
{
    return syncblocks( thrust::raw_pointer_cast( &m_counter.front() ) );
}

// clear the syncblocks, useful if one wants to reuse it
// across differently sized kernel launches.
//
inline void syncblocks_storage::clear()
{
    thrust::fill( m_counter.begin(), m_counter.end(), 0 );
}
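
// ------------------------------------------------------------------------
// Usage sketch: hypothetical host-side setup for the kernel sketched above.
// syncblocks_storage owns the device-side counter and semaphore; get()
// returns a lightweight syncblocks view over them, and clear() resets the
// counters so the barrier can be reused by a launch with a different grid
// size. The launcher name and the launch configuration are illustrative
// assumptions; the grid must be small enough that all CTAs are resident
// on the device at once.
//
inline void two_phase_launch_sketch(const int32* in, int32* tmp, int32* out, const uint32 n)
{
    syncblocks_storage barrier_storage;   // allocates { counter, semaphore }

    const uint32 block_dim = 128;
    const uint32 grid_dim  = 64;

    two_phase_sketch<<<grid_dim, block_dim>>>( barrier_storage.get(), in, tmp, out, n );
    cudaDeviceSynchronize();

    // reset the counters before reusing the barrier with a new grid size
    barrier_storage.clear();
}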

} // namespace cuda
} // namespace nvbio