Fermat
threads.h
1 /*
2  * cugar
3  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <cugar/basic/types.h>
31 #include <cugar/basic/numbers.h>
32 #include <cugar/basic/atomics.h>
33 #include <cugar/basic/shared_pointer.h>
34 #include <queue>
35 
36 namespace cugar {
37 
48 
51 
55 
// Core-count queries. Only the declarations are visible in this header; presumably they
// report the machine's physical and logical CPU core counts - confirm against the implementation.
56 CUGAR_API uint32 num_physical_cores();
57 CUGAR_API uint32 num_logical_cores();
58 
// Implementation types, forward-declared only: their definitions are not part of this header.
59 struct ThreadBaseImpl;
60 struct MutexImpl;
61 
63 {
64 public:
// Constructor and destructor are declared here and defined outside this header.
65  CUGAR_API ThreadBase();
66  CUGAR_API ~ThreadBase();
67 
// Store a caller-chosen numeric id in m_id.
68  void set_id(const uint32 id) { m_id = id; }
// Return the id last stored with set_id().
69  uint32 get_id() const { return m_id; }
70 
// Create the thread: func is the entry point and arg the opaque pointer handed to it.
// Defined outside this header.
72  CUGAR_API void create(void* (*func)(void*), void* arg);
73 
// Join the thread. Defined outside this header.
75  CUGAR_API void join();
76 
77 private:
78  uint32 m_id; // id set through set_id(); no inline initialization is visible in this header
80 };
81 
102 template <typename DerivedThreadType>
103 class Thread : public ThreadBase
104 {
105 public:
107  void create() { ThreadBase::create( DerivedThreadType::execute, static_cast<DerivedThreadType*>(this) ); }
108 
110  void join() { ThreadBase::join(); }
111 
112 private:
114  static void* execute(void* arg)
115  {
116  DerivedThreadType* data = reinterpret_cast<DerivedThreadType*>( arg );
117  data->run();
118  return NULL;
119  }
120 };
121 
122 //template class CUGAR_API internals::SharedCount<AtomicInt32>;
123 //template class CUGAR_API SharedPointer<MutexImpl, AtomicInt32>;
124 
// Mutex with explicit lock() / unlock() calls.
// All members are only declared here; their definitions are outside this header.
// ScopedLock (declared in this header) calls lock() in its constructor and unlock()
// in its destructor.
145 class CUGAR_API Mutex
146 {
147 public:
148  Mutex();
149  ~Mutex();
150 
151  void lock();
152  void unlock();
153 
154 private:
156 };
157 
182 {
183 public:
184  ScopedLock(Mutex* mutex) : m_mutex( mutex ) { m_mutex->lock(); }
185  ~ScopedLock() { m_mutex->unlock(); }
186 
187 private:
188  Mutex* m_mutex;
189 };
190 
199 template <typename WorkItemT, typename ProgressCallbackT>
201 {
202 public:
// Re-export the template arguments under shorter names used by the members below.
203  typedef WorkItemT WorkItem;
204  typedef ProgressCallbackT ProgressCallback;
205 
207  WorkQueue() : m_callback(), m_size(0u) {}
208 
210  void push(const WorkItem work) { m_queue.push( work ); m_size++; }
211 
213  void locked_push(const WorkItem work)
214  {
215  ScopedLock block( &m_lock );
216  m_queue.push( work ); m_size++;
217  }
218 
220  bool pop(WorkItem& work)
221  {
222  ScopedLock block( &m_lock );
223  if (m_queue.empty())
224  return false;
225 
226  work = m_queue.front();
227  m_queue.pop();
228 
229  m_callback( m_size - (uint32)m_queue.size() - 1u, m_size );
230  return true;
231  }
232 
234  void set_callback(const ProgressCallback callback) { m_callback = callback; }
235 
236 private:
237  ProgressCallback m_callback; // called by pop() with (index of popped item, m_size)
238  std::queue<WorkItem> m_queue; // pending work items
239  Mutex m_lock; // taken by locked_push() and pop(); push() does not take it
240  uint32 m_size; // count of items pushed so far; incremented on push, never decremented
241 };
242 
244 inline uint32 balance_batch_size(uint32 batch_size, uint32 total_count, uint32 thread_count)
245 {
246  // How many batches we'd get with the proposed batch_size
247  const uint32 batch_count = divide_ri(total_count, batch_size);
248  // How many rounds we'd need for those batches
249  const uint32 rounds = divide_ri(batch_count, thread_count);
250  // Might as well assume all threads should work, and see how many batches
251  // they would consume
252  const uint32 bal_batches = rounds * thread_count;
253  // So that the batch size that will attain it, is computed as follows
254  return divide_ri(total_count, bal_batches);
255 }
256 
// Declared here, defined outside this header. Presumably gives up the calling thread's
// time slice - confirm against the implementation.
257 CUGAR_API void yield();
258 
261 
262 } // namespace cugar
Definition: threads.h:62
void locked_push(const WorkItem work)
push a work item in the queue
Definition: threads.h:213
void set_callback(const ProgressCallback callback)
set a callback
Definition: threads.h:234
CUGAR_API void join()
join the thread
Definition: threads.cpp:286
void push(const WorkItem work)
push a work item in the queue
Definition: threads.h:210
bool pop(WorkItem &work)
pop the next work item from the queue
Definition: threads.h:220
Definition: shared_pointer.h:345
Definition: threads.h:181
uint32 balance_batch_size(uint32 batch_size, uint32 total_count, uint32 thread_count)
return a number close to batch_size that achieves best threading balance
Definition: threads.h:244
CUGAR_HOST_DEVICE L divide_ri(const L x, const R y)
Definition: numbers.h:180
Definition: threads.h:200
Definition: threads.h:145
WorkQueue()
empty constructor
Definition: threads.h:207
Define a vector_view POD type and plain_view() for std::vector.
Definition: diff.h:38
void join()
join the thread
Definition: threads.h:110
void create()
create the thread
Definition: threads.h:107
CUGAR_API void create(void *(*func)(void *), void *arg)
create the thread
Definition: threads.cpp:281
Definition: threads.h:103