Fermat
primitives.h
1 /*
2  * CUGAR : Cuda Graphics Accelerator
3  *
4  * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #pragma once
30 
31 #include <cugar/basic/types.h>
32 #include <cugar/basic/numbers.h>
33 #include <cugar/basic/functors.h>
34 #include <cugar/basic/vector.h>
35 #include <cugar/basic/algorithms.h>
36 #include <cugar/basic/cuda/arch.h>
37 #include <cugar/basic/cuda/timer.h>
38 #include <thrust/reduce.h>
39 #include <thrust/scan.h>
40 #include <thrust/copy.h>
41 #include <thrust/sort.h>
42 #include <thrust/binary_search.h>
43 #include <thrust/merge.h>
44 #include <thrust/iterator/constant_iterator.h>
45 
46 #if defined(__CUDACC__)
47 #include <cugar/basic/cuda/primitives.h>
48 #include <cugar/basic/cuda/sort.h>
49 #endif
50 
51 #if defined (_OPENMP)
52 #include <omp.h>
53 #endif
54 
83 
84 namespace cugar {
85 
88 
95 
98 template <typename system_tag, typename PredicateIterator>
99 bool any(
100  const uint32 n,
101  const PredicateIterator pred);
102 
105 template <typename system_tag, typename PredicateIterator>
106 bool all(
107  const uint32 n,
108  const PredicateIterator pred);
109 
112 template <typename system_tag, typename Iterator>
113 bool is_sorted(
114  const uint32 n,
115  const Iterator values);
116 
120 template <typename system_tag, typename Iterator, typename Headflags>
121 bool is_segment_sorted(
122  const uint32 n,
123  const Iterator values,
124  const Headflags flags);
125 
126 
129 template <typename system_tag, typename Iterator, typename Functor>
130 void for_each(
131  const uint64 n,
132  const Iterator in,
133  Functor functor);
134 
137 template <typename system_tag, typename Iterator, typename Output, typename Functor>
138 void transform(
139  const uint32 n,
140  const Iterator in,
141  const Output out,
142  const Functor functor);
143 
146 template <typename system_tag, typename Iterator1, typename Iterator2, typename Output, typename Functor>
147 void transform(
148  const uint32 n,
149  const Iterator1 in1,
150  const Iterator2 in2,
151  const Output out,
152  const Functor functor);
153 
161 template <typename system_tag, typename InputIterator, typename BinaryOp>
162 typename std::iterator_traits<InputIterator>::value_type reduce(
163  const uint32 n,
164  InputIterator in,
165  BinaryOp op,
166  cugar::vector<system_tag,uint8>& temp_storage);
167 
176 template <typename system_tag, typename InputIterator, typename OutputIterator, typename BinaryOp>
177 void inclusive_scan(
178  const uint32 n,
179  InputIterator in,
180  OutputIterator out,
181  BinaryOp op,
182  cugar::vector<system_tag,uint8>& temp_storage);
183 
193 template <typename system_tag, typename InputIterator, typename OutputIterator, typename BinaryOp, typename Identity>
194 void exclusive_scan(
195  const uint32 n,
196  InputIterator in,
197  OutputIterator out,
198  BinaryOp op,
199  Identity identity,
200  cugar::vector<system_tag,uint8>& temp_storage);
201 
212 template <typename system_tag, typename InputIterator, typename FlagsIterator, typename OutputIterator>
213 uint32 copy_flagged(
214  const uint32 n,
215  InputIterator in,
216  FlagsIterator flags,
217  OutputIterator out,
218  cugar::vector<system_tag,uint8>& temp_storage);
219 
230 template <typename system_tag, typename InputIterator, typename OutputIterator, typename Predicate>
231 uint32 copy_if(
232  const uint32 n,
233  InputIterator in,
234  OutputIterator out,
235  const Predicate pred,
236  cugar::vector<system_tag,uint8>& temp_storage);
237 
248 template <typename system_tag, typename InputIterator, typename OutputIterator, typename CountIterator>
249 uint32 runlength_encode(
250  const uint32 n,
251  InputIterator in,
252  OutputIterator out,
253  CountIterator counts,
254  cugar::vector<system_tag,uint8>& temp_storage);
255 
268 template <typename system_tag, typename KeyIterator, typename ValueIterator, typename OutputKeyIterator, typename OutputValueIterator, typename ReductionOp>
269 uint32 reduce_by_key(
270  const uint32 n,
271  KeyIterator keys_in,
272  ValueIterator values_in,
273  OutputKeyIterator keys_out,
274  OutputValueIterator values_out,
275  ReductionOp reduction_op,
276  cugar::vector<system_tag,uint8>& temp_storage);
277 
286 template <typename system_tag, typename KeyIterator, typename ValueIterator, typename OutputIterator>
287 void lower_bound(
288  const uint32 n,
289  ValueIterator values,
290  const uint32 n_keys,
291  KeyIterator keys,
292  OutputIterator indices);
293 
302 template <typename system_tag, typename KeyIterator, typename ValueIterator, typename OutputIterator>
303 void upper_bound(
304  const uint32 n,
305  ValueIterator values,
306  const uint32 n_keys,
307  KeyIterator keys,
308  OutputIterator indices);
309 
316 template <typename system_tag, typename KeyIterator>
317 void radix_sort(
318  const uint32 n,
319  KeyIterator keys,
320  cugar::vector<system_tag,uint8>& temp_storage);
321 
329 template <typename system_tag, typename KeyIterator, typename ValueIterator>
330 void radix_sort(
331  const uint32 n,
332  KeyIterator keys,
333  ValueIterator values,
334  cugar::vector<system_tag,uint8>& temp_storage);
335 
348 template <
349  typename system_tag,
350  typename key_iterator1,
351  typename key_iterator2,
352  typename value_iterator1,
353  typename value_iterator2,
354  typename key_output,
355  typename value_output>
356 void merge_by_key(
357  const uint32 A_len,
358  const uint32 B_len,
359  const key_iterator1 A_keys,
360  const key_iterator2 B_keys,
361  const value_iterator1 A_values,
362  const value_iterator2 B_values,
363  key_output C_keys,
364  value_output C_values,
365  cugar::vector<system_tag,uint8>& temp_storage);
366 
370 template <typename system_tag>
372 {
375  template <typename Iterator, typename Functor>
377  const uint64 n,
378  const Iterator in,
379  Functor functor)
380  {
381  for_each<system_tag>( n, in, functor );
382  }
383 
386  template <typename Functor>
388  const uint64 n,
389  Functor functor)
390  {
391  for_each<system_tag>( n, thrust::make_counting_iterator<uint64>(0), functor );
392  }
393 };
394 
398 template <>
400 {
404  m_blocks_lo( 0 ),
405  m_blocks_hi( 0 ),
406  m_speed_lo( 0.0f ),
407  m_speed_hi( 0.0f ) {}
408 
411  template <typename Iterator, typename Functor>
412  void operator () (
413  const uint64 n,
414  const Iterator in,
415  Functor functor);
416 
419  template <typename Functor>
421  const uint64 n,
422  Functor functor)
423  {
424  this->operator()( n, thrust::make_counting_iterator<uint64>(0), functor );
425  }
426 
427  private:
428 
431  template <typename KernelFunction>
432  uint32 suggested_blocks(KernelFunction kernel, const uint32 cta_size) const;
433 
436  void update(const uint32 n_blocks, const float speed);
437 
438  uint32 m_blocks_lo;
439  uint32 m_blocks_hi;
440  float m_speed_lo;
441  float m_speed_hi;
442 };
443 
446 
447 } // namespace cugar
448 
449 #include <cugar/basic/primitives_inl.h>
void radix_sort(const uint32 n, KeyIterator keys, cugar::vector< system_tag, uint8 > &temp_storage)
Definition: primitives_inl.h:1194
void transform(const uint32 n, const Iterator in, const Output out, const Functor functor)
Definition: primitives_inl.h:357
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void merge_by_key(key_iterator1 first1, key_iterator1 end1, key_iterator2 first2, key_iterator2 end2, value_iterator1 values1, value_iterator2 values2, key_iterator output_keys, value_iterator output_values)
Definition: algorithms.h:273
bool any(const uint32 n, const PredicateIterator pred)
Definition: primitives_inl.h:91
bool is_sorted(const uint32 n, const Iterator values)
Definition: primitives_inl.h:234
Define CUDA based sort primitives.
Defines some general purpose functors.
uint32 runlength_encode(const uint32 n, InputIterator in, OutputIterator out, CountIterator counts, cugar::vector< system_tag, uint8 > &temp_storage)
Definition: primitives_inl.h:859
bool all(const uint32 n, const PredicateIterator pred)
Definition: primitives_inl.h:101
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE Iterator upper_bound(const Value x, Iterator begin, const index_type n)
Definition: algorithms.h:138
Defines some general purpose algorithms.
Definition: types.h:185
bool is_segment_sorted(const uint32 n, const Iterator values, const Headflags flags)
Definition: primitives_inl.h:245
Definition: vector.h:117
void exclusive_scan(const uint32 n, InputIterator in, OutputIterator out, BinaryOp op, Identity identity, cugar::vector< system_tag, uint8 > &temp_storage)
Definition: primitives_inl.h:569
uint32 reduce_by_key(const uint32 n, KeyIterator keys_in, ValueIterator values_in, OutputKeyIterator keys_out, OutputValueIterator values_out, ReductionOp reduction_op, cugar::vector< system_tag, uint8 > &temp_storage)
Definition: primitives_inl.h:882
Define a vector_view POD type and plain_view() for std::vector.
Definition: diff.h:38
Definition: primitives.h:371
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE Iterator lower_bound(const Value x, Iterator begin, const index_type n)
Definition: algorithms.h:99
uint32 copy_if(const uint32 n, InputIterator in, OutputIterator out, const Predicate pred, cugar::vector< system_tag, uint8 > &temp_storage)
Definition: primitives_inl.h:838
std::iterator_traits< InputIterator >::value_type reduce(const uint32 n, InputIterator in, BinaryOp op, cugar::vector< system_tag, uint8 > &temp_storage)
Definition: primitives_inl.h:520
void inclusive_scan(const uint32 n, InputIterator in, OutputIterator out, BinaryOp op, cugar::vector< system_tag, uint8 > &temp_storage)
Definition: primitives_inl.h:543
for_each_enactor()
Definition: primitives.h:403
void for_each(const uint64 n, const Iterator in, Functor functor)
Definition: primitives_inl.h:284
uint32 copy_flagged(const uint32 n, InputIterator in, FlagsIterator flags, OutputIterator out, cugar::vector< system_tag, uint8 > &temp_storage)
Definition: primitives_inl.h:817
void operator()(const uint64 n, const Iterator in, Functor functor)
Definition: primitives.h:376