33 #include <cugar/basic/types.h> 34 #include <vector_types.h> 35 #include <vector_functions.h> 36 #include <cuda_fp16.h> 44 #define M_PI 3.14159265358979323846 48 #define M_PIf 3.14159265358979323846f 52 #define M_TWO_PI 6.28318530717958647693 56 #define M_TWO_PIf 6.28318530717958647693f 59 CUGAR_HOST_DEVICE
inline bool is_finite(
const double x)
61 #if defined(CUGAR_DEVICE_COMPILATION) 62 return isfinite(x) != 0;
64 return _finite(x) != 0;
68 inline bool is_nan(
const double x)
70 #if defined(CUGAR_DEVICE_COMPILATION) 73 return _isnan(x) != 0;
76 CUGAR_HOST_DEVICE
inline bool is_finite(
const float x)
78 #if defined(CUGAR_DEVICE_COMPILATION) 79 return isfinite(x) != 0;
81 return _finite(x) != 0;
84 CUGAR_HOST_DEVICE
inline bool is_nan(
const float x)
86 #if defined(CUGAR_DEVICE_COMPILATION) 89 return _isnan(x) != 0;
#define M_PI 3.14159265358979323846
#define M_TWO_PI 6.28318530717958647693
#define M_PIf 3.14159265358979323846f
#define M_TWO_PIf 6.28318530717958647693f

/// index of the calling thread inside its warp: the low 5 bits of threadIdx.x
/// (only the x component of the thread index is considered)
CUGAR_FORCEINLINE __device__ uint32 warp_tid()
{
    const uint32 lane_mask = 31u;
    return threadIdx.x & lane_mask;
}
/// index of the calling thread's warp inside its block: threadIdx.x divided by 32
/// (only the x component of the thread index is considered)
CUGAR_FORCEINLINE __device__ uint32 warp_id()
{
    const uint32 log_warp_size = 5u;
    return threadIdx.x >> log_warp_size;
}
120 CUGAR_HOST_DEVICE CUGAR_FORCEINLINE
/// positive infinity as a double, obtained by reinterpreting its 64-bit pattern
CUGAR_HOST_DEVICE CUGAR_FORCEINLINE
double double_infinity()
{
    // sign bit clear, all exponent bits set, zero mantissa
    const unsigned long long inf_bits = 0x7ff0000000000000ULL;
    return cugar::binary_cast<double>( inf_bits );
}
147 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
154 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
160 template <
typename Iterator,
typename T>
161 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
165 for (uint32 i = 0; i < size; ++i)
169 if (++occ >= max_occ)
179 template<
typename L,
typename R>
180 inline CUGAR_HOST_DEVICE L
divide_ri(
const L x,
const R y)
182 return L( (x + (y - 1)) / y );
188 template<
typename L,
typename R>
189 inline CUGAR_HOST_DEVICE L
divide_rz(
const L x,
const R y)
/// round x to a multiple of y, computed as y * divide_ri(x,y)
///
/// \param x    value to round
/// \param y    granularity
template <typename L, typename R>
inline CUGAR_HOST_DEVICE
L round_i(const L x, const R y)
{
    const L n_multiples = divide_ri( x, y );
    return L( y * n_multiples );
}
/// round x to a multiple of y, computed as y * divide_rz(x,y)
///
/// \param x    value to round
/// \param y    granularity
template <typename L, typename R>
inline CUGAR_HOST_DEVICE
L round_z(const L x, const R y)
{
    const L n_multiples = divide_rz( x, y );
    return L( y * n_multiples );
}
209 template<
typename L,
typename R>
210 inline CUGAR_HOST_DEVICE L
round(
const L x,
const R y)
213 return R((x - r)*2) > y ? r+L(1) : r;
218 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint8
comp(
const uchar2 a,
const char c)
220 return (c == 0 ? a.x : a.y);
224 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
char comp(
const char2 a,
const char c)
226 return (c == 0 ? a.x : a.y);
231 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint8
comp(
const uchar4 a,
const char c)
234 (c == 0 ? a.x : a.y) :
235 (c == 2 ? a.z : a.w);
239 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
char comp(
const char4 a,
const char c)
242 (c == 0 ? a.x : a.y) :
243 (c == 2 ? a.z : a.w);
248 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32
comp(
const uint2 a,
const uint32 c)
250 return (c == 0 ? a.x : a.y);
254 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
void set(uint2& a,
const uint32 c,
const uint32 v)
261 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint64
comp(
const ulonglong2 a,
const uint32 c)
263 return (c == 0 ? a.x : a.y);
267 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
void set(ulonglong2& a,
const uint32 c,
const uint64 v)
274 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE int32
comp(
const int2 a,
const uint32 c)
276 return (c == 0 ? a.x : a.y);
280 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32
comp(
const uint4 a,
const uint32 c)
283 (c == 0 ? a.x : a.y) :
284 (c == 2 ? a.z : a.w);
288 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
void set(uint4& a,
const uint32 c,
const uint32 v)
291 else if (c == 1) a.y = v;
292 else if (c == 2) a.z = v;
297 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE int32
comp(
const int4 a,
const uint32 c)
300 (c == 0 ? a.x : a.y) :
301 (c == 2 ? a.z : a.w);
306 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint16
comp(
const ushort4 a,
const uint32 c)
309 (c == 0 ? a.x : a.y) :
310 (c == 2 ? a.z : a.w);
315 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint64
comp(
const ulonglong4 a,
const uint32 c)
318 (c == 0 ? a.x : a.y) :
319 (c == 2 ? a.z : a.w);
323 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
void set(ulonglong4& a,
const uint32 c,
const uint64 v)
326 else if (c == 1) a.y = v;
327 else if (c == 2) a.z = v;
335 typedef uchar2 uint8_2;
336 typedef uchar3 uint8_3;
337 typedef uchar4 uint8_4;
339 typedef char2 int8_2;
340 typedef char3 int8_3;
341 typedef char4 int8_4;
343 typedef ushort2 uint16_2;
344 typedef ushort3 uint16_3;
345 typedef ushort4 uint16_4;
347 typedef short2 int16_2;
348 typedef short3 int16_3;
349 typedef short4 int16_4;
351 typedef uint2 uint32_2;
352 typedef uint3 uint32_3;
353 typedef uint4 uint32_4;
355 typedef int2 int32_2;
356 typedef int3 int32_3;
357 typedef int4 int32_4;
359 typedef ulonglong2 uint64_2;
360 typedef ulonglong3 uint64_3;
361 typedef ulonglong4 uint64_4;
363 typedef longlong2 int64_2;
364 typedef longlong3 int64_3;
365 typedef longlong4 int64_4;
367 template <
typename T, u
int32 DIM>
370 template <
typename T>
378 template <
typename T>
/// component-wise constructor: stores _x and _y into the x and y members
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
vector2_storage(T _x, T _y) :
    x( _x ),
    y( _y )
{}
386 template <
typename T>
/// component-wise constructor: stores _x, _y and _z into the x, y and z members
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
vector3_storage(T _x, T _y, T _z) :
    x( _x ),
    y( _y ),
    z( _z )
{}
394 template <
typename T>
/// component-wise constructor: stores _x, _y, _z and _w into the x, y, z and w members
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
vector4_storage(T _x, T _y, T _z, T _w) :
    x( _x ),
    y( _y ),
    z( _z ),
    w( _w )
{}
/// vector_type specialization for generic 4-component vectors, stored as a vector4_storage<T>
template <typename T>
struct vector_type<T,4>
{
    typedef vector4_storage<T> type;

    /// build a 4-component vector out of its components
    ///
    /// \param i1   first component
    /// \param i2   second component
    /// \param i3   third component
    /// \param i4   fourth component
    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const T i1, const T i2, const T i3, const T i4)
    {
        // the fourth component is i4: this used to pass the integer literal 4,
        // silently discarding the caller's argument
        return type( i1, i2, i3, i4 );
    }
};
/// vector_type specializations for char: DIM = 1 maps to the scalar itself,
/// DIM = 2,3,4 map to char2, char3 and char4
template <>
struct vector_type<char,1>
{
    typedef char type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const char i1) { return i1; }
};
template <>
struct vector_type<char,2>
{
    typedef char2 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const char i1, const char i2) { return make_char2( i1, i2 ); }
};
template <>
struct vector_type<char,3>
{
    typedef char3 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const char i1, const char i2, const char i3) { return make_char3( i1, i2, i3 ); }
};
template <>
struct vector_type<char,4>
{
    typedef char4 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const char i1, const char i2, const char i3, const char i4) { return make_char4( i1, i2, i3, i4 ); }
};
/// vector_type specializations for unsigned char: DIM = 1 maps to the scalar itself,
/// DIM = 2,3,4 map to uchar2, uchar3 and uchar4
template <>
struct vector_type<unsigned char,1>
{
    typedef unsigned char type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const unsigned char i1) { return i1; }
};
template <>
struct vector_type<unsigned char,2>
{
    typedef uchar2 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const unsigned char i1, const unsigned char i2) { return make_uchar2( i1, i2 ); }
};
template <>
struct vector_type<unsigned char,3>
{
    typedef uchar3 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const unsigned char i1, const unsigned char i2, const unsigned char i3) { return make_uchar3( i1, i2, i3 ); }
};
template <>
struct vector_type<unsigned char,4>
{
    typedef uchar4 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const unsigned char i1, const unsigned char i2, const unsigned char i3, const unsigned char i4) { return make_uchar4( i1, i2, i3, i4 ); }
};
/// vector_type specializations for short: DIM = 1 maps to the scalar itself,
/// DIM = 2,3,4 map to short2, short3 and short4
template <>
struct vector_type<short,1>
{
    typedef short type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const short i1) { return i1; }
};
template <>
struct vector_type<short,2>
{
    typedef short2 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const short i1, const short i2) { return make_short2( i1, i2 ); }
};
template <>
struct vector_type<short,3>
{
    typedef short3 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const short i1, const short i2, const short i3) { return make_short3( i1, i2, i3 ); }
};
template <>
struct vector_type<short,4>
{
    typedef short4 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const short i1, const short i2, const short i3, const short i4) { return make_short4( i1, i2, i3, i4 ); }
};
/// vector_type specializations for unsigned short: DIM = 1 maps to the scalar itself,
/// DIM = 2,3,4 map to ushort2, ushort3 and ushort4
template <>
struct vector_type<unsigned short,1>
{
    typedef unsigned short type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const unsigned short i1) { return i1; }
};
template <>
struct vector_type<unsigned short,2>
{
    typedef ushort2 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const unsigned short i1, const unsigned short i2) { return make_ushort2( i1, i2 ); }
};
template <>
struct vector_type<unsigned short,3>
{
    typedef ushort3 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const unsigned short i1, const unsigned short i2, const unsigned short i3) { return make_ushort3( i1, i2, i3 ); }
};
template <>
struct vector_type<unsigned short,4>
{
    typedef ushort4 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const unsigned short i1, const unsigned short i2, const unsigned short i3, const unsigned short i4) { return make_ushort4( i1, i2, i3, i4 ); }
};
/// vector_type specializations for int: DIM = 1 maps to the scalar itself,
/// DIM = 2,3,4 map to int2, int3 and int4
template <>
struct vector_type<int,1>
{
    typedef int type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const int i1) { return i1; }
};
template <>
struct vector_type<int,2>
{
    typedef int2 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const int i1, const int i2) { return make_int2( i1, i2 ); }
};
template <>
struct vector_type<int,3>
{
    typedef int3 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const int i1, const int i2, const int i3) { return make_int3( i1, i2, i3 ); }
};
template <>
struct vector_type<int,4>
{
    typedef int4 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const int i1, const int i2, const int i3, const int i4) { return make_int4( i1, i2, i3, i4 ); }
};
/// vector_type specializations for unsigned int: DIM = 1 maps to the scalar itself,
/// DIM = 2,3,4 map to uint2, uint3 and uint4
template <>
struct vector_type<unsigned int,1>
{
    typedef unsigned int type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const unsigned int i1) { return i1; }
};
template <>
struct vector_type<unsigned int,2>
{
    typedef uint2 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const unsigned int i1, const unsigned int i2) { return make_uint2( i1, i2 ); }
};
template <>
struct vector_type<unsigned int,3>
{
    typedef uint3 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const unsigned int i1, const unsigned int i2, const unsigned int i3) { return make_uint3( i1, i2, i3 ); }
};
template <>
struct vector_type<unsigned int,4>
{
    typedef uint4 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const unsigned int i1, const unsigned int i2, const unsigned int i3, const unsigned int i4) { return make_uint4( i1, i2, i3, i4 ); }
};
/// vector_type specializations for int64: DIM = 1 maps to the scalar itself,
/// DIM = 2,3,4 map to int64_2, int64_3 and int64_4, filled member by member
template <>
struct vector_type<int64,1>
{
    typedef int64 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const int64 i1) { return i1; }
};
template <>
struct vector_type<int64,2>
{
    typedef int64_2 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const int64 i1, const int64 i2)
    {
        type v;
        v.x = i1;
        v.y = i2;
        return v;
    }
};
template <>
struct vector_type<int64,3>
{
    typedef int64_3 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const int64 i1, const int64 i2, const int64 i3)
    {
        type v;
        v.x = i1;
        v.y = i2;
        v.z = i3;
        return v;
    }
};
template <>
struct vector_type<int64,4>
{
    typedef int64_4 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const int64 i1, const int64 i2, const int64 i3, const int64 i4)
    {
        type v;
        v.x = i1;
        v.y = i2;
        v.z = i3;
        v.w = i4;
        return v;
    }
};
/// vector_type specializations for uint64: DIM = 1 maps to the scalar itself,
/// DIM = 2,3,4 map to uint64_2, uint64_3 and uint64_4, filled member by member
template <>
struct vector_type<uint64,1>
{
    typedef uint64 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const uint64 i1) { return i1; }
};
template <>
struct vector_type<uint64,2>
{
    typedef uint64_2 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const uint64 i1, const uint64 i2)
    {
        type v;
        v.x = i1;
        v.y = i2;
        return v;
    }
};
template <>
struct vector_type<uint64,3>
{
    typedef uint64_3 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const uint64 i1, const uint64 i2, const uint64 i3)
    {
        type v;
        v.x = i1;
        v.y = i2;
        v.z = i3;
        return v;
    }
};
template <>
struct vector_type<uint64,4>
{
    typedef uint64_4 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const uint64 i1, const uint64 i2, const uint64 i3, const uint64 i4)
    {
        type v;
        v.x = i1;
        v.y = i2;
        v.z = i3;
        v.w = i4;
        return v;
    }
};
/// vector_type specializations for float: DIM = 1 maps to the scalar itself,
/// DIM = 2,3,4 map to float2, float3 and float4
template <>
struct vector_type<float,1>
{
    typedef float type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const float i1) { return i1; }
};
template <>
struct vector_type<float,2>
{
    typedef float2 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const float i1, const float i2) { return make_float2( i1, i2 ); }
};
template <>
struct vector_type<float,3>
{
    typedef float3 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const float i1, const float i2, const float i3) { return make_float3( i1, i2, i3 ); }
};
template <>
struct vector_type<float,4>
{
    typedef float4 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const float i1, const float i2, const float i3, const float i4) { return make_float4( i1, i2, i3, i4 ); }
};
/// vector_type specializations for double: DIM = 1 maps to the scalar itself,
/// DIM = 2,3,4 map to double2, double3 and double4
template <>
struct vector_type<double,1>
{
    typedef double type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const double i1) { return i1; }
};
template <>
struct vector_type<double,2>
{
    typedef double2 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const double i1, const double i2) { return make_double2( i1, i2 ); }
};
template <>
struct vector_type<double,3>
{
    typedef double3 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const double i1, const double i2, const double i3) { return make_double3( i1, i2, i3 ); }
};
template <>
struct vector_type<double,4>
{
    typedef double4 type;

    CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
    static type make(const double i1, const double i2, const double i3, const double i4) { return make_double4( i1, i2, i3, i4 ); }
};
/// vector_traits specializations for scalar types: each scalar is its own
/// value_type and has dimension 1
template <>
struct vector_traits<char>
{
    typedef char value_type;
    static const uint32 DIM = 1;
};
template <>
struct vector_traits<unsigned char>
{
    typedef unsigned char value_type;
    static const uint32 DIM = 1;
};
template <>
struct vector_traits<short>
{
    typedef short value_type;
    static const uint32 DIM = 1;
};
template <>
struct vector_traits<unsigned short>
{
    typedef unsigned short value_type;
    static const uint32 DIM = 1;
};
template <>
struct vector_traits<int>
{
    typedef int value_type;
    static const uint32 DIM = 1;
};
template <>
struct vector_traits<unsigned int>
{
    typedef unsigned int value_type;
    static const uint32 DIM = 1;
};
template <>
struct vector_traits<int64>
{
    typedef int64 value_type;
    static const uint32 DIM = 1;
};
template <>
struct vector_traits<uint64>
{
    typedef uint64 value_type;
    static const uint32 DIM = 1;
};
template <>
struct vector_traits<float>
{
    typedef float value_type;
    static const uint32 DIM = 1;
};
template <>
struct vector_traits<double>
{
    typedef double value_type;
    static const uint32 DIM = 1;
};
/// vector_traits specializations for the 8-bit vector types:
/// value_type is the component type, DIM the number of components
template <>
struct vector_traits<char2>
{
    typedef char value_type;
    static const uint32 DIM = 2;
};
template <>
struct vector_traits<char3>
{
    typedef char value_type;
    static const uint32 DIM = 3;
};
template <>
struct vector_traits<char4>
{
    typedef char value_type;
    static const uint32 DIM = 4;
};
template <>
struct vector_traits<uchar2>
{
    typedef unsigned char value_type;
    static const uint32 DIM = 2;
};
template <>
struct vector_traits<uchar3>
{
    typedef unsigned char value_type;
    static const uint32 DIM = 3;
};
template <>
struct vector_traits<uchar4>
{
    typedef unsigned char value_type;
    static const uint32 DIM = 4;
};
/// vector_traits specializations for the 16-bit vector types:
/// value_type is the component type, DIM the number of components
template <>
struct vector_traits<short2>
{
    typedef short value_type;
    static const uint32 DIM = 2;
};
template <>
struct vector_traits<short3>
{
    typedef short value_type;
    static const uint32 DIM = 3;
};
template <>
struct vector_traits<short4>
{
    typedef short value_type;
    static const uint32 DIM = 4;
};
template <>
struct vector_traits<ushort2>
{
    typedef unsigned short value_type;
    static const uint32 DIM = 2;
};
template <>
struct vector_traits<ushort3>
{
    typedef unsigned short value_type;
    static const uint32 DIM = 3;
};
template <>
struct vector_traits<ushort4>
{
    typedef unsigned short value_type;
    static const uint32 DIM = 4;
};
/// vector_traits specializations for the int / unsigned int vector types:
/// value_type is the component type, DIM the number of components
template <>
struct vector_traits<int2>
{
    typedef int value_type;
    static const uint32 DIM = 2;
};
template <>
struct vector_traits<int3>
{
    typedef int value_type;
    static const uint32 DIM = 3;
};
template <>
struct vector_traits<int4>
{
    typedef int value_type;
    static const uint32 DIM = 4;
};
template <>
struct vector_traits<uint2>
{
    typedef unsigned int value_type;
    static const uint32 DIM = 2;
};
template <>
struct vector_traits<uint3>
{
    typedef unsigned int value_type;
    static const uint32 DIM = 3;
};
template <>
struct vector_traits<uint4>
{
    typedef unsigned int value_type;
    static const uint32 DIM = 4;
};
/// vector_traits specializations for the float vector types:
/// value_type is float, DIM the number of components
template <>
struct vector_traits<float2>
{
    typedef float value_type;
    static const uint32 DIM = 2;
};
template <>
struct vector_traits<float3>
{
    typedef float value_type;
    static const uint32 DIM = 3;
};
template <>
struct vector_traits<float4>
{
    typedef float value_type;
    static const uint32 DIM = 4;
};
/// vector_traits specialization for double2: two components of type double
template <>
struct vector_traits<double2>
{
    // the components of a double2 are doubles: declaring float here made generic
    // code built on value_type silently drop to single precision
    typedef double value_type;
    const static uint32 DIM = 2;
};
/// vector_traits specializations for the 64-bit integer vector types:
/// value_type is the component type, DIM the number of components
template <>
struct vector_traits<uint64_2>
{
    typedef uint64 value_type;
    static const uint32 DIM = 2;
};
template <>
struct vector_traits<uint64_3>
{
    typedef uint64 value_type;
    static const uint32 DIM = 3;
};
template <>
struct vector_traits<uint64_4>
{
    typedef uint64 value_type;
    static const uint32 DIM = 4;
};
template <>
struct vector_traits<int64_2>
{
    typedef int64 value_type;
    static const uint32 DIM = 2;
};
template <>
struct vector_traits<int64_3>
{
    typedef int64 value_type;
    static const uint32 DIM = 3;
};
template <>
struct vector_traits<int64_4>
{
    typedef int64 value_type;
    static const uint32 DIM = 4;
};
/// sign of x: T(1) when x > 0, T(-1) in every other case (zero included)
template <typename T>
inline CUGAR_HOST_DEVICE
T sgn(const T x)
{
    if (x > 0)
        return T(1);

    return T(-1);
}
512 inline CUGAR_HOST_DEVICE
float round(
const float x)
514 const int y = x > 0.0f ? int(x) : int(x)-1;
515 return (x -
float(y) > 0.5f) ? float(y)+1.0f : float(y);
/// absolute value of a 32-bit signed integer
inline CUGAR_HOST_DEVICE
int32 abs(const int32 a)
{
    if (a < 0)
        return -a;

    return a;
}
/// absolute value of a 64-bit signed integer
inline CUGAR_HOST_DEVICE
int64 abs(const int64 a)
{
    if (a < 0)
        return -a;

    return a;
}
/// absolute value of a float, forwarded to fabsf
inline CUGAR_HOST_DEVICE
float abs(const float a)
{
    const float magnitude = fabsf( a );
    return magnitude;
}
/// absolute value of a double, forwarded to fabs
inline CUGAR_HOST_DEVICE
double abs(const double a)
{
    const double magnitude = fabs( a );
    return magnitude;
}
/// minimum of two floats: a when a < b, b otherwise
inline CUGAR_HOST_DEVICE
float min(const float a, const float b)
{
    if (a < b)
        return a;

    return b;
}
/// maximum of two floats: a when a > b, b otherwise
inline CUGAR_HOST_DEVICE
float max(const float a, const float b)
{
    if (a > b)
        return a;

    return b;
}
/// minimum of two int8 values
inline CUGAR_HOST_DEVICE
int8 min(const int8 a, const int8 b)
{
    if (a < b)
        return a;

    return b;
}
/// maximum of two int8 values
inline CUGAR_HOST_DEVICE
int8 max(const int8 a, const int8 b)
{
    if (a > b)
        return a;

    return b;
}
/// minimum of two uint8 values
inline CUGAR_HOST_DEVICE
uint8 min(const uint8 a, const uint8 b)
{
    if (a < b)
        return a;

    return b;
}
/// maximum of two uint8 values
inline CUGAR_HOST_DEVICE
uint8 max(const uint8 a, const uint8 b)
{
    if (a > b)
        return a;

    return b;
}
/// minimum of two uint16 values
inline CUGAR_HOST_DEVICE
uint16 min(const uint16 a, const uint16 b)
{
    if (a < b)
        return a;

    return b;
}
/// maximum of two uint16 values
inline CUGAR_HOST_DEVICE
uint16 max(const uint16 a, const uint16 b)
{
    if (a > b)
        return a;

    return b;
}
/// minimum of two int32 values
inline CUGAR_HOST_DEVICE
int32 min(const int32 a, const int32 b)
{
    if (a < b)
        return a;

    return b;
}
/// maximum of two int32 values
inline CUGAR_HOST_DEVICE
int32 max(const int32 a, const int32 b)
{
    if (a > b)
        return a;

    return b;
}
/// minimum of two uint32 values
inline CUGAR_HOST_DEVICE
uint32 min(const uint32 a, const uint32 b)
{
    if (a < b)
        return a;

    return b;
}
/// maximum of two uint32 values
inline CUGAR_HOST_DEVICE
uint32 max(const uint32 a, const uint32 b)
{
    if (a > b)
        return a;

    return b;
}
/// minimum of two int64 values
inline CUGAR_HOST_DEVICE
int64 min(const int64 a, const int64 b)
{
    if (a < b)
        return a;

    return b;
}
/// maximum of two int64 values
inline CUGAR_HOST_DEVICE
int64 max(const int64 a, const int64 b)
{
    if (a > b)
        return a;

    return b;
}
/// minimum of two uint64 values
inline CUGAR_HOST_DEVICE
uint64 min(const uint64 a, const uint64 b)
{
    if (a < b)
        return a;

    return b;
}
/// maximum of two uint64 values
inline CUGAR_HOST_DEVICE
uint64 max(const uint64 a, const uint64 b)
{
    if (a > b)
        return a;

    return b;
}
600 inline CUGAR_HOST_DEVICE uint32
quantize(
const float x,
const uint32 n)
602 return (uint32)max( min( int32( x *
float(n) ), int32(n-1) ), int32(0) );
/// floating point modulus with a non-negative result: wraps x into [0,m)
///
/// Unlike fmodf, whose result takes the sign of x, negative inputs are wrapped
/// around from the top of the range (e.g. mod(-1,3) = 2).
/// The previous formulation returned m itself, i.e. a value outside of [0,m),
/// for x == 0 and for negative exact multiples of m.
///
/// \param x    value to wrap
/// \param m    modulus (the wrap-around is only meaningful for m > 0)
/// \return     x wrapped by m
inline float CUGAR_HOST_DEVICE mod(const float x, const float m)
{
    // fmodf keeps the sign of x, with a magnitude strictly smaller than |m|
    const float r = fmodf( x, m );

    // shift negative remainders up by one period; fabsf only serves to turn a
    // negative zero remainder into +0
    return r < 0.0f ? r + m : fabsf( r );
}
/// square of a float
inline float CUGAR_HOST_DEVICE sqr(const float x)
{
    const float squared = x * x;
    return squared;
}
/// square of a double
inline double CUGAR_HOST_DEVICE sqr(const double x)
{
    const double squared = x * x;
    return squared;
}
618 inline CUGAR_HOST_DEVICE uint32
log2(uint32 n)
621 if (n & 0xffff0000u) { n >>= 16; c |= 16; }
622 if (n & 0xff00) { n >>= 8; c |= 8; }
623 if (n & 0xf0) { n >>= 4; c |= 4; }
624 if (n & 0xc) { n >>= 2; c |= 2; }
636 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
637 float saturate(
const float x)
639 #ifdef CUGAR_DEVICE_COMPILATION 640 return ::saturate(x);
642 return max( min( x, 1.0f ), 0.0f );
648 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
651 a = (a+0x7ed55d16) + (a<<12);
652 a = (a^0xc761c23c) ^ (a>>19);
653 a = (a+0x165667b1) + (a<<5);
654 a = (a+0xd3a2646c) ^ (a<<9);
655 a = (a+0xfd7046c5) + (a<<3);
656 a = (a^0xb55a4f09) ^ (a>>16);
662 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
676 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
692 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
695 return (key >> 32) ^ key;
700 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
705 #if defined(__CUDA_ARCH__) 708 for (uint32 i = 0; i < 8; ++i)
710 hash = (hash << 4) + ((key >> (i*8)) & 255u);
713 const uint32
hi_bits = hash & 0xF0000000;
715 hash ^= hi_bits >> 24;
722 #define CUGAR_RAND_A 1664525 723 #define CUGAR_RAND_C 1013904223 729 static const uint32 MAX = 0xFFFFFFFF;
/// constructor
///
/// \param s    initial generator state (seed), 0 by default
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
LCG_random(const uint32 s = 0) :
    m_s( s )
{}
/// advance the generator by one step, state = state * CUGAR_RAND_A + CUGAR_RAND_C
/// in 32-bit unsigned arithmetic, and return the new state
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 next()
{
    m_s *= CUGAR_RAND_A;
    m_s += CUGAR_RAND_C;
    return m_s;
}
/// draw the next integer from the underlying LCG_random and scale it by
/// 1 / LCG_random::MAX
///
/// NOTE(review): the largest integer draws map to exactly 1.0f, so the range
/// is closed on both ends - confirm callers do not expect [0,1)
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
float next()
{
    const uint32 raw = this->LCG_random::next();
    return raw / float(LCG_random::MAX);
}
752 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
757 i ^= i >> 10; i *= 0xb36534e5;
759 i ^= i >> 21; i *= 0x93fc4795;
761 i ^= i >> 17; i *= 1 | p >> 18;
762 return i * (1.0f / 4294967808.0f);
767 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
771 bits = (bits << 16u) | (bits >> 16u);
772 bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
773 bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
774 bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
775 bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
781 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
786 unsigned int remainder;
787 unsigned int m, bj = 1;
789 const unsigned int b = 2u;
797 remainder = m - n * b;
799 result += double(remainder) / double(bj);
802 return float(result);
805 bits = (bits << 16u) | (bits >> 16u);
806 bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u);
807 bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u);
808 bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u);
809 bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u);
810 return float(bits) * 2.3283064365386963e-10f;
816 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
819 bits = (bits << 32) | (bits >> 32);
820 bits = ((bits & 0x0000ffff0000ffffULL) << 16) |
821 ((bits & 0xffff0000ffff0000ULL) >> 16);
822 bits = ((bits & 0x00ff00ff00ff00ffULL) << 8) |
823 ((bits & 0xff00ff00ff00ff00ULL) >> 8);
824 bits = ((bits & 0x0f0f0f0f0f0f0f0fULL) << 4) |
825 ((bits & 0xf0f0f0f0f0f0f0f0ULL) >> 4);
826 bits = ((bits & 0x3333333333333333ULL) << 2) |
827 ((bits & 0xccccccccccccccccULL) >> 2);
828 bits = ((bits & 0x5555555555555555ULL) << 1) |
829 ((bits & 0xaaaaaaaaaaaaaaaaULL) >> 1);
830 return (scramble ^ bits) >> (64 - 52);
835 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
845 i ^= p; i *= 0xe170893d;
848 i ^= p >> 8; i *= 0x0929eb3f;
850 i ^= (i & w) >> 1; i *= 1 | p >> 27;
852 i ^= (i & w) >> 11; i *= 0x74dcb303;
853 i ^= (i & w) >> 2; i *= 0x9e501cc3;
854 i ^= (i & w) >> 2; i *= 0xc860a3df;
862 #if defined(__CUDA_ARCH__) 864 CUGAR_FORCEINLINE CUGAR_DEVICE
865 uint8 min3(
const uint8 op1,
const uint8 op2,
const uint8 op3)
868 asm(
" vmin.u32.u32.u32.min %0, %1, %2, %3;" :
"=r"(r) :
"r"(uint32(op1)),
"r"(uint32(op2)),
"r"(uint32(op3)) );
872 CUGAR_FORCEINLINE CUGAR_DEVICE
873 uint32 min3(
const uint32 op1,
const uint32 op2,
const uint32 op3)
876 asm(
" vmin.u32.u32.u32.min %0, %1, %2, %3;" :
"=r"(r) :
"r"(op1),
"r"(op2),
"r"(op3) );
880 CUGAR_FORCEINLINE CUGAR_DEVICE
881 uint32 max3(
const uint32 op1,
const uint32 op2,
const uint32 op3)
884 asm(
" vmax.u32.u32.u32.max %0, %1, %2, %3;" :
"=r"(r) :
"r"(op1),
"r"(op2),
"r"(op3) );
888 CUGAR_FORCEINLINE CUGAR_DEVICE
889 int32 min3(
const int32 op1,
const int32 op2,
const int32 op3)
892 asm(
" vmin.s32.s32.s32.min %0, %1, %2, %3;" :
"=r"(r) :
"r"(op1),
"r"(op2),
"r"(op3) );
896 CUGAR_FORCEINLINE CUGAR_DEVICE
897 int32 max3(
const int32 op1,
const int32 op2,
const int32 op3)
900 asm(
" vmax.s32.s32.s32.max %0, %1, %2, %3;" :
"=r"(r) :
"r"(op1),
"r"(op2),
"r"(op3) );
906 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
907 uint8 min3(
const uint8 op1,
const uint8 op2,
const uint8 op3)
909 return cugar::min( op1, cugar::min( op2, op3 ) );
912 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
913 uint32 min3(
const uint32 op1,
const uint32 op2,
const uint32 op3)
915 return cugar::min( op1, cugar::min( op2, op3 ) );
918 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
919 uint32 max3(
const uint32 op1,
const uint32 op2,
const uint32 op3)
921 return cugar::max( op1, cugar::max( op2, op3 ) );
923 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
924 int32 min3(
const int32 op1,
const int32 op2,
const int32 op3)
926 return cugar::min( op1, cugar::min( op2, op3 ) );
929 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
930 int32 max3(
const int32 op1,
const int32 op2,
const int32 op3)
932 return cugar::max( op1, cugar::max( op2, op3 ) );
937 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
938 float min3(
const float op1,
const float op2,
const float op3)
940 return cugar::min( op1, cugar::min( op2, op3 ) );
943 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
944 float max3(
const float op1,
const float op2,
const float op3)
946 return cugar::max( op1, cugar::max( op2, op3 ) );
951 inline CUGAR_DEVICE
float fast_pow(
const float a,
const float b)
955 inline CUGAR_DEVICE
float fast_sin(
const float x)
959 inline CUGAR_DEVICE
float fast_cos(
const float x)
963 inline CUGAR_DEVICE
float fast_sqrt(
const float x)
965 return __fsqrt_rn(x);
970 inline CUGAR_HOST_DEVICE
float fast_pow(
const float a,
const float b)
974 inline CUGAR_HOST_DEVICE
float fast_sin(
const float x)
978 inline CUGAR_HOST_DEVICE
float fast_cos(
const float x)
982 inline CUGAR_HOST_DEVICE
float fast_sqrt(
const float x)
/// compute the sine and cosine of an angle at once
///
/// \param phi  input angle, in radians
/// \param s    output sine
/// \param c    output cosine
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
void sincosf(float phi, float* s, float* c)
{
    // the guard used to be misspelled CUGAR_DEVIE_COMPILATION, a macro nobody
    // defines, so the fused device intrinsic below was never compiled in
#if defined(CUGAR_DEVICE_COMPILATION)
    ::sincosf( phi, s, c );
#else
    // portable host fallback: ::sincosf is not available with every host compiler
    *s = sinf( phi );
    *c = cosf( phi );
#endif
}
/// reciprocal square root, 1 / sqrt(x)
///
/// \param x    input value
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
float rsqrtf(float x)
{
    // the guard used to be misspelled CUGAR_DEVIE_COMPILATION, a macro nobody
    // defines, so device code always took the division + square root fallback
#if defined(CUGAR_DEVICE_COMPILATION)
    return ::rsqrtf( x );
#else
    return 1.0f / sqrtf( x );
#endif
}
/// convert a float to the 16 raw bits of its half precision representation
/// (round to nearest)
///
/// \param x    input value
/// \return     bit pattern of the half precision value
inline CUGAR_DEVICE uint16 float_to_half(const float x)
{
#if defined(CUDART_VERSION) && (CUDART_VERSION >= 9000)
    // since CUDA 9 __float2half_rn returns a __half object, whose implicit
    // conversion to an integer is numeric (1.5 -> 1): extract the bits explicitly
    return __half_as_ushort( __float2half_rn( x ) );
#else
    // legacy toolkits: the intrinsic already returns the raw bits
    return __float2half_rn( x );
#endif
}
/// convert the raw bits of a half precision value to a float
///
/// \param h    bit pattern of the half precision value, stored in the low 16 bits
/// \return     the corresponding single precision value
inline CUGAR_DEVICE
float half_to_float(const uint32 h)
{
#if defined(CUDART_VERSION) && (CUDART_VERSION >= 9000)
    // since CUDA 9 __half2float takes a __half object, and an integer argument
    // would be converted to it numerically: reinterpret the bits explicitly
    return __half2float( __ushort_as_half( (unsigned short)h ) );
#else
    // legacy toolkits: the intrinsic takes the raw bits directly
    return __half2float( h );
#endif
}
1019 template <
typename T>
/// lower bound for a generic type: a value-initialized T
CUGAR_HOST_DEVICE
static T min()
{
    return T();
}
/// upper bound for a generic type: a value-initialized T
CUGAR_HOST_DEVICE
static T max()
{
    return T();
}
1035 return std::numeric_limits<T>::is_integer ?
/// lower bound for int8
CUGAR_HOST_DEVICE
static int8 min()
{
    return -128;
}
/// upper bound for int8
CUGAR_HOST_DEVICE
static int8 max()
{
    return 127;
}
/// lower bound for int16
CUGAR_HOST_DEVICE
static int16 min()
{
    return -32768;
}
/// upper bound for int16
CUGAR_HOST_DEVICE
static int16 max()
{
    return 32767;
}
/// lower bound for int32: -2^30 (not the most negative representable value)
CUGAR_HOST_DEVICE
static int32 min()
{
    const int32 bound = 1 << 30;
    return -bound;
}
/// upper bound for int32: 2^30 (not the largest representable value)
CUGAR_HOST_DEVICE
static int32 max()
{
    const int32 bound = 1 << 30;
    return bound;
}
/// lower bound for int64: -2^62 (not the most negative representable value)
CUGAR_HOST_DEVICE
static int64 min()
{
    const int64 bound = int64(1) << 62;
    return -bound;
}
/// upper bound for int64: 2^62 (not the largest representable value)
CUGAR_HOST_DEVICE
static int64 max()
{
    const int64 bound = int64(1) << 62;
    return bound;
}
/// lower bound for float: -1.0e+30 (a large finite value, not the smallest representable float)
CUGAR_HOST_DEVICE
static float min()
{
    const float bound = float(1.0e+30f);
    return -bound;
}
/// upper bound for float: 1.0e+30 (a large finite value, not the largest representable float)
CUGAR_HOST_DEVICE
static float max()
{
    const float bound = float(1.0e+30f);
    return bound;
}
/// lower bound for double: -1.0e+30 (a large finite value, not the smallest representable double)
CUGAR_HOST_DEVICE
static double min()
{
    const double bound = double(1.0e+30);
    return -bound;
}
/// upper bound for double: 1.0e+30 (a large finite value, not the largest representable double)
CUGAR_HOST_DEVICE
static double max()
{
    const double bound = double(1.0e+30);
    return bound;
}
/// lower bound for uint32
CUGAR_HOST_DEVICE
static uint32 min()
{
    return 0;
}
/// upper bound for uint32: -1 converted to uint32, i.e. all bits set
CUGAR_HOST_DEVICE
static uint32 max()
{
    const uint32 all_ones = uint32(-1);
    return all_ones;
}
/// lower bound for uint64
CUGAR_HOST_DEVICE
static uint64 min()
{
    return 0;
}
/// upper bound for uint64: -1 converted to uint64, i.e. all bits set
CUGAR_HOST_DEVICE
static uint64 max()
{
    const uint64 all_ones = uint64(-1);
    return all_ones;
}
CUGAR_HOST_DEVICE L round_z(const L x, const R y)
Definition: numbers.h:204
CUGAR_HOST_DEVICE uint32 quantize(const float x, const uint32 n)
Definition: numbers.h:600
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE float radical_inverse(unsigned int n)
Definition: numbers.h:782
Definition: numbers.h:395
float CUGAR_HOST_DEVICE mod(const float x, const float m)
Definition: numbers.h:606
thrust::device_vector< T >::iterator begin(thrust::device_vector< T > &vec)
Definition: thrust_view.h:89
Definition: numbers.h:371
Definition: numbers.h:727
Definition: numbers.h:387
CUGAR_HOST_DEVICE uint32 log2(uint32 n)
Definition: numbers.h:618
CUGAR_HOST_DEVICE uint64 min(const uint64 a, const uint64 b)
Definition: numbers.h:592
Definition: numbers.h:379
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 permute(uint32 i, uint32 l, uint32 p)
Definition: numbers.h:836
CUGAR_HOST_DEVICE L round_i(const L x, const R y)
Definition: numbers.h:198
CUGAR_HOST_DEVICE T sgn(const T x)
Definition: numbers.h:508
Definition: numbers.h:463
CUGAR_HOST_DEVICE L divide_ri(const L x, const R y)
Definition: numbers.h:180
Definition: numbers.h:740
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 hash2(uint32 key)
Definition: numbers.h:663
float CUGAR_HOST_DEVICE sqr(const float x)
Definition: numbers.h:610
Definition: numbers.h:1020
CUGAR_HOST_DEVICE uint64 max(const uint64 a, const uint64 b)
Definition: numbers.h:596
CUGAR_HOST_DEVICE int32 abs(const int32 a)
Definition: numbers.h:520
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 count_occurrences(const Iterator begin, uint32 size, const T val, const uint32 max_occ=uint32(-1))
Definition: numbers.h:162
Definition: numbers.h:368
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 lo_bits()
Definition: numbers.h:148
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 hi_bits()
Definition: numbers.h:155
Define a vector_view POD type and plain_view() for std::vector.
Definition: diff.h:38
static T max()
Definition: numbers.h:1041
CUGAR_HOST_DEVICE L divide_rz(const L x, const R y)
Definition: numbers.h:189
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE Out binary_cast(const In in)
Definition: types.h:288
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint8 comp(const uchar2 a, const char c)
Definition: numbers.h:218
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 reverse_bits(const uint32 n)
Definition: numbers.h:768
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 hash(uint32 a)
Definition: numbers.h:649
CUGAR_HOST_DEVICE L round(const L x, const R y)
Definition: numbers.h:210
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE float randfloat(unsigned i, unsigned p)
Definition: numbers.h:753
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint64 hash3(uint64 key)
Definition: numbers.h:701