35 #include <vector_types.h>
36 #include <vector_functions.h>
// Single-precision pi. The literal carries far more digits than a float can
// hold; the compiler rounds it to the nearest representable value.
40 #define M_PIf 3.141592653589793238462643383279502884197169399375105820974944592f
// Single-precision 2*pi.
// NOTE: despite the "_2" suffix this is two-pi, NOT pi/2 as the POSIX
// <math.h> constant M_PI_2 would be.
41 #define M_PI_2f 6.283185307179586f
// Single-precision 1/pi.
42 #define M_INV_PIf 0.3183098861837907f
// Double-precision pi.
45 #define M_PI 3.141592653589793238462643383279502884197169399375105820974944592
// Double-precision 2*pi, derived from M_PI.
// NOTE: same naming caveat as M_PI_2f - this is two-pi, not pi/2.
48 #define M_PI_2 (2.0 * M_PI)
54 inline bool is_finite(
const double x) {
return _finite(x) != 0; }
55 inline bool is_nan(
const double x) {
return _isnan(x) != 0; }
56 inline bool is_finite(
const float x) {
return _finite(x) != 0; }
57 inline bool is_nan(
const float x) {
return _isnan(x) != 0; }
108 template <
typename Iterator,
typename T>
113 for (
uint32 i = 0; i < size; ++i)
117 if (++occ >= max_occ)
127 template<
typename L,
typename R>
130 return L( (x + (y - 1)) / y );
136 template<
typename L,
typename R>
145 template<
typename L,
typename R>
151 template<
typename L,
typename R>
157 template<
typename L,
typename R>
161 return R((x - r)*2) > y ? r+L(1) : r;
170 return (c == 0 ? a.x : a.y);
176 return (c == 0 ? a.x : a.y);
184 (c == 0 ? a.x : a.y) :
185 (c == 2 ? a.z : a.w);
192 (c == 0 ? a.x : a.y) :
193 (c == 2 ? a.z : a.w);
200 (c == 0 ? a.x : a.y) :
201 (c == 2 ? a.z : a.w);
208 return (c == 0 ? a.x : a.y);
221 return (c == 0 ? a.x : a.y);
234 return (c == 0 ? a.x : a.y);
241 (c == 0 ? a.x : a.y) :
242 (c == 2 ? a.z : a.w);
249 else if (c == 1) a.y = v;
250 else if (c == 2) a.z = v;
258 (c == 0 ? a.x : a.y) :
259 (c == 2 ? a.z : a.w);
266 (c == 0 ? a.x : a.y) :
267 (c == 2 ? a.z : a.w);
275 (c == 0 ? a.x : a.y) :
276 (c == 2 ? a.z : a.w);
284 (c == 0 ? a.x : a.y) :
285 (c == 2 ? a.z : a.w);
292 else if (c == 1) a.y = v;
293 else if (c == 2) a.z = v;
332 template <
typename T, u
int32 DIM>
428 template <
typename T>
435 const int y = x > 0.0f ? int(x) : int(x)-1;
436 return (x -
float(y) > 0.5f) ? float(y)+1.0f : float(y);
511 inline float NVBIO_HOST_DEVICE mod(
const float x,
const float m) {
return x > 0.0f ? fmodf( x, m ) : m - fmodf( -x, m ); }
518 if (n & 0xffff0000u) { n >>= 16; c |= 16; }
519 if (n & 0xff00) { n >>= 8; c |= 8; }
520 if (n & 0xf0) { n >>= 4; c |= 4; }
521 if (n & 0xc) { n >>= 2; c |= 2; }
538 a = (a+0x7ed55d16) + (a<<12);
539 a = (a^0xc761c23c) ^ (a>>19);
540 a = (a+0x165667b1) + (a<<5);
541 a = (a+0xd3a2646c) ^ (a<<9);
542 a = (a+0xfd7046c5) + (a<<3);
543 a = (a^0xb55a4f09) ^ (a>>16);
582 return (key >> 32) ^ key;
592 #if defined(__CUDA_ARCH__)
595 for (
uint32 i = 0; i < 8; ++i)
597 hash = (hash << 4) + ((key >> (i*8)) & 255u);
602 hash ^= hi_bits >> 24;
// Integer constants for a random-number recurrence.
// NOTE(review): the values match the widely published 32-bit linear
// congruential generator parameters (multiplier 1664525, increment
// 1013904223); presumably used as state = state * A + C - the consuming
// code is not visible here, confirm before relying on this.
609 #define NVBIO_RAND_A 1664525
610 #define NVBIO_RAND_C 1013904223
631 unsigned int remainder;
632 unsigned int m, bj = 1;
634 const unsigned int b = 2u;
642 remainder = m - n * b;
644 result += double( remainder ) / double( bj );
647 return float(result);
650 #if defined(__CUDA_ARCH__)
656 asm(
" vmin.u32.u32.u32.min %0, %1, %2, %3;" :
"=r"(r) :
"r"(
uint32(op1)),
"r"(
uint32(op2)),
"r"(
uint32(op3)) );
664 asm(
" vmin.u32.u32.u32.min %0, %1, %2, %3;" :
"=r"(r) :
"r"(op1),
"r"(op2),
"r"(op3) );
672 asm(
" vmax.u32.u32.u32.max %0, %1, %2, %3;" :
"=r"(r) :
"r"(op1),
"r"(op2),
"r"(op3) );
680 asm(
" vmin.s32.s32.s32.min %0, %1, %2, %3;" :
"=r"(r) :
"r"(op1),
"r"(op2),
"r"(op3) );
688 asm(
" vmax.s32.s32.s32.max %0, %1, %2, %3;" :
"=r"(r) :
"r"(op1),
"r"(op2),
"r"(op3) );
726 float min3(
const float op1,
const float op2,
const float op3)
732 float max3(
const float op1,
const float op2,
const float op3)
753 return __fsqrt_rn(x);
778 inline NVBIO_DEVICE uint16 float_to_half(
const float x) {
return __float2half_rn(x); }
786 template <
typename T>
802 return std::numeric_limits<T>::is_integer ?
849 struct Field_traits<float>
857 struct Field_traits<double>
865 struct Field_traits<
uint32>
873 struct Field_traits<
uint64>
890 template <
typename T>
896 template <
typename U>
905 template <
typename T>
911 template <
typename U>
920 template <
typename T>
926 template <
typename U>
938 template <
typename T>
940 T
operator() (
const T op1,
const T op2)
const {
return op1 + op2; }
946 template <
typename T>
954 template <
typename T>
961 template <
typename T>
978 template <
typename word_type>
1001 template <
typename word_type>
1026 template <
typename word_type>
1051 template <
typename word_type>
1065 template <
typename T,
typename U>
1118 template <
typename T>
1135 template <
typename T>
1147 template <
typename T>
1159 template <
typename T>
1176 template <
typename T>
1193 template <
typename T>
1206 template <
typename T>
1219 template <
typename Iterator,
typename index_type = u
int32>
1223 typedef typename std::iterator_traits<Iterator>::value_type
result_type;
1233 template <
typename Iterator>
1241 template <
typename Functor2,
typename Functor1>
1256 template <
typename Functor2,
typename Functor1>
1264 template <
typename Functor>
1286 template <
typename Functor>
1308 template <
typename IndexType>
1336 template <u
int32 ALPHABET_SIZE>
1355 template <
typename T,
typename R>
1369 template <
typename InputFunctor>