NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
numbers.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 #include <cmath>
31 #include <limits>
32 #include <nvbio/basic/types.h>
33 #include <nvbio/basic/popcount.h>
34 #include <nvbio/basic/iterator.h>
35 #include <vector_types.h>
36 #include <vector_functions.h>
37 
38 namespace nvbio {
39 
// Single-precision circle constants.
40 #define M_PIf 3.141592653589793238462643383279502884197169399375105820974944592f
// NOTE(review): despite the "_2" suffix this value is 2*pi (6.2831...), not pi/2.
41 #define M_PI_2f 6.283185307179586f
// 1/pi
42 #define M_INV_PIf 0.3183098861837907f
43 
// Double-precision fallbacks: each one is only defined when the including
// environment has not already provided a macro with the same name.
44 #ifndef M_PI
45 #define M_PI 3.141592653589793238462643383279502884197169399375105820974944592
46 #endif
// NOTE(review): this fallback expands to 2*pi. <math.h> implementations that
// provide M_PI_2 conventionally define it as pi/2, so the value callers see
// presumably depends on whether this guard fires - confirm the intended meaning.
47 #ifndef M_PI_2
48 #define M_PI_2 (2.0 * M_PI)
49 #endif
50 
51 #if WIN32
52 #include <float.h>
53 
54 inline bool is_finite(const double x) { return _finite(x) != 0; }
55 inline bool is_nan(const double x) { return _isnan(x) != 0; }
56 inline bool is_finite(const float x) { return _finite(x) != 0; }
57 inline bool is_nan(const float x) { return _isnan(x) != 0; }
58 
59 #endif
60 
61 #ifdef __CUDACC__
62 
63 NVBIO_FORCEINLINE __device__ uint32 warp_tid() { return threadIdx.x & 31; }
64 NVBIO_FORCEINLINE __device__ uint32 warp_id() { return threadIdx.x >> 5; }
65 
66 #endif
67 
76 
79 
84 
87 
88 namespace util
89 {
90 
94 template <uint32 N>
96 uint32 lo_bits() { return (1u << N) - 1u; }
97 
101 template <uint32 N>
103 uint32 hi_bits() { return ~lo_bits<N>(); }
104 
108 template <typename Iterator, typename T>
110 uint32 count_occurrences(const Iterator begin, uint32 size, const T val, const uint32 max_occ = uint32(-1))
111 {
112  uint32 occ = 0u;
113  for (uint32 i = 0; i < size; ++i)
114  {
115  if (begin[i] == val)
116  {
117  if (++occ >= max_occ)
118  return occ;
119  }
120  }
121  return occ;
122 }
123 
127 template<typename L, typename R>
128 inline NVBIO_HOST_DEVICE L divide_ri(const L x, const R y)
129 {
130  return L( (x + (y - 1)) / y );
131 }
132 
136 template<typename L, typename R>
137 inline NVBIO_HOST_DEVICE L divide_rz(const L x, const R y)
138 {
139  return L( x / y );
140 }
141 
145 template<typename L, typename R>
146 inline NVBIO_HOST_DEVICE L round_i(const L x, const R y){ return L( y * divide_ri(x, y) ); }
147 
151 template<typename L, typename R>
152 inline NVBIO_HOST_DEVICE L round_z(const L x, const R y){ return L( y * divide_rz(x, y) ); }
153 
157 template<typename L, typename R>
158 inline NVBIO_HOST_DEVICE L round(const L x, const R y)
159 {
160  const L r = round_z( x, y );
161  return R((x - r)*2) > y ? r+L(1) : r;
162 }
163 
164 } // end namespace util
165 
168 NVBIO_FORCEINLINE NVBIO_HOST_DEVICE uint8 comp(const uchar2 a, const char c)
169 {
170  return (c == 0 ? a.x : a.y);
171 }
174 NVBIO_FORCEINLINE NVBIO_HOST_DEVICE char comp(const char2 a, const char c)
175 {
176  return (c == 0 ? a.x : a.y);
177 }
178 
181 NVBIO_FORCEINLINE NVBIO_HOST_DEVICE uint8 comp(const uchar4 a, const char c)
182 {
183  return c <= 1 ?
184  (c == 0 ? a.x : a.y) :
185  (c == 2 ? a.z : a.w);
186 }
189 NVBIO_FORCEINLINE NVBIO_HOST_DEVICE char comp(const char4 a, const char c)
190 {
191  return c <= 1 ?
192  (c == 0 ? a.x : a.y) :
193  (c == 2 ? a.z : a.w);
194 }
197 NVBIO_FORCEINLINE NVBIO_HOST_DEVICE signed char& select(char4& a, const char c)
198 {
199  return c <= 1 ?
200  (c == 0 ? a.x : a.y) :
201  (c == 2 ? a.z : a.w);
202 }
203 
207 {
208  return (c == 0 ? a.x : a.y);
209 }
212 NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set(uint2& a, const uint32 c, const uint32 v)
213 {
214  if (c == 0) a.x = v;
215  else a.y = v;
216 }
220 {
221  return (c == 0 ? a.x : a.y);
222 }
225 NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set(ulonglong2& a, const uint32 c, const uint64 v)
226 {
227  if (c == 0) a.x = v;
228  else a.y = v;
229 }
233 {
234  return (c == 0 ? a.x : a.y);
235 }
239 {
240  return c <= 1 ?
241  (c == 0 ? a.x : a.y) :
242  (c == 2 ? a.z : a.w);
243 }
246 NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set(uint4& a, const uint32 c, const uint32 v)
247 {
248  if (c == 0) a.x = v;
249  else if (c == 1) a.y = v;
250  else if (c == 2) a.z = v;
251  else a.w = v;
252 }
256 {
257  return c <= 1 ?
258  (c == 0 ? a.x : a.y) :
259  (c == 2 ? a.z : a.w);
260 }
264 {
265  return c <= 1 ?
266  (c == 0 ? a.x : a.y) :
267  (c == 2 ? a.z : a.w);
268 }
269 
273 {
274  return c <= 1 ?
275  (c == 0 ? a.x : a.y) :
276  (c == 2 ? a.z : a.w);
277 }
278 
282 {
283  return c <= 1 ?
284  (c == 0 ? a.x : a.y) :
285  (c == 2 ? a.z : a.w);
286 }
289 NVBIO_FORCEINLINE NVBIO_HOST_DEVICE void set(ulonglong4& a, const uint32 c, const uint64 v)
290 {
291  if (c == 0) a.x = v;
292  else if (c == 1) a.y = v;
293  else if (c == 2) a.z = v;
294  else a.w = v;
295 }
296 
299 
// Aliases that rename the 2-, 3- and 4-component vector types after the
// element type they hold: <element>_<number of components>.

// unsigned char vectors
300 typedef uchar2 uint8_2;
301 typedef uchar3 uint8_3;
302 typedef uchar4 uint8_4;
303 
// char vectors
304 typedef char2 int8_2;
305 typedef char3 int8_3;
306 typedef char4 int8_4;
307 
// unsigned short vectors
308 typedef ushort2 uint16_2;
309 typedef ushort3 uint16_3;
310 typedef ushort4 uint16_4;
311 
// short vectors
312 typedef short2 int16_2;
313 typedef short3 int16_3;
314 typedef short4 int16_4;
315 
// unsigned int vectors
316 typedef uint2 uint32_2;
317 typedef uint3 uint32_3;
318 typedef uint4 uint32_4;
319 
// int vectors
320 typedef int2 int32_2;
321 typedef int3 int32_3;
322 typedef int4 int32_4;
323 
// unsigned long long vectors
324 typedef ulonglong2 uint64_2;
325 typedef ulonglong3 uint64_3;
326 typedef ulonglong4 uint64_4;
327 
// long long vectors
328 typedef longlong2 int64_2;
329 typedef longlong3 int64_3;
330 typedef longlong4 int64_4;
331 
332 template <typename T, uint32 DIM>
333 struct vector_type {};
334 
335 template <> struct vector_type<char,1> { typedef char type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const char i1) { return i1; } };
336 template <> struct vector_type<char,2> { typedef char2 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const char i1, const char i2) { return make_char2(i1,i2); } };
337 template <> struct vector_type<char,3> { typedef char3 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const char i1, const char i2, const char i3) { return make_char3(i1,i2,i3); } };
338 template <> struct vector_type<char,4> { typedef char4 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const char i1, const char i2, const char i3, const char i4) { return make_char4(i1,i2,i3,i4); } };
339 
340 template <> struct vector_type<unsigned char,1> { typedef unsigned char type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const unsigned char i1) { return i1; } };
341 template <> struct vector_type<unsigned char,2> { typedef uchar2 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const unsigned char i1, const unsigned char i2) { return make_uchar2(i1,i2); } };
342 template <> struct vector_type<unsigned char,3> { typedef uchar3 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const unsigned char i1, const unsigned char i2, const unsigned char i3) { return make_uchar3(i1,i2,i3); } };
343 template <> struct vector_type<unsigned char,4> { typedef uchar4 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const unsigned char i1, const unsigned char i2, const unsigned char i3, const unsigned char i4) { return make_uchar4(i1,i2,i3,i4); } };
344 
345 template <> struct vector_type<short,1> { typedef short type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const short i1) { return i1; } };
346 template <> struct vector_type<short,2> { typedef short2 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const short i1, const short i2) { return make_short2(i1,i2); } };
347 template <> struct vector_type<short,3> { typedef short3 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const short i1, const short i2, const short i3) { return make_short3(i1,i2,i3); } };
348 template <> struct vector_type<short,4> { typedef short4 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const short i1, const short i2, const short i3, const short i4) { return make_short4(i1,i2,i3,i4); } };
349 
350 template <> struct vector_type<unsigned short,1> { typedef unsigned short type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const unsigned short i1) { return i1; } };
351 template <> struct vector_type<unsigned short,2> { typedef ushort2 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const unsigned short i1, const unsigned short i2) { return make_ushort2(i1,i2); } };
352 template <> struct vector_type<unsigned short,3> { typedef ushort3 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const unsigned short i1, const unsigned short i2, const unsigned short i3) { return make_ushort3(i1,i2,i3); } };
353 template <> struct vector_type<unsigned short,4> { typedef ushort4 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const unsigned short i1, const unsigned short i2, const unsigned short i3, const unsigned short i4) { return make_ushort4(i1,i2,i3,i4); } };
354 
355 template <> struct vector_type<int,1> { typedef int type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const int i1) { return i1; } };
356 template <> struct vector_type<int,2> { typedef int2 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const int i1, const int i2) { return make_int2(i1,i2); } };
357 template <> struct vector_type<int,3> { typedef int3 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const int i1, const int i2, const int i3) { return make_int3(i1,i2,i3); } };
358 template <> struct vector_type<int,4> { typedef int4 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const int i1, const int i2, const int i3, const int i4) { return make_int4(i1,i2,i3,i4); } };
359 
360 template <> struct vector_type<unsigned int,1> { typedef unsigned int type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const unsigned int i1) { return i1; } };
361 template <> struct vector_type<unsigned int,2> { typedef uint2 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const unsigned int i1, const unsigned int i2) { return make_uint2(i1,i2); } };
362 template <> struct vector_type<unsigned int,3> { typedef uint3 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const unsigned int i1, const unsigned int i2, const unsigned int i3) { return make_uint3(i1,i2,i3); } };
363 template <> struct vector_type<unsigned int,4> { typedef uint4 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const unsigned int i1, const unsigned int i2, const unsigned int i3, const unsigned int i4) { return make_uint4(i1,i2,i3,i4); } };
364 
365 template <> struct vector_type<int64,1> { typedef int64 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const int64 i1) { return i1; } };
366 template <> struct vector_type<int64,2> { typedef int64_2 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const int64 i1, const int64 i2) { int64_2 r; r.x = i1; r.y = i2; return r; } };
367 template <> struct vector_type<int64,3> { typedef int64_3 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const int64 i1, const int64 i2, const int64 i3) { int64_3 r; r.x = i1; r.y = i2; r.z = i3; return r; } };
368 template <> struct vector_type<int64,4> { typedef int64_4 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const int64 i1, const int64 i2, const int64 i3, const int64 i4) { int64_4 r; r.x = i1; r.y = i2; r.z = i3, r.w = i4; return r; } };
369 
370 template <> struct vector_type<uint64,1> { typedef uint64 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const uint64 i1) { return i1; } };
371 template <> struct vector_type<uint64,2> { typedef uint64_2 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const uint64 i1, const uint64 i2) { uint64_2 r; r.x = i1; r.y = i2; return r; } };
372 template <> struct vector_type<uint64,3> { typedef uint64_3 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const uint64 i1, const uint64 i2, const uint64 i3) { uint64_3 r; r.x = i1; r.y = i2; r.z = i3; return r; } };
373 template <> struct vector_type<uint64,4> { typedef uint64_4 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const uint64 i1, const uint64 i2, const uint64 i3, const uint64 i4) { uint64_4 r; r.x = i1; r.y = i2; r.z = i3, r.w = i4; return r; } };
374 
375 template <> struct vector_type<float,1> { typedef float type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const float i1) { return i1; } };
376 template <> struct vector_type<float,2> { typedef float2 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const float i1, const float i2) { return make_float2(i1,i2); } };
377 template <> struct vector_type<float,3> { typedef float3 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const float i1, const float i2, const float i3) { return make_float3(i1,i2,i3); } };
378 template <> struct vector_type<float,4> { typedef float4 type; NVBIO_FORCEINLINE NVBIO_HOST_DEVICE static type make(const float i1, const float i2, const float i3, const float i4) { return make_float4(i1,i2,i3,i4); } };
379 
380 template <typename T> NVBIO_FORCEINLINE NVBIO_HOST_DEVICE typename vector_type<T,1>::type make_vector(const T i1) { return vector_type<T,1>::make( i1 ); }
381 template <typename T> NVBIO_FORCEINLINE NVBIO_HOST_DEVICE typename vector_type<T,2>::type make_vector(const T i1, const T i2) { return vector_type<T,2>::make( i1, i2 ); }
382 template <typename T> NVBIO_FORCEINLINE NVBIO_HOST_DEVICE typename vector_type<T,3>::type make_vector(const T i1, const T i2, const T i3) { return vector_type<T,3>::make( i1, i2, i3 ); }
383 template <typename T> NVBIO_FORCEINLINE NVBIO_HOST_DEVICE typename vector_type<T,4>::type make_vector(const T i1, const T i2, const T i3, const T i4) { return vector_type<T,4>::make( i1, i2, i3, i4 ); }
384 
385 template <typename T> struct vector_traits {};
386 template <> struct vector_traits<char> { typedef char value_type; const static uint32 DIM = 1; };
387 template <> struct vector_traits<unsigned char> { typedef unsigned char value_type; const static uint32 DIM = 1; };
388 template <> struct vector_traits<short> { typedef short value_type; const static uint32 DIM = 1; };
389 template <> struct vector_traits<unsigned short> { typedef unsigned short value_type; const static uint32 DIM = 1; };
390 template <> struct vector_traits<int> { typedef int value_type; const static uint32 DIM = 1; };
391 template <> struct vector_traits<unsigned int> { typedef unsigned int value_type; const static uint32 DIM = 1; };
392 template <> struct vector_traits<int64> { typedef int64 value_type; const static uint32 DIM = 1; };
393 template <> struct vector_traits<uint64> { typedef uint64 value_type; const static uint32 DIM = 1; };
394 template <> struct vector_traits<float> { typedef float value_type; const static uint32 DIM = 1; };
395 template <> struct vector_traits<double> { typedef double value_type; const static uint32 DIM = 1; };
396 template <> struct vector_traits<char2> { typedef char value_type; const static uint32 DIM = 2; };
397 template <> struct vector_traits<char3> { typedef char value_type; const static uint32 DIM = 3; };
398 template <> struct vector_traits<char4> { typedef char value_type; const static uint32 DIM = 4; };
399 template <> struct vector_traits<uchar2> { typedef unsigned char value_type; const static uint32 DIM = 2; };
400 template <> struct vector_traits<uchar3> { typedef unsigned char value_type; const static uint32 DIM = 3; };
401 template <> struct vector_traits<uchar4> { typedef unsigned char value_type; const static uint32 DIM = 4; };
402 template <> struct vector_traits<short2> { typedef short value_type; const static uint32 DIM = 2; };
403 template <> struct vector_traits<short3> { typedef short value_type; const static uint32 DIM = 3; };
404 template <> struct vector_traits<short4> { typedef short value_type; const static uint32 DIM = 4; };
405 template <> struct vector_traits<ushort2> { typedef unsigned short value_type; const static uint32 DIM = 2; };
406 template <> struct vector_traits<ushort3> { typedef unsigned short value_type; const static uint32 DIM = 3; };
407 template <> struct vector_traits<ushort4> { typedef unsigned short value_type; const static uint32 DIM = 4; };
408 template <> struct vector_traits<int2> { typedef int value_type; const static uint32 DIM = 2; };
409 template <> struct vector_traits<int3> { typedef int value_type; const static uint32 DIM = 3; };
410 template <> struct vector_traits<int4> { typedef int value_type; const static uint32 DIM = 4; };
411 template <> struct vector_traits<uint2> { typedef unsigned int value_type; const static uint32 DIM = 2; };
412 template <> struct vector_traits<uint3> { typedef unsigned int value_type; const static uint32 DIM = 3; };
413 template <> struct vector_traits<uint4> { typedef unsigned int value_type; const static uint32 DIM = 4; };
414 template <> struct vector_traits<float2> { typedef float value_type; const static uint32 DIM = 2; };
415 template <> struct vector_traits<float3> { typedef float value_type; const static uint32 DIM = 3; };
416 template <> struct vector_traits<float4> { typedef float value_type; const static uint32 DIM = 4; };
417 template <> struct vector_traits<uint64_2> { typedef uint64 value_type; const static uint32 DIM = 2; };
418 template <> struct vector_traits<uint64_3> { typedef uint64 value_type; const static uint32 DIM = 3; };
419 template <> struct vector_traits<uint64_4> { typedef uint64 value_type; const static uint32 DIM = 4; };
420 template <> struct vector_traits<int64_2> { typedef int64 value_type; const static uint32 DIM = 2; };
421 template <> struct vector_traits<int64_3> { typedef int64 value_type; const static uint32 DIM = 3; };
422 template <> struct vector_traits<int64_4> { typedef int64 value_type; const static uint32 DIM = 4; };
423 
425 
428 template <typename T>
429 inline NVBIO_HOST_DEVICE T sgn(const T x) { return x > 0 ? T(1) : T(-1); }
430 
433 inline NVBIO_HOST_DEVICE float round(const float x)
434 {
435  const int y = x > 0.0f ? int(x) : int(x)-1;
436  return (x - float(y) > 0.5f) ? float(y)+1.0f : float(y);
437 }
438 
441 inline NVBIO_HOST_DEVICE float min(const float a, const float b) { return a < b ? a : b; }
442 
445 inline NVBIO_HOST_DEVICE float max(const float a, const float b) { return a > b ? a : b; }
446 
449 inline NVBIO_HOST_DEVICE int8 min(const int8 a, const int8 b) { return a < b ? a : b; }
450 
453 inline NVBIO_HOST_DEVICE int8 max(const int8 a, const int8 b) { return a > b ? a : b; }
454 
457 inline NVBIO_HOST_DEVICE uint8 min(const uint8 a, const uint8 b) { return a < b ? a : b; }
458 
461 inline NVBIO_HOST_DEVICE uint8 max(const uint8 a, const uint8 b) { return a > b ? a : b; }
462 
465 inline NVBIO_HOST_DEVICE uint16 min(const uint16 a, const uint16 b) { return a < b ? a : b; }
466 
469 inline NVBIO_HOST_DEVICE uint16 max(const uint16 a, const uint16 b) { return a > b ? a : b; }
470 
473 inline NVBIO_HOST_DEVICE int32 min(const int32 a, const int32 b) { return a < b ? a : b; }
474 
477 inline NVBIO_HOST_DEVICE int32 max(const int32 a, const int32 b) { return a > b ? a : b; }
478 
481 inline NVBIO_HOST_DEVICE uint32 min(const uint32 a, const uint32 b) { return a < b ? a : b; }
482 
485 inline NVBIO_HOST_DEVICE uint32 max(const uint32 a, const uint32 b) { return a > b ? a : b; }
486 
489 inline NVBIO_HOST_DEVICE int64 min(const int64 a, const int64 b) { return a < b ? a : b; }
490 
493 inline NVBIO_HOST_DEVICE int64 max(const int64 a, const int64 b) { return a > b ? a : b; }
494 
497 inline NVBIO_HOST_DEVICE uint64 min(const uint64 a, const uint64 b) { return a < b ? a : b; }
498 
501 inline NVBIO_HOST_DEVICE uint64 max(const uint64 a, const uint64 b) { return a > b ? a : b; }
502 
505 inline NVBIO_HOST_DEVICE uint32 quantize(const float x, const uint32 n)
506 {
507  return (uint32)max( min( int32( x * float(n) ), int32(n-1) ), int32(0) );
508 }
511 inline float NVBIO_HOST_DEVICE mod(const float x, const float m) { return x > 0.0f ? fmodf( x, m ) : m - fmodf( -x, m ); }
512 
516 {
517  unsigned int c = 0;
518  if (n & 0xffff0000u) { n >>= 16; c |= 16; }
519  if (n & 0xff00) { n >>= 8; c |= 8; }
520  if (n & 0xf0) { n >>= 4; c |= 4; }
521  if (n & 0xc) { n >>= 2; c |= 2; }
522  if (n & 0x2) c |= 1;
523  return c;
524 /* uint32 m = 0;
525  while (n > 0)
526  {
527  n >>= 1;
528  m++;
529  }
530  return m-1;*/
531 }
532 
537 {
538  a = (a+0x7ed55d16) + (a<<12);
539  a = (a^0xc761c23c) ^ (a>>19);
540  a = (a+0x165667b1) + (a<<5);
541  a = (a+0xd3a2646c) ^ (a<<9);
542  a = (a+0xfd7046c5) + (a<<3);
543  a = (a^0xb55a4f09) ^ (a>>16);
544  return a;
545 }
546 
551 {
552  key += ~(key << 15);
553  key ^= (key >> 10);
554  key += (key << 3);
555  key ^= (key >> 6);
556  key += ~(key << 11);
557  key ^= (key >> 16);
558  return key;
559 }
560 
565 {
566  key += ~(key << 32);
567  key ^= (key >> 22);
568  key += ~(key << 13);
569  key ^= (key >> 8);
570  key += (key << 3);
571  key ^= (key >> 15);
572  key += ~(key << 27);
573  key ^= (key >> 31);
574  return key;
575 }
576 
581 {
582  return (key >> 32) ^ key;
583 }
584 
589 {
590  uint32 hash = 0u;
591 
592  #if defined(__CUDA_ARCH__)
593  #pragma unroll
594  #endif
595  for (uint32 i = 0; i < 8; ++i)
596  {
597  hash = (hash << 4) + ((key >> (i*8)) & 255u); // shift/mix
598 
599  // get high nybble
600  const uint32 hi_bits = hash & 0xF0000000;
601  if (hi_bits != 0u)
602  hash ^= hi_bits >> 24; // xor high nybble with second nybble
603 
604  hash &= ~hi_bits; // clear high nybble
605  }
606  return hash;
607 }
608 
609 #define NVBIO_RAND_A 1664525
610 #define NVBIO_RAND_C 1013904223
611 
615 {
616  static const uint32 MAX = 0xFFFFFFFF;
617 
619 
621 
623 };
624 
/// Base-2 radical inverse of \c n: the binary digits of n are mirrored around
/// the radix point, so digit j (counting from the least significant one,
/// starting at 1) contributes digit * 2^-j to the result.
///
/// \param n    input integer
/// \return     the mirrored value, converted to float
float radical_inverse(unsigned int n)
{
    const unsigned int b = 2u;

    double result = 0.0;
    // 1 / b^j, tracked in floating point: an integer b^j would wrap around to
    // zero on the 32nd digit and turn the last term into a division by zero
    double inv_bj = 1.0;

    do
    {
        inv_bj /= double( b );

        const unsigned int digit = n % b;
        n /= b;

        result += double( digit ) * inv_bj;
    } while (n > 0);

    return float(result);
}
649 
650 #if defined(__CUDA_ARCH__)
651 
653 uint8 min3(const uint8 op1, const uint8 op2, const uint8 op3)
654 {
655  uint32 r;
656  asm( " vmin.u32.u32.u32.min %0, %1, %2, %3;" : "=r"(r) : "r"(uint32(op1)), "r"(uint32(op2)), "r"(uint32(op3)) );
657  return r;
658 }
659 
661 uint32 min3(const uint32 op1, const uint32 op2, const uint32 op3)
662 {
663  uint32 r;
664  asm( " vmin.u32.u32.u32.min %0, %1, %2, %3;" : "=r"(r) : "r"(op1), "r"(op2), "r"(op3) );
665  return r;
666 }
667 
669 uint32 max3(const uint32 op1, const uint32 op2, const uint32 op3)
670 {
671  uint32 r;
672  asm( " vmax.u32.u32.u32.max %0, %1, %2, %3;" : "=r"(r) : "r"(op1), "r"(op2), "r"(op3) );
673  return r;
674 }
675 
677 int32 min3(const int32 op1, const int32 op2, const int32 op3)
678 {
679  uint32 r;
680  asm( " vmin.s32.s32.s32.min %0, %1, %2, %3;" : "=r"(r) : "r"(op1), "r"(op2), "r"(op3) );
681  return r;
682 }
683 
685 int32 max3(const int32 op1, const int32 op2, const int32 op3)
686 {
687  uint32 r;
688  asm( " vmax.s32.s32.s32.max %0, %1, %2, %3;" : "=r"(r) : "r"(op1), "r"(op2), "r"(op3) );
689  return r;
690 }
691 
692 #else
693 
695 uint8 min3(const uint8 op1, const uint8 op2, const uint8 op3)
696 {
697  return nvbio::min( op1, nvbio::min( op2, op3 ) );
698 }
699 
701 uint32 min3(const uint32 op1, const uint32 op2, const uint32 op3)
702 {
703  return nvbio::min( op1, nvbio::min( op2, op3 ) );
704 }
705 
707 uint32 max3(const uint32 op1, const uint32 op2, const uint32 op3)
708 {
709  return nvbio::max( op1, nvbio::max( op2, op3 ) );
710 }
712 int32 min3(const int32 op1, const int32 op2, const int32 op3)
713 {
714  return nvbio::min( op1, nvbio::min( op2, op3 ) );
715 }
716 
718 int32 max3(const int32 op1, const int32 op2, const int32 op3)
719 {
720  return nvbio::max( op1, nvbio::max( op2, op3 ) );
721 }
722 
723 #endif
724 
726 float min3(const float op1, const float op2, const float op3)
727 {
728  return nvbio::min( op1, nvbio::min( op2, op3 ) );
729 }
730 
732 float max3(const float op1, const float op2, const float op3)
733 {
734  return nvbio::max( op1, nvbio::max( op2, op3 ) );
735 }
736 
737 #ifdef __CUDA_ARCH__
738 
739 inline NVBIO_DEVICE float fast_pow(const float a, const float b)
740 {
741  return __powf(a,b);
742 }
743 inline NVBIO_DEVICE float fast_sin(const float x)
744 {
745  return __sinf(x);
746 }
747 inline NVBIO_DEVICE float fast_cos(const float x)
748 {
749  return __cosf(x);
750 }
751 inline NVBIO_DEVICE float fast_sqrt(const float x)
752 {
753  return __fsqrt_rn(x);
754 }
755 
756 #else
757 
758 inline NVBIO_HOST_DEVICE float fast_pow(const float a, const float b)
759 {
760  return ::powf(a,b);
761 }
762 inline NVBIO_HOST_DEVICE float fast_sin(const float x)
763 {
764  return sinf(x);
765 }
766 inline NVBIO_HOST_DEVICE float fast_cos(const float x)
767 {
768  return cosf(x);
769 }
770 inline NVBIO_HOST_DEVICE float fast_sqrt(const float x)
771 {
772  return sqrtf(x);
773 }
774 
775 #endif
776 
777 #ifdef __CUDACC__
778 inline NVBIO_DEVICE uint16 float_to_half(const float x) { return __float2half_rn(x); }
779 inline NVBIO_DEVICE float half_to_float(const uint32 h) { return __half2float(h); }
780 #endif
781 
786 template <typename T>
788 {
789 #ifdef __CUDACC__
790  NVBIO_HOST_DEVICE static T min() { return T(); }
793 
796  NVBIO_HOST_DEVICE static T max() { return T(); }
797 #else
798  static T min()
801  {
802  return std::numeric_limits<T>::is_integer ?
805  }
808  static T max() { return std::numeric_limits<T>::max(); }
809 #endif
810 };
811 
814 template <>
816 {
817  NVBIO_HOST_DEVICE static int8 min() { return -128; }
818  NVBIO_HOST_DEVICE static int8 max() { return 127; }
819 };
822 template <>
824 {
825  NVBIO_HOST_DEVICE static int16 min() { return -32768; }
826  NVBIO_HOST_DEVICE static int16 max() { return 32767; }
827 };
830 template <>
832 {
833  NVBIO_HOST_DEVICE static int32 min() { return -(1 << 30); }
834  NVBIO_HOST_DEVICE static int32 max() { return (1 << 30); }
835 };
838 template <>
840 {
841  NVBIO_HOST_DEVICE static int64 min() { return -(int64(1) << 62); }
842  NVBIO_HOST_DEVICE static int64 max() { return (int64(1) << 62); }
843 };
844 
845 #ifdef __CUDACC__
846 template <>
849 struct Field_traits<float>
850 {
851  NVBIO_HOST_DEVICE static float min() { return -float(1.0e+30f); }
852  NVBIO_HOST_DEVICE static float max() { return float(1.0e+30f); }
853 };
856 template <>
857 struct Field_traits<double>
858 {
859  NVBIO_HOST_DEVICE static double min() { return -double(1.0e+30); }
860  NVBIO_HOST_DEVICE static double max() { return double(1.0e+30); }
861 };
864 template <>
865 struct Field_traits<uint32>
866 {
867  NVBIO_HOST_DEVICE static uint32 min() { return 0; }
868  NVBIO_HOST_DEVICE static uint32 max() { return uint32(-1); }
869 };
872 template <>
873 struct Field_traits<uint64>
874 {
875  NVBIO_HOST_DEVICE static uint64 min() { return 0; }
876  NVBIO_HOST_DEVICE static uint64 max() { return uint64(-1); }
877 };
878 #endif
879 
884 
887 
/// Functor that ignores its argument and always returns the value it was
/// constructed with.
template <typename T>
struct Constant
{
    /// Remember the value \c k to hand back on every call.
    Constant(const T k) : m_k(k) {}

    /// Return the stored value; the argument is accepted but not used.
    template <typename U>
    T operator() (const U) const
    {
        return m_k;
    }

    const T m_k;    ///< the stored value
};
902 
/// Functor computing m * x, with the product converted to T.
template <typename T>
struct Linear
{
    /// Remember the multiplier \c m.
    Linear(const T m) : m_m(m) {}

    /// Multiply the argument by the stored multiplier and convert the product to T.
    template <typename U>
    T operator() (const U x) const
    {
        const T scaled = T(m_m * x);
        return scaled;
    }

    const T m_m;    ///< the multiplier
};
917 
/// Functor computing k + m * x, where the product m * x is converted to T
/// before the offset k is added.
template <typename T>
struct Affine
{
    /// Remember the offset \c k and the multiplier \c m.
    Affine(const T k, const T m) : m_k(k), m_m(m) {}

    /// Scale the argument by the multiplier, convert to T, then add the offset.
    template <typename U>
    T operator() (const U x) const
    {
        const T scaled = T(m_m * x);
        return m_k + scaled;
    }

    const T m_k;    ///< the offset
    const T m_m;    ///< the multiplier
};
933 
937 {
938  template <typename T>
940  T operator() (const T op1, const T op2) const { return op1 + op2; }
941 };
945 {
946  template <typename T>
948  T operator() (const T op1, const T op2) const { return nvbio::min( op1, op2 ); }
949 };
953 {
954  template <typename T>
956  T operator() (const T op1, const T op2) const { return nvbio::max( op1, op2 ); }
957 };
958 
961 template <typename T>
963 {
964  typedef T argument_type;
966 
968  component_functor(const uint32 c) : m_c( c ) {}
969 
971  result_type operator() (const argument_type op) const { return comp( op, m_c ); }
972 
974 };
975 
978 template <typename word_type>
980 {
981  typedef word_type argument_type;
982  typedef word_type result_type;
983 
984  static const uint32 BITS = 8u*sizeof(word_type);
985 
989  leading_bits(const uint32 n) : n_bits( n ) {}
990 
994  result_type operator() (const argument_type op) const { return op >> (BITS - n_bits); }
995 
996  const uint32 n_bits;
997 };
998 
1001 template <typename word_type>
1003 {
1004  typedef word_type argument_type;
1005  typedef word_type result_type;
1006 
1010  shift_left(const uint32 _shift) : shift(_shift) {}
1011 
1016  {
1017  // shift i by d bits
1018  return result_type(i) << shift;
1019  }
1020 
1021  const uint32 shift;
1022 };
1023 
1026 template <typename word_type>
1028 {
1029  typedef word_type argument_type;
1030  typedef word_type result_type;
1031 
1035  shift_right(const uint32 _shift) : shift(_shift) {}
1036 
1041  {
1042  // shift i by d bits
1043  return result_type(i) >> shift;
1044  }
1045 
1046  const uint32 shift;
1047 };
1048 
1051 template <typename word_type>
1053 {
1054  typedef word_type argument_type;
1056 
1060  result_type operator() (const argument_type op) const { return popc( op ); }
1061 };
1062 
1065 template <typename T, typename U>
1067 
1070 template <>
1072 {
1075 
1077  result_type operator() (const argument_type op) const { return result_type( op >> 24u ); }
1078 };
1079 
1082 template <>
1084 {
1087 
1089  result_type operator() (const argument_type op) const { return result_type( op >> 16u ); }
1090 };
1091 
1094 template <>
1096 {
1099 
1101  result_type operator() (const argument_type op) const { return result_type( op ); }
1102 };
1103 
1106 template <>
1108 {
1111 
1113  result_type operator() (const argument_type op) const { return result_type( op >> 32 ); }
1114 };
1115 
1118 template <typename T>
1120 {
1121  typedef T argument_type;
1122  typedef char result_type;
1123 
1125  get_char_functor(const uint32 i) : m_i( i ) {}
1126 
1128  result_type operator() (const argument_type op) const { return result_type( op[m_i] ); }
1129 
1131 };
1132 
/// Unary predicate: returns true when its argument evaluates to true in a boolean
/// context, false otherwise.
/// NOTE(review): the struct declaration line is not present in this listing.
1135 template <typename T>
1137 {
1138  typedef T argument_type;
1139  typedef bool result_type;
1140 
      /// Return the truth value of op as a bool.
1142  result_type operator() (const T op) const { return op ? true : false; }
1143 };
1144 
/// Unary predicate: returns true when its argument evaluates to false in a boolean
/// context, false otherwise.
/// NOTE(review): the struct declaration line is not present in this listing.
1147 template <typename T>
1149 {
1150  typedef T argument_type;
1151  typedef bool result_type;
1152 
      /// Return the logical negation of op's truth value.
1154  result_type operator() (const T op) const { return op ? false : true; }
1155 };
1156 
/// Unary predicate that compares its argument against a constant fixed at
/// construction, using operator==.
/// NOTE(review): the struct declaration line is not present in this listing; the
/// name equal_to_functor is taken from the constructor below.
1159 template <typename T>
1161 {
1162  typedef T argument_type;
1163  typedef bool result_type;
1164 
      /// Store the constant k to compare against.
1166  equal_to_functor(const T k) : m_k( k ) {}
1167 
      /// Return true when op == m_k.
1169  result_type operator() (const T op) const { return op == m_k; }
1170 
      /// the constant every argument is compared with
1171  const T m_k;
1172 };
1173 
/// Unary predicate that compares its argument against a constant fixed at
/// construction, using operator!=.
/// NOTE(review): the struct declaration line is not present in this listing; the
/// name not_equal_to_functor is taken from the constructor below.
1176 template <typename T>
1178 {
1179  typedef T argument_type;
1180  typedef bool result_type;
1181 
      /// Store the constant k to compare against.
1183  not_equal_to_functor(const T k) : m_k( k ) {}
1184 
      /// Return true when op != m_k.
1186  result_type operator() (const T op) const { return op != m_k; }
1187 
      /// the constant every argument is compared with
1188  const T m_k;
1189 };
1190 
/// Binary predicate: returns true when its two arguments compare equal with operator==.
/// NOTE(review): the struct declaration line and the lines between the opening brace
/// and the result_type typedef (listing lines 1196-1197, presumably the argument
/// typedefs) are not present in this listing.
1193 template <typename T>
1195 {
1198  typedef bool result_type;
1199 
      /// Return op1 == op2.
1201  result_type operator() (const T op1, const T op2) const { return op1 == op2; }
1202 };
1203 
/// Binary predicate: returns true when its two arguments differ according to operator!=.
/// NOTE(review): the struct declaration line and the lines between the opening brace
/// and the result_type typedef (listing lines 1209-1210, presumably the argument
/// typedefs) are not present in this listing.
1206 template <typename T>
1208 {
1211  typedef bool result_type;
1212 
      /// Return op1 != op2.
1214  result_type operator() (const T op1, const T op2) const { return op1 != op2; }
1215 };
1216 
/// Unary functor that maps an index to the element stored at that position of an
/// iterator-addressable sequence: f(i) = perm[i].
/// Iterator must support operator[]; index_type defaults to uint32.
/// NOTE(review): the struct declaration line is not present in this listing; the
/// name gather_functor is taken from the constructor below.
1219 template <typename Iterator, typename index_type = uint32>
1221 {
1222  typedef index_type argument_type;
1223  typedef typename std::iterator_traits<Iterator>::value_type result_type;
1224 
      /// Keep a copy of the iterator that will be indexed on every call.
1226  gather_functor(const Iterator perm) : m_perm( perm ) {}
1227 
      /// Return m_perm[ op ], by value. No bounds check is performed here.
1229  result_type operator() (const argument_type op) const { return m_perm[ op ]; }
1230 
      /// the indexed sequence (stored by value as an iterator)
1231  Iterator m_perm;
1232 };
/// Factory helper: builds a gather_functor<Iterator> from `perm`, so the Iterator
/// type can be deduced at the call site. The functor's index type is left at its
/// default.
/// NOTE(review): the function signature line is not present in this listing.
1233 template <typename Iterator>
1235 {
1236  return gather_functor<Iterator>( perm );
1237 }
1238 
/// Unary functor computing the composition fun2( fun1( x ) ).
/// The argument type comes from Functor1 and the result type from Functor2, so both
/// functor types must expose argument_type / result_type typedefs respectively.
/// NOTE(review): the struct declaration line is not present in this listing; the
/// name composition_functor is taken from the constructor below.
1241 template <typename Functor2, typename Functor1>
1243 {
1244  typedef typename Functor1::argument_type argument_type;
1245  typedef typename Functor2::result_type result_type;
1246 
      /// Store copies of both functors. Note the parameter order: the outer functor
      /// (fun2) comes first, the inner functor (fun1) second.
1248  composition_functor(const Functor2 fun2, const Functor1 fun1) : m_fun1( fun1 ), m_fun2( fun2 ) {}
1249 
      /// Apply m_fun1 to op, then m_fun2 to that result.
1251  result_type operator() (const argument_type op) const { return m_fun2( m_fun1( op ) ); }
1252 
      /// inner functor, applied first
1253  Functor1 m_fun1;
      /// outer functor, applied to the inner functor's result
1254  Functor2 m_fun2;
1255 };
/// Factory helper: builds a composition_functor<Functor2,Functor1> from the two
/// functors, so both template arguments can be deduced at the call site.
/// fun2 is the outer functor and fun1 the inner one; both are passed by value.
1256 template <typename Functor2, typename Functor1>
1257 composition_functor<Functor2,Functor1> make_composition_functor(const Functor2 fun2, const Functor1 fun1)
1258 {
1259  return composition_functor<Functor2,Functor1>( fun2, fun1 );
1260 }
1261 
/// Adapts a binary functor into a unary one by fixing its FIRST argument to a
/// constant: f(x) = fun( c, x ).
/// Functor must expose first_argument_type, second_argument_type and result_type.
/// NOTE(review): the struct declaration line and the declaration of the m_c member
/// are not present in this listing; the name is taken from the constructors below.
1264 template <typename Functor>
1266 {
1267  typedef typename Functor::second_argument_type argument_type;
1268  typedef typename Functor::first_argument_type const_type;
1269  typedef typename Functor::result_type result_type;
1270 
      /// Bind the constant c; the wrapped functor is default-constructed, so Functor
      /// must be default-constructible to use this overload.
1272  bind_first_functor(const const_type c) : m_fun(), m_c( c ) {}
1273 
      /// Bind the constant c to a copy of the supplied functor.
1275  bind_first_functor(const Functor fun, const const_type c) : m_fun( fun ), m_c( c ) {}
1276 
      /// Return m_fun( m_c, op ): the bound constant goes in the first position.
1278  result_type operator() (const argument_type op) const { return m_fun( m_c, op ); }
1279 
      /// the wrapped binary functor
1280  Functor m_fun;
1282 };
1283 
/// Adapts a binary functor into a unary one by fixing its SECOND argument to a
/// constant: f(x) = fun( x, c ).
/// Functor must expose first_argument_type, second_argument_type and result_type.
/// NOTE(review): the struct declaration line and the declaration of the m_c member
/// are not present in this listing; the name is taken from the constructors below.
1286 template <typename Functor>
1288 {
1289  typedef typename Functor::first_argument_type argument_type;
1290  typedef typename Functor::second_argument_type const_type;
1291  typedef typename Functor::result_type result_type;
1292 
      /// Bind the constant c; the wrapped functor is default-constructed, so Functor
      /// must be default-constructible to use this overload.
1294  bind_second_functor(const const_type c) : m_fun(), m_c( c ) {}
1295 
      /// Bind the constant c to a copy of the supplied functor.
1297  bind_second_functor(const Functor fun, const const_type c) : m_fun( fun ), m_c( c ) {}
1298 
      /// Return m_fun( op, m_c ): the bound constant goes in the second position.
1300  result_type operator() (const argument_type op) const { return m_fun( op, m_c ); }
1301 
      /// the wrapped binary functor
1302  Functor m_fun;
1304 };
1305 
/// Index-reversal functor: maps an index i to m_len - i - 1, i.e. the position of
/// the same element when a sequence of m_len items is traversed back to front.
/// NOTE(review): the struct declaration line, the lines at listing positions
/// 1313-1316 (presumably further typedefs) and the declaration of the m_len member
/// are not present in this listing.
1308 template <typename IndexType>
1310 {
1311  typedef IndexType index_type;
1312 
1317 
      /// Store the sequence length used as the reversal pivot.
1321  reverse_functor(const index_type len) : m_len( len ) {}
1322 
      /// Return m_len - i - 1. No range check is made on i; for an unsigned
      /// index_type an i >= m_len wraps around.
1326  index_type operator() (const index_type i) const { return m_len - i - 1; }
1327 
1329 };
1330 
/// Symbol-complement functor over an alphabet of ALPHABET_SIZE symbols.
/// Symbols in [0, ALPHABET_SIZE) are mirrored to (ALPHABET_SIZE-1) - c; any value
/// >= ALPHABET_SIZE is returned unchanged.
/// NOTE(review): the struct declaration line and the lines at listing positions
/// 1339-1345 (presumably typedefs) are not present in this listing.
1336 template <uint32 ALPHABET_SIZE>
1338 {
1341 
1346 
      /// Return c itself when c >= ALPHABET_SIZE, otherwise (ALPHABET_SIZE-1) - c.
1350  uint8 operator() (const uint8 c) const { return c >= ALPHABET_SIZE ? c : uint8(ALPHABET_SIZE-1) - c; }
1351 };
1352 
/// Unary functor that converts a value of type T to type R using the functional
/// cast R(i).
/// NOTE(review): the struct declaration line is not present in this listing.
1355 template <typename T,typename R>
1357 {
1358  typedef T argument_type;
1359  typedef R result_type;
1360 
      /// Return i converted to R.
1364  R operator() (const T i) const { return R(i); }
1365 };
1366 
/// Wraps a unary functor and returns the logical negation of its result.
/// argument_type and result_type are taken from InputFunctor; the value of
/// !m_f(op) is converted to InputFunctor::result_type on return.
/// NOTE(review): the struct declaration line is not present in this listing; the
/// name negate_functor is taken from the constructor below.
1369 template <typename InputFunctor>
1371 {
1372  typedef typename InputFunctor::argument_type argument_type;
1373  typedef typename InputFunctor::result_type result_type;
1374 
      /// Store a copy of the functor to negate; defaults to a default-constructed
      /// InputFunctor.
1375  negate_functor(const InputFunctor f = InputFunctor()) : m_f(f) {}
1376 
      /// Return !m_f(op).
1378  result_type operator() (const argument_type op) const { return !m_f(op); }
1379 
      /// the wrapped functor whose result is negated
1380  const InputFunctor m_f;
1381 };
1382 
1386 
1387 } // namespace nvbio