// Bit count of i: a device build forwards to device_popc, a GCC-compatible host
// build uses __builtin_popcount.
50 #if defined(NVBIO_DEVICE_COMPILATION)
51 return device_popc( i );
52 #elif defined(__GNUC__)
53 return __builtin_popcount( i );
// Bit-parallel count (presumably the #else branch; the directive and the
// declaration of v are not in view). Counts are folded in place: each 2-bit
// field, then each 4-bit field, then each byte holds the count of its own bits.
56 v = v - ((v >> 1) & 0x55555555);
57 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
58 v = (v + (v >> 4)) & 0x0F0F0F0F;
// The multiply adds all byte counts into the top byte (assuming v is 32 bits
// wide), which the shift then extracts.
59 return (v * 0x01010101) >> 24;
// Bit count of i for the wide variant: a device build forwards to device_popc,
// a GCC-compatible host build uses __builtin_popcountll.
67 #if defined(NVBIO_DEVICE_COMPILATION)
68 return device_popc( i );
69 #elif defined(__GNUC__)
70 return __builtin_popcountll( i );
// Bit-parallel count with 64-bit masks (presumably the #else branch; the
// directive and the declaration of v are not in view). Each 2-bit field, then
// each 4-bit field, then each byte ends up holding the count of its own bits.
74 v = v - ((v >> 1) & 0x5555555555555555U);
75 v = (v & 0x3333333333333333U) + ((v >> 2) & 0x3333333333333333U);
76 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FU;
// The multiply adds all byte counts into the top byte (assuming v is 64 bits
// wide), which the shift by 56 then extracts.
77 return (v * 0x0101010101010101U) >> 56;
// lut[k] is the number of set bits in the 4-bit value k.
85 const uint32 lut[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
// Add the count of the low nibble to the count of i >> 4. The second lookup is
// only in range when i >> 4 is below 16, i.e. when i fits in 8 bits.
86 return lut[ i & 0x0F ] + lut[ i >> 4 ];
// Running counts of set bits in mask: popc0 covers bit 0, popc1 covers bits 0-1,
// popc2 covers bits 0-2.
93 const uint32 popc0 = (mask & 1u);
94 const uint32 popc1 = popc0 + ((mask >> 1u) & 1u);
95 const uint32 popc2 = popc1 + ((mask >> 2u) & 1u);
// Tail of a nested conditional whose leading test is not in view: one arm yields
// 0 or 1 by comparing n with popc0, the other yields 2 or 3 by comparing n with popc2.
97 (n == popc0) ? 0u : 1u :
98 (n == popc2) ? 2u : 3u;
// Two terms of a longer sum (its start and end are not in view): bit 1 and bit 2
// of mask, each reduced to 0 or 1.
107 ((mask >> 1u) & 1u) +
108 ((mask >> 2u) & 1u) +
// Number of set bits among the low 8 bits of mask.
165 const uint32 popc_half =
popc( mask & 0xFFu );
200 #if defined(NVBIO_DEVICE_COMPILATION)
// Find-first-set of x: a device build forwards to ffs_device, a GCC-compatible
// host build uses __builtin_ffs.
209 #if defined(NVBIO_DEVICE_COMPILATION)
210 return ffs_device(x);
211 #elif defined(__GNUC__)
212 return __builtin_ffs(x);
// Remaining path (presumably the #else branch; the directive is not in view):
// -x is ~x + 1, so ~(-x) equals x - 1, and x ^ (x - 1) keeps the lowest set bit
// together with every bit below it. Counting those bits gives the 1-based
// position of the lowest set bit; a zero input yields 0.
214 return x ?
popc(x ^ (~(-x))) : 0u;
// Number of leading zero bits in x: a device build forwards to lzc_device, a
// GCC-compatible host build uses __builtin_clz. The builtin's result is undefined
// for a zero argument, so that input is answered explicitly with 32, which is
// also what the popc-based expression below produces when y has no bits set.
// The last statement (presumably the #else branch; the directive and the
// computation of y are not in view) returns 32 minus the number of set bits in y.
222 #if defined(NVBIO_DEVICE_COMPILATION)
223 return lzc_device(x);
224 #elif defined(__GNUC__)
225 return x ? __builtin_clz(x) : 32u;
233 return (32u -
popc(y));
// Flag every 2-bit symbol of x that equals c. `odd` moves each symbol's high bit
// down onto its low-bit position, inverted first when bit 1 of c is clear, so the
// bit is 1 exactly when the high bit matches c. `even` does the same for the low
// bit against bit 0 of c. ANDing both with 0x55555555 leaves one flag per symbol
// at the even bit positions.
241 const uint32 odd = ((c&2)? x : ~x) >> 1;
242 const uint32 even = ((c&1)? x : ~x);
243 const uint32 mask = odd & even & 0x55555555;
// 64-bit form of the 2-bit symbol match: `odd` carries, at each even bit
// position, whether the symbol's high bit agrees with bit 1 of c; `even` carries
// whether the low bit agrees with bit 0 of c. The final AND keeps one flag per
// symbol at the even bit positions.
251 const uint64 odd = ((c&2)? x : ~x) >> 1;
252 const uint64 even = ((c&1)? x : ~x);
253 const uint64 mask = odd & even & 0x5555555555555555U;
// Fragments of a table-driven count templated on the table type: count_table is
// indexed with each of the four bytes of b and the four entries are added.
// The top-byte lookup is unmasked, so b is presumably 32 bits wide - confirm
// against the parameter list, which is not in view.
265 template <
typename CountTable>
268 const CountTable count_table)
272 count_table[(b)&0xff] + count_table[(b)>>8&0xff] +
273 count_table[(b)>>16&0xff] + count_table[(b)>>24];
// Tally all sixteen 2-bit symbols of b in one word. Each symbol value s (0-3)
// contributes 1 << (8 * s), i.e. it adds one to byte lane s of the result, so
// byte s of the sum holds how many symbols equal s. A lane holds at most 16,
// so lanes never carry into one another. The top symbol is read with an unmasked
// shift, which relies on b having no bits above bit 31.
275 return (1u << ((b&3) << 3)) +
276 (1u << ((b>>2&3) << 3)) +
277 (1u << ((b>>4&3) << 3)) +
278 (1u << ((b>>6&3) << 3)) +
279 (1u << ((b>>8&3) << 3)) +
280 (1u << ((b>>10&3) << 3)) +
281 (1u << ((b>>12&3) << 3)) +
282 (1u << ((b>>14&3) << 3)) +
283 (1u << ((b>>16&3) << 3)) +
284 (1u << ((b>>18&3) << 3)) +
285 (1u << ((b>>20&3) << 3)) +
286 (1u << ((b>>22&3) << 3)) +
287 (1u << ((b>>24&3) << 3)) +
288 (1u << ((b>>26&3) << 3)) +
289 (1u << ((b>>28&3) << 3)) +
290 (1u << ((b>>30) << 3));
// Per-value counts of the 2-bit symbols in b, one flag bit per symbol at the even
// positions: n1 counts symbols with both bits clear (value 0), n2 those with only
// the low bit set (value 1), n3 those with only the high bit set (value 2).
292 const uint32 n1 =
popc( ~b >> 1 & ~b & 0x55555555 );
293 const uint32 n2 =
popc( ~b >> 1 & b & 0x55555555 );
294 const uint32 n3 =
popc( b >> 1 & ~b & 0x55555555 );
// The remaining symbols (value 3) follow from the total of 16 symbols per word.
295 const uint32 n4 = 16u - n1 - n2 - n3;
// Fragments of the wider table-driven count: count_table is indexed with each of
// the eight bytes of b and the eight entries are added. The top-byte lookup is
// unmasked, so b is presumably 64 bits wide - confirm against the parameter
// list, which is not in view.
312 template <
typename CountTable>
315 const CountTable count_table)
318 count_table[(b)&0xff] + count_table[(b)>>8&0xff] +
319 count_table[(b)>>16&0xff] + count_table[(b)>>24&0xff] +
320 count_table[(b)>>32&0xff] + count_table[(b)>>40&0xff] +
321 count_table[(b)>>48&0xff] + count_table[(b)>>56];
// Clear the lowest 2*i bits of mask and keep the rest. The shift count is 2*i on
// a 32-bit value, so i must stay below 16 for the shift to be well defined.
329 return mask & ~((1u<<(i<<1)) - 1u);
// Clear the lowest 2*i bits of mask and keep the rest. The 1 is widened to 64
// bits before shifting by 2*i, so i must stay below 32 for the shift to be
// well defined.
337 return mask & ~((
uint64(1u)<<(i<<1)) - 1u);
// When c is 0 the result is r reduced by i, otherwise r unchanged. r is computed
// on lines not in view; presumably it counts symbols after the first i entries
// were zeroed, so those entries would otherwise be counted as zeros - confirm.
349 return (c == 0) ? r - i : r;
// When c is 0 the result is r reduced by i, otherwise r unchanged. r is computed
// on lines not in view; presumably it counts symbols after the first i entries
// were zeroed, so those entries would otherwise be counted as zeros - confirm.
361 return (c == 0) ? r - i : r;
// A non-zero c yields r, a zero c yields 32 - r. r is computed on lines not in
// view; presumably it is the number of set bits of a 32-bit word, making 32 - r
// the number of clear bits - confirm.
372 return c ? r : 32u - r;
// Count how many of the eight 4-bit fields of x equal c: each comparison yields
// 0 or 1 and the results are added.
379 return (((x >> 0) & 15u) == c) +
380 (((x >> 4) & 15u) == c) +
381 (((x >> 8) & 15u) == c) +
382 (((x >> 12) & 15u) == c) +
383 (((x >> 16) & 15u) == c) +
384 (((x >> 20) & 15u) == c) +
385 (((x >> 24) & 15u) == c) +
386 (((x >> 28) & 15u) == c);
// Count how many of the four bytes of x equal c: each comparison yields 0 or 1
// and the results are added.
391 return (((x >> 0) & 255u) == c) +
392 (((x >> 8) & 255u) == c) +
393 (((x >> 16) & 255u) == c) +
394 (((x >> 24) & 255u) == c);
// A non-zero c yields r, a zero c yields 64 - r. r is computed on lines not in
// view; presumably it is the number of set bits of a 64-bit word, making 64 - r
// the number of clear bits - confirm.
407 return c ? r : 64u - r;
// Count over a 64-bit x by splitting it: apply the 32-bit popc_nbit<N> to the
// low half and to the high half, then add the two counts.
413 return popc_nbit<N>(
uint32(x), c ) +
414 popc_nbit<N>(
uint32(x >> 32), c );
// LOG_N opens a conditional chain keyed on the entry width N (the rest of the
// chain is not in view); a width of 1 maps to a shift scale of 0. The return
// clears the lowest i << LOG_N bits of mask, and that shift count must stay below
// 32 for the 32-bit shift to be well defined.
424 const uint32 LOG_N = (N == 1) ? 0u :
429 return mask & ~((1u<<(i<<LOG_N)) - 1u);
// Clear the lowest i << LOG_N bits of a 64-bit mask and keep the rest; the 1 is
// widened to 64 bits before shifting. LOG_N opens a conditional chain keyed on the
// entry width N (the rest of the chain is not in view) and is the base-2
// logarithm of that width, so a width of 1 must map to 0, as in the 32-bit
// variant of this helper. With any other value, single-bit entries would be
// skipped two at a time.
438 const uint32 LOG_N = (N == 1) ? 0u :
443 return mask & ~((
uint64(1u)<<(i<<LOG_N)) - 1u);
// Count the entries equal to c after hibits_nbit<N> has processed mask with i.
452 const uint32 r = popc_nbit<N>( hibits_nbit<N>( mask, i ), c );
// For c == 0 the count is reduced by i; presumably the i entries cleared by
// hibits_nbit read as zeros and would otherwise be counted - confirm.
456 return (c == 0) ? r - i : r;
// Count the entries equal to c after hibits_nbit<N> has processed mask with i.
465 const uint32 r = popc_nbit<N>( hibits_nbit<N>( mask, i ), c );
// For c == 0 the count is reduced by i; presumably the i entries cleared by
// hibits_nbit read as zeros and would otherwise be counted - confirm.
469 return (c == 0) ? r - i : r;
480 template <
typename CountTable>
483 const CountTable count_table,
497 template <
typename CountTable>
500 const CountTable count_table,