NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
popcount_inl.h
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #pragma once
29 
30 namespace nvbio {
31 
32 #ifdef __CUDACC__
33 NVBIO_FORCEINLINE NVBIO_DEVICE uint32 device_popc(const uint32 i) { return __popc(i); }
34 #endif
35 
36 #ifdef __CUDACC__
37 NVBIO_FORCEINLINE NVBIO_DEVICE uint32 device_popc(const uint64 i) { return __popcll(i); }
38 #endif
39 
40 // int32 popcount
41 //
43 {
44  return popc(uint32(i));
45 }
46 // uint32 popcount
47 //
49 {
50 #if defined(NVBIO_DEVICE_COMPILATION)
51  return device_popc( i );
52 #elif defined(__GNUC__)
53  return __builtin_popcount( i );
54 #else
55  uint32 v = i;
56  v = v - ((v >> 1) & 0x55555555);
57  v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
58  v = (v + (v >> 4)) & 0x0F0F0F0F;
59  return (v * 0x01010101) >> 24;
60 #endif
61 }
62 
63 // uint64 popcount
64 //
66 {
67 #if defined(NVBIO_DEVICE_COMPILATION)
68  return device_popc( i );
69 #elif defined(__GNUC__)
70  return __builtin_popcountll( i );
71 #else
72  //return popc( uint32(i & 0xFFFFFFFFU) ) + popc( uint32(i >> 32) );
73  uint64 v = i;
74  v = v - ((v >> 1) & 0x5555555555555555U);
75  v = (v & 0x3333333333333333U) + ((v >> 2) & 0x3333333333333333U);
76  v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FU;
77  return (v * 0x0101010101010101U) >> 56;
78 #endif
79 }
80 
81 // uint8 popcount
82 //
84 {
85  const uint32 lut[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
86  return lut[ i & 0x0F ] + lut[ i >> 4 ];
87 }
88 
89 // find the n-th bit set in a 4-bit mask (n in [1,4])
90 //
92 {
93  const uint32 popc0 = (mask & 1u);
94  const uint32 popc1 = popc0 + ((mask >> 1u) & 1u);
95  const uint32 popc2 = popc1 + ((mask >> 2u) & 1u);
96  return (n <= popc1) ?
97  (n == popc0) ? 0u : 1u :
98  (n == popc2) ? 2u : 3u;
99 }
100 
101 // compute the pop-count of 4-bit mask
102 //
104 {
105  return
106  (mask & 1u) +
107  ((mask >> 1u) & 1u) +
108  ((mask >> 2u) & 1u) +
109  ((mask >> 3u) & 1u);
110 }
111 
112 // find the n-th bit set in a 8-bit mask (n in [1,8])
113 //
115 {
116  const uint32 popc_half = popc4( mask );
117  uint32 _mask;
118  uint32 _n;
119  uint32 _r;
120 
121  if (n <= popc_half)
122  {
123  _mask = mask;
124  _n = n;
125  _r = 0u;
126  }
127  else
128  {
129  _mask = mask >> 4u;
130  _n = n - popc_half;
131  _r = 4u;
132  }
133  return find_nthbit4( _mask, _n ) + _r;
134 }
135 
136 // find the n-th bit set in a 16-bit mask (n in [1,16])
137 //
139 {
140  const uint32 popc_half = popc( mask & 0xFu );
141 
142  uint32 _mask;
143  uint32 _n;
144  uint32 _r;
145 
146  if (n <= popc_half)
147  {
148  _mask = mask;
149  _n = n;
150  _r = 0u;
151  }
152  else
153  {
154  _mask = mask >> 8u;
155  _n = n - popc_half;
156  _r = 8u;
157  }
158  return find_nthbit8( _mask, _n ) + _r;
159 }
160 
161 // find the n-th bit set in a 32-bit mask (n in [1,32])
162 //
164 {
165  const uint32 popc_half = popc( mask & 0xFFu );
166 
167  uint32 _mask;
168  uint32 _n;
169  uint32 _r;
170 
171  if (n <= popc_half)
172  {
173  _mask = mask;
174  _n = n;
175  _r = 0u;
176  }
177  else
178  {
179  _mask = mask >> 16u;
180  _n = n - popc_half;
181  _r = 16u;
182  }
183  return find_nthbit16( _mask, _n ) + _r;
184 }
185 
186 // find the n-th bit set in a 8-bit mask (n in [1,8])
187 //
189 {
190  return find_nthbit8( mask, n );
191 }
192 
193 // find the n-th bit set in a 16-bit mask (n in [1,16])
194 //
196 {
197  return find_nthbit16( mask, n );
198 }
199 
200 #if defined(NVBIO_DEVICE_COMPILATION)
201 NVBIO_FORCEINLINE NVBIO_DEVICE uint32 ffs_device(const int32 x) { return __ffs(x); }
202 NVBIO_FORCEINLINE NVBIO_DEVICE uint32 lzc_device(const int32 x) { return __clz(x); }
203 #endif
204 
205 // find the least significant bit set
206 //
208 {
209 #if defined(NVBIO_DEVICE_COMPILATION)
210  return ffs_device(x);
211 #elif defined(__GNUC__)
212  return __builtin_ffs(x);
213 #else
214  return x ? popc(x ^ (~(-x))) : 0u;
215 #endif
216 }
217 
218 // count the number of leading zeros
219 //
221 {
222 #if defined(NVBIO_DEVICE_COMPILATION)
223  return lzc_device(x);
224 #elif defined(__GNUC__)
225  return __builtin_clz(x);
226 #else
227  uint32 y = x;
228  y |= (y >> 1);
229  y |= (y >> 2);
230  y |= (y >> 4);
231  y |= (y >> 8);
232  y |= (y >> 16);
233  return (32u - popc(y));
234 #endif
235 }
236 
237 // count the number of occurrences of a given 2-bit pattern in a given word
238 //
240 {
241  const uint32 odd = ((c&2)? x : ~x) >> 1;
242  const uint32 even = ((c&1)? x : ~x);
243  const uint32 mask = odd & even & 0x55555555;
244  return popc(mask);
245 }
246 
247 // count the number of occurrences of a given 2-bit pattern in a given word
248 //
250 {
251  const uint64 odd = ((c&2)? x : ~x) >> 1;
252  const uint64 even = ((c&1)? x : ~x);
253  const uint64 mask = odd & even & 0x5555555555555555U;
254  return popc(mask);
255 }
256 
257 // count the number of occurrences of all 2-bit patterns in a given word,
258 // using an auxiliary table.
259 //
260 // \param count_table auxiliary table to perform the parallel bit-counting
261 // for all integers in the range [0,255].
262 //
263 // \return the 4 pop counts shifted and OR'ed together
264 //
265 template <typename CountTable>
267  const uint32 b,
268  const CountTable count_table)
269 {
270 #if 1
271  return
272  count_table[(b)&0xff] + count_table[(b)>>8&0xff] +
273  count_table[(b)>>16&0xff] + count_table[(b)>>24];
274 #elif 0
275  return (1u << ((b&3) << 3)) +
276  (1u << ((b>>2&3) << 3)) +
277  (1u << ((b>>4&3) << 3)) +
278  (1u << ((b>>6&3) << 3)) +
279  (1u << ((b>>8&3) << 3)) +
280  (1u << ((b>>10&3) << 3)) +
281  (1u << ((b>>12&3) << 3)) +
282  (1u << ((b>>14&3) << 3)) +
283  (1u << ((b>>16&3) << 3)) +
284  (1u << ((b>>18&3) << 3)) +
285  (1u << ((b>>20&3) << 3)) +
286  (1u << ((b>>22&3) << 3)) +
287  (1u << ((b>>24&3) << 3)) +
288  (1u << ((b>>26&3) << 3)) +
289  (1u << ((b>>28&3) << 3)) +
290  (1u << ((b>>30) << 3));
291 #else
292  const uint32 n1 = popc( ~b >> 1 & ~b & 0x55555555 );
293  const uint32 n2 = popc( ~b >> 1 & b & 0x55555555 );
294  const uint32 n3 = popc( b >> 1 & ~b & 0x55555555 );
295  const uint32 n4 = 16u - n1 - n2 - n3;
296 
297  return n1 +
298  (n2 << 8) +
299  (n3 << 16) +
300  (n4 << 24);
301 #endif
302 }
303 
304 // count the number of occurrences of all 2-bit patterns in a given word,
305 // using an auxiliary table.
306 //
307 // \param count_table auxiliary table to perform the parallel bit-counting
308 // for all integers in the range [0,255].
309 //
310 // \return the 4 pop counts shifted and OR'ed together
311 //
312 template <typename CountTable>
314  const uint64 b,
315  const CountTable count_table)
316 {
317  return
318  count_table[(b)&0xff] + count_table[(b)>>8&0xff] +
319  count_table[(b)>>16&0xff] + count_table[(b)>>24&0xff] +
320  count_table[(b)>>32&0xff] + count_table[(b)>>40&0xff] +
321  count_table[(b)>>48&0xff] + count_table[(b)>>56];
322 }
323 
324 // given a 32-bit word encoding a set of 2-bit symbols, return a submask containing
325 // all but the first 'i' entries.
326 //
328 {
329  return mask & ~((1u<<(i<<1)) - 1u);
330 }
331 
332 // given a 64-bit word encoding a set of 2-bit symbols, return a submask containing
333 // all but the first 'i' entries.
334 //
336 {
337  return mask & ~((uint64(1u)<<(i<<1)) - 1u);
338 }
339 
340 // count the number of occurrences of a given 2-bit pattern in all but the first 'i' symbols
341 // of a 32-bit word mask.
342 //
344 {
345  const uint32 r = popc_2bit( hibits_2bit( mask, i ), c );
346 
347  // if the 2-bit pattern we're looking for is 0, we have to subtract
348  // the amount of symbols we added by masking
349  return (c == 0) ? r - i : r;
350 }
351 
352 // count the number of occurrences of a given 2-bit pattern in all but the first 'i' symbols
353 // of a 32-bit word mask.
354 //
356 {
357  const uint32 r = popc_2bit( hibits_2bit( mask, i ), c );
358 
359  // if the 2-bit pattern we're looking for is 0, we have to subtract
360  // the amount of symbols we added by masking
361  return (c == 0) ? r - i : r;
362 }
363 
364 // count the number of occurrences of a given n-bit pattern in a given word
365 //
366 template <uint32 N>
368 {
369  if (N == 1)
370  {
371  const uint32 r = popc(x);
372  return c ? r : 32u - r;
373  }
374  else if (N == 2)
375  return popc_2bit(x,c);
376  else if (N == 4)
377  {
378  // use brute-force - TODO: find a clever algorithm
379  return (((x >> 0) & 15u) == c) +
380  (((x >> 4) & 15u) == c) +
381  (((x >> 8) & 15u) == c) +
382  (((x >> 12) & 15u) == c) +
383  (((x >> 16) & 15u) == c) +
384  (((x >> 20) & 15u) == c) +
385  (((x >> 24) & 15u) == c) +
386  (((x >> 28) & 15u) == c);
387  }
388  else if (N == 8)
389  {
390  // use brute-force - TODO: find a clever algorithm
391  return (((x >> 0) & 255u) == c) +
392  (((x >> 8) & 255u) == c) +
393  (((x >> 16) & 255u) == c) +
394  (((x >> 24) & 255u) == c);
395  }
396  return 0u;
397 }
398 
399 // count the number of occurrences of a given n-bit pattern in a given word
400 //
401 template <uint32 N>
403 {
404  if (N == 1)
405  {
406  const uint32 r = popc(x);
407  return c ? r : 64u - r;
408  }
409  else if (N == 2)
410  return popc_2bit(x,c);
411  else
412  {
413  return popc_nbit<N>( uint32(x), c ) +
414  popc_nbit<N>( uint32(x >> 32), c );
415  }
416 }
417 
418 // given a 32-bit word encoding a set of n-bit symbols, return a submask containing
419 // all but the first 'i' entries.
420 //
421 template <uint32 N>
423 {
424  const uint32 LOG_N = (N == 1) ? 0u :
425  (N == 2) ? 1u :
426  (N == 4) ? 2u :
427  3u;
428 
429  return mask & ~((1u<<(i<<LOG_N)) - 1u);
430 }
431 
432 // given a 64-bit word encoding a set of n-bit symbols, return a submask containing
433 // all but the first 'i' entries.
434 //
435 template <uint32 N>
437 {
438  const uint32 LOG_N = (N == 1) ? 1u :
439  (N == 2) ? 1u :
440  (N == 4) ? 2u :
441  3u;
442 
443  return mask & ~((uint64(1u)<<(i<<LOG_N)) - 1u);
444 }
445 
446 // count the number of occurrences of a given n-bit pattern in all but the first 'i' symbols
447 // of a 32-bit word mask.
448 //
449 template <uint32 N>
451 {
452  const uint32 r = popc_nbit<N>( hibits_nbit<N>( mask, i ), c );
453 
454  // if the n-bit pattern we're looking for is 0, we have to subtract
455  // the amount of symbols we added by masking
456  return (c == 0) ? r - i : r;
457 }
458 
459 // count the number of occurrences of a given n-bit pattern in all but the first 'i' symbols
460 // of a 32-bit word mask.
461 //
462 template <uint32 N>
464 {
465  const uint32 r = popc_nbit<N>( hibits_nbit<N>( mask, i ), c );
466 
467  // if the n-bit pattern we're looking for is 0, we have to subtract
468  // the amount of symbols we added by masking
469  return (c == 0) ? r - i : r;
470 }
471 
472 // count the number of occurrences of all 2-bit patterns in all but the first 'i' symbols
473 // of a given word, using an auxiliary table.
474 //
475 // \param count_table auxiliary table to perform the parallel bit-counting
476 // for all integers in the range [0,255].
477 //
478 // \return the 4 pop counts shifted and OR'ed together
479 //
480 template <typename CountTable>
482  const uint32 mask,
483  const CountTable count_table,
484  const uint32 i)
485 {
486  return popc_2bit_all( hibits_2bit( mask, i ), count_table ) - i;
487 }
488 
489 // count the number of occurrences of all 2-bit patterns in all but the first 'i' symbols
490 // of a given word, using an auxiliary table.
491 //
492 // \param count_table auxiliary table to perform the parallel bit-counting
493 // for all integers in the range [0,255].
494 //
495 // \return the 4 pop counts shifted and OR'ed together
496 //
497 template <typename CountTable>
499  const uint64 mask,
500  const CountTable count_table,
501  const uint32 i)
502 {
503  return popc_2bit_all( hibits_2bit( mask, i ), count_table ) - i;
504 }
505 
506 } // namespace nvbio