NVBIO
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
paged_text.cpp
Go to the documentation of this file.
1 /*
2  * nvbio
3  * Copyright (c) 2011-2014, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
29 #include <nvbio/basic/popcount.h>
30 #include <nvbio/basic/algorithms.h>
31 #include <nvbio/basic/vector.h>
32 #include <nvbio/basic/numbers.h>
33 #include <nvbio/basic/primitives.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 
38 #if defined(PLATFORM_X86)
39 //#define SSE_LOADS
40 //#define SSE_MATH
41 #endif
42 
43 #if defined(SSE_LOADS)
44 #include <emmintrin.h>
45 #include <smmintrin.h>
46 #endif
47 
48 namespace nvbio {
49 
50 // build a set of buckets pointing to the lower/upper bounds of a sequence of keys
51 //
52 void build_buckets(const uint64 key_range, const uint32 n_keys, const uint64* keys, const uint32 bucket_size, nvbio::vector<host_tag,uint32>& buckets, const bool upper)
53 {
54  const uint32 n_buckets = (uint32)util::divide_ri( key_range, bucket_size ) + 1u;
55  buckets.resize( n_buckets );
56 
57  #pragma omp parallel for
58  for (int32 i = 0; i < int32( n_buckets ); ++i)
59  {
60  buckets[i] = upper ?
61  upper_bound_index( uint64(i) * bucket_size, keys, n_keys ) :
62  lower_bound_index( uint64(i) * bucket_size, keys, n_keys );
63  }
64 }
65 
66 // count the number of occurrences of a given 2-bit pattern in a given word
67 //
68 template <uint8 c>
71 {
72  const uint64 odd = ((c&2)? x : ~x) >> 1;
73  const uint64 even = ((c&1)? x : ~x);
74  const uint64 mask = odd & even & 0x5555555555555555U;
75  return popc(mask);
76 }
77 
78 // count the number of occurrences of a given 2-bit pattern in all but the first 'i' symbols
79 // of a 32-bit word mask.
80 //
81 template <uint8 c>
83 uint32 popc_2bit(const uint64 mask, const uint32 i)
84 {
85  const uint32 r = popc_2bit<c>( hibits_2bit( mask, i ) );
86 
87  // if the 2-bit pattern we're looking for is 0, we have to subtract
88  // the amount of symbols we added by masking
89  return (c == 0) ? r - i : r;
90 }
91 
92 #if defined(SSE_LOADS)
93 
94 // count the number of occurrences of a given 2-bit pattern in a given word
95 //
96 template <uint8 c>
98 uint32 popc_2bit(const __m128i x)
99 {
100 #if defined(SSE_MATH)
101  const __m128i ones = _mm_set_epi64x( (int64)0xFFFFFFFFFFFFFFFFull, (int64)0xFFFFFFFFFFFFFFFFull );
102  const __m128i fives = _mm_set_epi64x( (int64)0x5555555555555555ull, (int64)0x5555555555555555ull );
103  const __m128i odd = _mm_srli_epi64( ((c&2)? x : _mm_subs_epu8( ones, x )), 1 );
104  const __m128i even = ((c&1)? x : _mm_subs_epu8( ones, x ));
105  const __m128i mask = _mm_and_si128( _mm_and_si128( odd, even ), fives );
106  return popc( (uint64)_mm_extract_epi64( mask, 0 ) ) +
107  popc( (uint64)_mm_extract_epi64( mask, 1 ) );
108 #else
109  return popc_2bit<c>( (uint64)_mm_extract_epi64(x,0) ) +
110  popc_2bit<c>( (uint64)_mm_extract_epi64(x,1) );
111 #endif
112 }
113 
114 // given a 64-bit word encoding a set of 2-bit symbols, return a submask containing
115 // all but the first 'i' entries.
116 //
117 NVBIO_FORCEINLINE __m128i hibits_2bit(const __m128i mask, const uint32 w, const uint32 i)
118 {
119  const uint64 u = ~((uint64(1u) << (i<<1)) - 1u);
120  const __m128i m = _mm_set_epi64x(
121  (int64)(w == 0 ? u : 0xFFFFFFFFFFFFFFFFull),
122  (int64)(w == 1 ? u : 0x0000000000000000ull) );
123 
124  return _mm_and_si128( mask, m );
125 }
126 
127 // count the number of occurrences of a given 2-bit pattern in a given word
128 //
129 template <uint8 c>
131 uint32 popc_2bit(const __m128i mask, const uint32 w, const uint32 i)
132 {
133 #if 0 && defined(SSE_MATH)
134  const uint32 r = popc_2bit<c>( hibits_2bit( mask, w, i ) );
135 
136  // if the 2-bit pattern we're looking for is 0, we have to subtract
137  // the amount of symbols we added by masking
138  return (c == 0) ? r - i - w*32u : r;
139 #else
140  if (w == 0)
141  return popc_2bit<c>( (uint64)_mm_extract_epi64(mask,0), i );
142  else
143  return popc_2bit<c>( (uint64)_mm_extract_epi64(mask,0) ) +
144  popc_2bit<c>( (uint64)_mm_extract_epi64(mask,1), i );
145 #endif
146 }
147 
148 #endif
149 
150 uint32 popc_2bit(const uint64* page, const uint32 n, const uint32 mod, const uint32 c)
151 {
152 #if !defined(SSE_LOADS)
153  // sum up all the pop-counts of the relevant masks
154  uint32 out = 0u;
155 
156  for (uint32 j = 0; j < n; ++j)
157  out += popc_2bit( page[j], c );
158 
159  out += popc_2bit( page[n], c, mod );
160  return out;
161 #else
162  const __m128i* page_mm = reinterpret_cast<const __m128i*>( page );
163 
164  uint32 out = 0u;
165 
166  if (c == 0)
167  {
168  // sum up all the pop-counts of the relevant masks
169  for (uint32 j = 0; j < n/2; ++j)
170  out += popc_2bit<0>( _mm_load_si128( page_mm + j ) );
171 
172  out += popc_2bit<0>( _mm_load_si128( page_mm + n/2 ), n & 1, mod );
173  }
174  else if (c == 1)
175  {
176  // sum up all the pop-counts of the relevant masks
177  for (uint32 j = 0; j < n/2; ++j)
178  out += popc_2bit<1>( _mm_load_si128( page_mm + j ) );
179 
180  out += popc_2bit<1>( _mm_load_si128( page_mm + n/2 ), n & 1, mod );
181  }
182  else if (c == 2)
183  {
184  // sum up all the pop-counts of the relevant masks
185  for (uint32 j = 0; j < n/2; ++j)
186  out += popc_2bit<2>( _mm_load_si128( page_mm + j ) );
187 
188  out += popc_2bit<2>( _mm_load_si128( page_mm + n/2 ), n & 1, mod );
189  }
190  else
191  {
192  // sum up all the pop-counts of the relevant masks
193  for (uint32 j = 0; j < n/2; ++j)
194  out += popc_2bit<3>( _mm_load_si128( page_mm + j ) );
195 
196  out += popc_2bit<3>( _mm_load_si128( page_mm + n/2 ), n & 1, mod );
197  }
198  return out;
199 #endif
200 }
201 
202 void SparseSymbolSet::reserve(const uint64 n, const uint32 n_special)
203 {
204  m_pos.reserve( n_special );
205  m_new_pos.reserve( n_special );
206 
207  m_id.reserve( n_special );
208  m_new_id.reserve( n_special );
209 
210  m_buckets.reserve( (n / BUCKET_SIZE) + 1u );
211 }
212 
213 void SparseSymbolSet::set(const uint64 range, const uint32 n_special, const uint32* p, const uint32* id)
214 {
215  nvbio::vector<host_tag,uint8> h_temp_storage;
216 
217  m_pos.resize( n_special );
218  m_id.resize( n_special );
219 
221  p,
222  p + n_special,
223  m_pos.begin(),
225 
226  thrust::copy(
227  id,
228  id + n_special,
229  m_id.begin() );
230 
231  m_n_special = n_special;
232 
233  set_range( range );
234 }
235 
237  const uint64 range,
238  const uint32 n_block,
239  const uint64* g,
240  const uint32 n_special,
241  const uint32* p_special,
242  const uint64* g_special,
243  const uint32* id_special)
244 {
245  nvbio::vector<host_tag,uint8> h_temp_storage;
246 
247  // make room for the output dollars
248  m_new_pos.resize( m_n_special + n_special );
249  m_new_id.resize( m_n_special + n_special );
250 
251  // for each ext dollar, find how many insertions preceed it
252  #pragma omp parallel for
253  for (int64 i = 0; i < int64( m_n_special ); ++i)
254  {
255  const uint32 n = upper_bound_index( m_pos[i], &g[0], n_block );
256  m_pos[i] += n;
257  }
258 
259  // do a parallel merge
260  merge_by_key<host_tag>(
261  m_n_special,
262  n_special,
263  m_pos.begin(),
264  make_binary_transform_iterator( g_special, p_special, thrust::plus<uint64>() ),
265  m_id.begin(),
266  id_special,
267  m_new_pos.begin(),
268  m_new_id.begin(),
269  h_temp_storage );
270 
271  m_pos.swap( m_new_pos );
272  m_id.swap( m_new_id );
273 
274  m_n_special += n_special;
275 
276  set_range( range );
277 }
278 
279 // extend range
280 //
282 {
283  m_n = n;
284 
285  // build the buckets
287 }
288 
289 } // namespace nvbio