// Fragment of a helper templated on VectorType; the log text below names it
// alloc_temp_storage(). Several lines of the definition are absent here.
37 template <
typename VectorType>
// Branch taken when the vector currently holds fewer than `size` entries.
40 if (vec.size() < size)
// Failure report: prints the requested entry count and the matching byte
// count (size * sizeof(value_type)).
// NOTE(review): both values are formatted with %llu; the declared type of
// `size` is not visible here -- confirm it is unsigned long long wide.
49 log_error(stderr,
"alloc_temp_storage() : allocation failed! (%llu entries / %llu bytes)\n", size, size *
sizeof(
typename VectorType::value_type));
// Fragment of device code templated on PredicateIterator (it reads
// threadIdx/blockIdx/blockDim and calls __syncthreads_or). The function name,
// the remaining parameters and the code that consumes `p` are absent here.
57 template <
typename PredicateIterator>
61 const PredicateIterator pred,
// Flat index of this thread, built from the x components only.
64 const uint32 i = threadIdx.x + blockIdx.x * blockDim.x;
// Threads with i >= n contribute false, so they cannot make the OR below true.
66 const bool p_i = (i < n ? pred[i] :
false);
// __syncthreads_or is a block-wide barrier that returns non-zero to every
// thread of the block iff at least one thread passed a non-zero value.
67 const bool p = __syncthreads_or( p_i );
// Fragment of device code templated on PredicateIterator (it reads
// threadIdx/blockIdx/blockDim and calls __syncthreads_and). The function name,
// the remaining parameters and the code that consumes `p` are absent here.
77 template <
typename PredicateIterator>
81 const PredicateIterator pred,
// Flat index of this thread, built from the x components only.
84 const uint32 i = threadIdx.x + blockIdx.x * blockDim.x;
// Threads with i >= n contribute true, so they cannot make the AND below false.
86 const bool p_i = (i < n ? pred[i] :
true);
// __syncthreads_and is a block-wide barrier that returns non-zero to every
// thread of the block iff all threads passed a non-zero value.
87 const bool p = __syncthreads_and( p_i );
// Fragment of a routine templated on PredicateIterator that receives `pred`
// by const value. Its name, other parameters and remaining body are absent.
97 template <
typename PredicateIterator>
100 const PredicateIterator pred)
// NOTE(review): presumably the thread-block size of a kernel launch that is
// not shown here -- confirm.
102 const uint32 block_dim = 256;
// One-element device vector of uint32, initialised to 0.
// NOTE(review): likely the result flag a kernel sets when some predicate
// holds -- confirm against the launch site.
105 thrust::device_vector<uint32> r( 1u, 0u );
// Fragment of a routine templated on PredicateIterator that receives `pred`
// by const value. Its name, other parameters and remaining body are absent.
113 template <
typename PredicateIterator>
116 const PredicateIterator pred)
// NOTE(review): presumably the thread-block size of a kernel launch that is
// not shown here -- confirm.
118 const uint32 block_dim = 256;
// One-element device vector of uint32, initialised to 1.
// NOTE(review): likely the result flag a kernel clears when some predicate
// fails -- confirm against the launch site.
121 thrust::device_vector<uint32> r( 1u, 1u );
129 template <
typename Iterator1,
typename Iterator2>
146 template <
typename Iterator1,
typename Iterator2,
typename Headflags>
164 template <
typename Iterator>
167 const Iterator values)
175 template <
typename Iterator,
typename Headflags>
178 const Iterator values,
179 const Headflags flags)
// reduce(): fragment. The declared return type is the value_type of
// InputIterator; d_temp_storage is a caller-supplied byte vector taken by
// reference. The leading parameters, the arguments of both CUB calls and the
// return statement are absent here.
191 template <
typename InputIterator,
typename BinaryOp>
192 typename std::iterator_traits<InputIterator>::value_type
reduce(
196 thrust::device_vector<uint8>& d_temp_storage)
198 typedef typename std::iterator_traits<InputIterator>::value_type value_type;
// One-element device vector of value_type.
// NOTE(review): presumably receives the reduction result -- confirm.
200 thrust::device_vector<value_type> d_out(1);
// Byte count handed to the CUB call below together with a NULL storage pointer.
202 size_t temp_bytes = 0;
// Size query: with a NULL temp-storage pointer CUB only writes the required
// number of bytes into temp_bytes and performs no reduction.
204 cub::DeviceReduce::Reduce(
205 (
void*)NULL, temp_bytes,
// Second invocation; its arguments are not shown.
// NOTE(review): presumably the actual reduction using d_temp_storage -- confirm.
214 cub::DeviceReduce::Reduce(
// Fragment of a wrapper around cub::DeviceScan::InclusiveScan, templated on
// the input/output iterator types and a binary operator type. d_out is taken
// by value and d_temp_storage is a caller-supplied byte vector taken by
// reference. The function name, other parameters and call arguments are absent.
232 template <
typename InputIterator,
typename OutputIterator,
typename BinaryOp>
236 OutputIterator d_out,
238 thrust::device_vector<uint8>& d_temp_storage)
// Byte count handed to the CUB call below together with a NULL storage pointer.
240 size_t temp_bytes = 0;
// Size query: with a NULL temp-storage pointer CUB only writes the required
// number of bytes into temp_bytes and performs no scan.
242 cub::DeviceScan::InclusiveScan(
243 (
void*)NULL, temp_bytes,
// Second invocation; its arguments are not shown.
// NOTE(review): presumably the actual scan using d_temp_storage -- confirm.
252 cub::DeviceScan::InclusiveScan(
// Fragment of a wrapper around cub::DeviceScan::ExclusiveScan, templated on
// the input/output iterator types, a binary operator type and an Identity
// type. d_out is taken by value and d_temp_storage is a caller-supplied byte
// vector taken by reference. The function name, other parameters and call
// arguments are absent.
// NOTE(review): Identity is presumably the type of the scan's seed value --
// confirm against the full signature.
269 template <
typename InputIterator,
typename OutputIterator,
typename BinaryOp,
typename Identity>
273 OutputIterator d_out,
276 thrust::device_vector<uint8>& d_temp_storage)
// Byte count handed to the CUB call below together with a NULL storage pointer.
278 size_t temp_bytes = 0;
// Size query: with a NULL temp-storage pointer CUB only writes the required
// number of bytes into temp_bytes and performs no scan.
280 cub::DeviceScan::ExclusiveScan(
281 (
void*)NULL, temp_bytes,
// Second invocation; its arguments are not shown.
// NOTE(review): presumably the actual scan using d_temp_storage -- confirm.
291 cub::DeviceScan::ExclusiveScan(
// Fragment of a wrapper around cub::DeviceSelect::Flagged, templated on the
// input, flags and output iterator types. d_flags and d_out are taken by
// value; d_temp_storage is a caller-supplied byte vector taken by reference.
// The function name, leading parameters and call arguments are absent.
// Returns element 0 of d_num_selected converted to uint32.
310 template <
typename InputIterator,
typename FlagsIterator,
typename OutputIterator>
314 FlagsIterator d_flags,
315 OutputIterator d_out,
316 thrust::device_vector<uint8>& d_temp_storage)
// Byte count handed to the CUB call below together with a NULL storage pointer.
318 size_t temp_bytes = 0;
// One-element device vector of int.
// NOTE(review): presumably where CUB stores the selected-item count -- confirm.
319 thrust::device_vector<int> d_num_selected(1);
// Size query: with a NULL temp-storage pointer CUB only writes the required
// number of bytes into temp_bytes and performs no selection.
321 cub::DeviceSelect::Flagged(
322 (
void*)NULL, temp_bytes,
// Second invocation; its arguments are not shown.
// NOTE(review): presumably the actual selection using d_temp_storage -- confirm.
332 cub::DeviceSelect::Flagged(
// Element access on a thrust::device_vector reads the value back to the host.
340 return uint32( d_num_selected[0] );
// Fragment of a wrapper around cub::DeviceSelect::If, templated on the input
// and output iterator types and a predicate type. d_out and pred are taken by
// value; d_temp_storage is a caller-supplied byte vector taken by reference.
// The function name, leading parameters and call arguments are absent.
// Returns element 0 of d_num_selected converted to uint32.
353 template <
typename InputIterator,
typename OutputIterator,
typename Predicate>
357 OutputIterator d_out,
358 const Predicate pred,
359 thrust::device_vector<uint8>& d_temp_storage)
// Byte count handed to the CUB call below together with a NULL storage pointer.
361 size_t temp_bytes = 0;
// One-element device vector of int.
// NOTE(review): presumably where CUB stores the selected-item count -- confirm.
362 thrust::device_vector<int> d_num_selected(1);
// Size query: with a NULL temp-storage pointer CUB only writes the required
// number of bytes into temp_bytes and performs no selection.
364 cub::DeviceSelect::If(
365 (
void*)NULL, temp_bytes,
// Second invocation; its arguments are not shown.
// NOTE(review): presumably the actual selection using d_temp_storage -- confirm.
375 cub::DeviceSelect::If(
// Element access on a thrust::device_vector reads the value back to the host.
383 return uint32( d_num_selected[0] );
// Fragment of a wrapper around cub::DeviceReduce::RunLengthEncode, templated
// on the input, output and count iterator types. d_out and d_counts are taken
// by value; d_temp_storage is a caller-supplied byte vector taken by
// reference. The function name, leading parameters and call arguments are
// absent. Returns element 0 of d_num_selected converted to uint32.
396 template <
typename InputIterator,
typename OutputIterator,
typename CountIterator>
400 OutputIterator d_out,
401 CountIterator d_counts,
402 thrust::device_vector<uint8>& d_temp_storage)
// Byte count handed to the CUB call below together with a NULL storage pointer.
404 size_t temp_bytes = 0;
// One-element device vector of int.
// NOTE(review): presumably where CUB stores the number of runs found -- confirm.
405 thrust::device_vector<int> d_num_selected(1);
// Size query: with a NULL temp-storage pointer CUB only writes the required
// number of bytes into temp_bytes and performs no encoding.
407 cub::DeviceReduce::RunLengthEncode(
408 (
void*)NULL, temp_bytes,
// Second invocation; its arguments are not shown.
// NOTE(review): presumably the actual encoding using d_temp_storage -- confirm.
418 cub::DeviceReduce::RunLengthEncode(
// Element access on a thrust::device_vector reads the value back to the host.
426 return uint32( d_num_selected[0] );
// Fragment of a wrapper around cub::DeviceReduce::ReduceByKey, templated on
// the key/value input and output iterator types and a reduction operator type.
// All iterators and reduction_op are taken by value; d_temp_storage is a
// caller-supplied byte vector taken by reference. The function name, any
// leading parameters and the call arguments are absent.
// Returns element 0 of d_num_selected converted to uint32.
442 template <
typename KeyIterator,
typename ValueIterator,
typename OutputKeyIterator,
typename OutputValueIterator,
typename ReductionOp>
445 KeyIterator d_keys_in,
446 ValueIterator d_values_in,
447 OutputKeyIterator d_keys_out,
448 OutputValueIterator d_values_out,
449 ReductionOp reduction_op,
450 thrust::device_vector<uint8>& d_temp_storage)
// Byte count handed to the CUB call below together with a NULL storage pointer.
452 size_t temp_bytes = 0;
// One-element device vector of int.
// NOTE(review): presumably where CUB stores the number of output segments --
// confirm.
453 thrust::device_vector<int> d_num_selected(1);
// Size query: with a NULL temp-storage pointer CUB only writes the required
// number of bytes into temp_bytes and performs no reduction.
455 cub::DeviceReduce::ReduceByKey(
456 (
void*)NULL, temp_bytes,
// Second invocation; its arguments are not shown.
// NOTE(review): presumably the actual reduction using d_temp_storage -- confirm.
468 cub::DeviceReduce::ReduceByKey(
// Element access on a thrust::device_vector reads the value back to the host.
478 return uint32( d_num_selected[0] );