|
Fermat
|
This module provides a set of convenience wrappers for invoking CUB's device-wide parallel primitives without having to worry about memory management. All temporary storage is allocated within a single thrust::device_vector passed in by the user, which can safely be reused across function calls.
Functions | |
| template<typename VectorType > | |
| void | cugar::cuda::alloc_temp_storage (VectorType &vec, const uint64 size) |
| template<typename PredicateIterator > | |
| bool | cugar::cuda::any (const uint32 n, const PredicateIterator pred) |
| template<typename PredicateIterator > | |
| bool | cugar::cuda::all (const uint32 n, const PredicateIterator pred) |
| template<typename Iterator > | |
| bool | cugar::cuda::is_sorted (const uint32 n, const Iterator values) |
| template<typename Iterator , typename Headflags > | |
| bool | cugar::cuda::is_segment_sorted (const uint32 n, const Iterator values, const Headflags flags) |
| template<typename InputIterator , typename BinaryOp > | |
| std::iterator_traits< InputIterator >::value_type | cugar::cuda::reduce (const uint32 n, InputIterator d_in, BinaryOp op, thrust::device_vector< uint8 > &d_temp_storage) |
| template<typename InputIterator , typename OutputIterator , typename BinaryOp > | |
| void | cugar::cuda::inclusive_scan (const uint32 n, InputIterator d_in, OutputIterator d_out, BinaryOp op, thrust::device_vector< uint8 > &d_temp_storage) |
| template<typename InputIterator , typename OutputIterator , typename BinaryOp , typename Identity > | |
| void | cugar::cuda::exclusive_scan (const uint32 n, InputIterator d_in, OutputIterator d_out, BinaryOp op, Identity identity, thrust::device_vector< uint8 > &d_temp_storage) |
| template<typename InputIterator , typename FlagsIterator , typename OutputIterator > | |
| uint32 | cugar::cuda::copy_flagged (const uint32 n, InputIterator d_in, FlagsIterator d_flags, OutputIterator d_out, thrust::device_vector< uint8 > &d_temp_storage) |
| template<typename InputIterator , typename OutputIterator , typename Predicate > | |
| uint32 | cugar::cuda::copy_if (const uint32 n, InputIterator d_in, OutputIterator d_out, const Predicate pred, thrust::device_vector< uint8 > &d_temp_storage) |
| template<typename InputIterator , typename OutputIterator , typename CountIterator > | |
| uint32 | cugar::cuda::runlength_encode (const uint32 n, InputIterator d_in, OutputIterator d_out, CountIterator d_counts, thrust::device_vector< uint8 > &d_temp_storage) |
| template<typename KeyIterator , typename ValueIterator , typename OutputKeyIterator , typename OutputValueIterator , typename ReductionOp > | |
| uint32 | cugar::cuda::reduce_by_key (const uint32 n, KeyIterator d_keys_in, ValueIterator d_values_in, OutputKeyIterator d_keys_out, OutputValueIterator d_values_out, ReductionOp reduction_op, thrust::device_vector< uint8 > &d_temp_storage) |
| bool cugar::cuda::all | ( | const uint32 | n, |
| const PredicateIterator | pred | ||
| ) |
return true if all items in the range [0,n) evaluate to true
| void cugar::cuda::alloc_temp_storage | ( | VectorType & | vec, |
| const uint64 | size | ||
| ) |
make sure a given buffer is at least as big as size; note: upon reallocation, the contents of the buffer are invalidated
| bool cugar::cuda::any | ( | const uint32 | n, |
| const PredicateIterator | pred | ||
| ) |
return true if any item in the range [0,n) evaluates to true
| uint32 cugar::cuda::copy_flagged | ( | const uint32 | n, |
| InputIterator | d_in, | ||
| FlagsIterator | d_flags, | ||
| OutputIterator | d_out, | ||
| thrust::device_vector< uint8 > & | d_temp_storage | ||
| ) |
device-wide copy of flagged items
| n | number of input items |
| d_in | a device input iterator |
| d_flags | a device flags iterator |
| d_out | a device output iterator |
| d_temp_storage | some temporary storage |
| uint32 cugar::cuda::copy_if | ( | const uint32 | n, |
| InputIterator | d_in, | ||
| OutputIterator | d_out, | ||
| const Predicate | pred, | ||
| thrust::device_vector< uint8 > & | d_temp_storage | ||
| ) |
device-wide copy of predicated items
| n | number of input items |
| d_in | a device input iterator |
| d_out | a device output iterator |
| pred | a unary predicate functor |
| d_temp_storage | some temporary storage |
| void cugar::cuda::exclusive_scan | ( | const uint32 | n, |
| InputIterator | d_in, | ||
| OutputIterator | d_out, | ||
| BinaryOp | op, | ||
| Identity | identity, | ||
| thrust::device_vector< uint8 > & | d_temp_storage | ||
| ) |
device-wide exclusive scan
| n | number of items to scan |
| d_in | a device input iterator |
| d_out | a device output iterator |
| op | the binary reduction operator |
| identity | the identity element |
| d_temp_storage | some temporary storage |
| void cugar::cuda::inclusive_scan | ( | const uint32 | n, |
| InputIterator | d_in, | ||
| OutputIterator | d_out, | ||
| BinaryOp | op, | ||
| thrust::device_vector< uint8 > & | d_temp_storage | ||
| ) |
device-wide inclusive scan
| n | number of items to scan |
| d_in | a device input iterator |
| d_out | a device output iterator |
| op | the binary reduction operator |
| d_temp_storage | some temporary storage |
| bool cugar::cuda::is_segment_sorted | ( | const uint32 | n, |
| const Iterator | values, | ||
| const Headflags | flags | ||
| ) |
return true if the items in the range [0,n) are sorted by segment, where the beginning of each segment is identified by a set head flag
| bool cugar::cuda::is_sorted | ( | const uint32 | n, |
| const Iterator | values | ||
| ) |
return true if the items in the range [0,n) are sorted
| std::iterator_traits< InputIterator >::value_type cugar::cuda::reduce | ( | const uint32 | n, |
| InputIterator | d_in, | ||
| BinaryOp | op, | ||
| thrust::device_vector< uint8 > & | d_temp_storage | ||
| ) |
device-wide reduce
| n | number of items to reduce |
| d_in | a device iterator |
| op | the binary reduction operator |
| d_temp_storage | some temporary storage |
| uint32 cugar::cuda::reduce_by_key | ( | const uint32 | n, |
| KeyIterator | d_keys_in, | ||
| ValueIterator | d_values_in, | ||
| OutputKeyIterator | d_keys_out, | ||
| OutputValueIterator | d_values_out, | ||
| ReductionOp | reduction_op, | ||
| thrust::device_vector< uint8 > & | d_temp_storage | ||
| ) |
device-wide reduce-by-key: reduces the values of each run of consecutive equal keys using the given reduction operator
| n | number of input items |
| d_keys_in | a device input iterator over the keys |
| d_values_in | a device input iterator over the values |
| d_keys_out | a device output iterator for the keys |
| d_values_out | a device output iterator for the reduced values |
| reduction_op | a reduction operator |
| d_temp_storage | some temporary storage |
| uint32 cugar::cuda::runlength_encode | ( | const uint32 | n, |
| InputIterator | d_in, | ||
| OutputIterator | d_out, | ||
| CountIterator | d_counts, | ||
| thrust::device_vector< uint8 > & | d_temp_storage | ||
| ) |
device-wide run-length encode
| n | number of input items |
| d_in | a device input iterator |
| d_out | a device output iterator |
| d_counts | a device output count iterator |
| d_temp_storage | some temporary storage |
1.8.13