Fermat
|
This module provides a set of convenience wrappers to invoke CUB's device-wide parallel primitives without worrying about memory management. All temporary storage is in fact allocated within a single thrust::device_vector passed by the user, which can be safely reused across function calls.
Functions | |
template<typename VectorType > | |
void | cugar::cuda::alloc_temp_storage (VectorType &vec, const uint64 size) |
template<typename PredicateIterator > | |
bool | cugar::cuda::any (const uint32 n, const PredicateIterator pred) |
template<typename PredicateIterator > | |
bool | cugar::cuda::all (const uint32 n, const PredicateIterator pred) |
template<typename Iterator > | |
bool | cugar::cuda::is_sorted (const uint32 n, const Iterator values) |
template<typename Iterator , typename Headflags > | |
bool | cugar::cuda::is_segment_sorted (const uint32 n, const Iterator values, const Headflags flags) |
template<typename InputIterator , typename BinaryOp > | |
std::iterator_traits< InputIterator >::value_type | cugar::cuda::reduce (const uint32 n, InputIterator d_in, BinaryOp op, thrust::device_vector< uint8 > &d_temp_storage) |
template<typename InputIterator , typename OutputIterator , typename BinaryOp > | |
void | cugar::cuda::inclusive_scan (const uint32 n, InputIterator d_in, OutputIterator d_out, BinaryOp op, thrust::device_vector< uint8 > &d_temp_storage) |
template<typename InputIterator , typename OutputIterator , typename BinaryOp , typename Identity > | |
void | cugar::cuda::exclusive_scan (const uint32 n, InputIterator d_in, OutputIterator d_out, BinaryOp op, Identity identity, thrust::device_vector< uint8 > &d_temp_storage) |
template<typename InputIterator , typename FlagsIterator , typename OutputIterator > | |
uint32 | cugar::cuda::copy_flagged (const uint32 n, InputIterator d_in, FlagsIterator d_flags, OutputIterator d_out, thrust::device_vector< uint8 > &d_temp_storage) |
template<typename InputIterator , typename OutputIterator , typename Predicate > | |
uint32 | cugar::cuda::copy_if (const uint32 n, InputIterator d_in, OutputIterator d_out, const Predicate pred, thrust::device_vector< uint8 > &d_temp_storage) |
template<typename InputIterator , typename OutputIterator , typename CountIterator > | |
uint32 | cugar::cuda::runlength_encode (const uint32 n, InputIterator d_in, OutputIterator d_out, CountIterator d_counts, thrust::device_vector< uint8 > &d_temp_storage) |
template<typename KeyIterator , typename ValueIterator , typename OutputKeyIterator , typename OutputValueIterator , typename ReductionOp > | |
uint32 | cugar::cuda::reduce_by_key (const uint32 n, KeyIterator d_keys_in, ValueIterator d_values_in, OutputKeyIterator d_keys_out, OutputValueIterator d_values_out, ReductionOp reduction_op, thrust::device_vector< uint8 > &d_temp_storage) |
bool cugar::cuda::all | ( | const uint32 | n, |
const PredicateIterator | pred | ||
) |
return true if all items in the range [0,n) evaluate to true
void cugar::cuda::alloc_temp_storage | ( | VectorType & | vec, |
const uint64 | size | ||
) |
make sure a given buffer is at least as big as size; note: upon reallocation, the contents of the buffer are invalidated
bool cugar::cuda::any | ( | const uint32 | n, |
const PredicateIterator | pred | ||
) |
return true if any item in the range [0,n) evaluates to true
uint32 cugar::cuda::copy_flagged | ( | const uint32 | n, |
InputIterator | d_in, | ||
FlagsIterator | d_flags, | ||
OutputIterator | d_out, | ||
thrust::device_vector< uint8 > & | d_temp_storage | ||
) |
device-wide copy of flagged items
n | number of input items |
d_in | a device input iterator |
d_flags | a device flags iterator |
d_out | a device output iterator |
d_temp_storage | some temporary storage |
uint32 cugar::cuda::copy_if | ( | const uint32 | n, |
InputIterator | d_in, | ||
OutputIterator | d_out, | ||
const Predicate | pred, | ||
thrust::device_vector< uint8 > & | d_temp_storage | ||
) |
device-wide copy of predicated items
n | number of input items |
d_in | a device input iterator |
d_out | a device output iterator |
pred | a unary predicate functor |
d_temp_storage | some temporary storage |
void cugar::cuda::exclusive_scan | ( | const uint32 | n, |
InputIterator | d_in, | ||
OutputIterator | d_out, | ||
BinaryOp | op, | ||
Identity | identity, | ||
thrust::device_vector< uint8 > & | d_temp_storage | ||
) |
device-wide exclusive scan
n | number of items to scan |
d_in | a device input iterator |
d_out | a device output iterator |
op | the binary reduction operator |
identity | the identity element |
d_temp_storage | some temporary storage |
void cugar::cuda::inclusive_scan | ( | const uint32 | n, |
InputIterator | d_in, | ||
OutputIterator | d_out, | ||
BinaryOp | op, | ||
thrust::device_vector< uint8 > & | d_temp_storage | ||
) |
device-wide inclusive scan
n | number of items to scan |
d_in | a device input iterator |
d_out | a device output iterator |
op | the binary reduction operator |
d_temp_storage | some temporary storage |
bool cugar::cuda::is_segment_sorted | ( | const uint32 | n, |
const Iterator | values, | ||
const Headflags | flags | ||
) |
return true if the items in the range [0,n) are sorted by segment, where the beginning of each segment is identified by a set head flag
bool cugar::cuda::is_sorted | ( | const uint32 | n, |
const Iterator | values | ||
) |
return true if the items in the range [0,n) are sorted
std::iterator_traits< InputIterator >::value_type cugar::cuda::reduce | ( | const uint32 | n, |
InputIterator | d_in, | ||
BinaryOp | op, | ||
thrust::device_vector< uint8 > & | d_temp_storage | ||
) |
device-wide reduce
n | number of items to reduce |
d_in | a device input iterator |
op | the binary reduction operator |
d_temp_storage | some temporary storage |
uint32 cugar::cuda::reduce_by_key | ( | const uint32 | n, |
KeyIterator | d_keys_in, | ||
ValueIterator | d_values_in, | ||
OutputKeyIterator | d_keys_out, | ||
OutputValueIterator | d_values_out, | ||
ReductionOp | reduction_op, | ||
thrust::device_vector< uint8 > & | d_temp_storage | ||
) |
device-wide reduce-by-key
n | number of input items |
d_keys_in | a device input keys iterator |
d_values_in | a device input values iterator |
d_keys_out | a device output keys iterator |
d_values_out | a device output values iterator |
reduction_op | a reduction operator |
d_temp_storage | some temporary storage |
uint32 cugar::cuda::runlength_encode | ( | const uint32 | n, |
InputIterator | d_in, | ||
OutputIterator | d_out, | ||
CountIterator | d_counts, | ||
thrust::device_vector< uint8 > & | d_temp_storage | ||
) |
device-wide run-length encode
n | number of input items |
d_in | a device input iterator |
d_out | a device output iterator |
d_counts | a device output count iterator |
d_temp_storage | some temporary storage |