Fermat
Functions
Parallel Primitives

Detailed Description

This module provides a set of convenience wrappers that invoke CUB's device-wide parallel primitives without having to worry about memory management. All temporary storage is allocated within a single thrust::device_vector passed by the user, which can be safely reused across function calls.

Functions

template<typename VectorType >
void cugar::cuda::alloc_temp_storage (VectorType &vec, const uint64 size)
 
template<typename PredicateIterator >
bool cugar::cuda::any (const uint32 n, const PredicateIterator pred)
 
template<typename PredicateIterator >
bool cugar::cuda::all (const uint32 n, const PredicateIterator pred)
 
template<typename Iterator >
bool cugar::cuda::is_sorted (const uint32 n, const Iterator values)
 
template<typename Iterator , typename Headflags >
bool cugar::cuda::is_segment_sorted (const uint32 n, const Iterator values, const Headflags flags)
 
template<typename InputIterator , typename BinaryOp >
std::iterator_traits< InputIterator >::value_type cugar::cuda::reduce (const uint32 n, InputIterator d_in, BinaryOp op, thrust::device_vector< uint8 > &d_temp_storage)
 
template<typename InputIterator , typename OutputIterator , typename BinaryOp >
void cugar::cuda::inclusive_scan (const uint32 n, InputIterator d_in, OutputIterator d_out, BinaryOp op, thrust::device_vector< uint8 > &d_temp_storage)
 
template<typename InputIterator , typename OutputIterator , typename BinaryOp , typename Identity >
void cugar::cuda::exclusive_scan (const uint32 n, InputIterator d_in, OutputIterator d_out, BinaryOp op, Identity identity, thrust::device_vector< uint8 > &d_temp_storage)
 
template<typename InputIterator , typename FlagsIterator , typename OutputIterator >
uint32 cugar::cuda::copy_flagged (const uint32 n, InputIterator d_in, FlagsIterator d_flags, OutputIterator d_out, thrust::device_vector< uint8 > &d_temp_storage)
 
template<typename InputIterator , typename OutputIterator , typename Predicate >
uint32 cugar::cuda::copy_if (const uint32 n, InputIterator d_in, OutputIterator d_out, const Predicate pred, thrust::device_vector< uint8 > &d_temp_storage)
 
template<typename InputIterator , typename OutputIterator , typename CountIterator >
uint32 cugar::cuda::runlength_encode (const uint32 n, InputIterator d_in, OutputIterator d_out, CountIterator d_counts, thrust::device_vector< uint8 > &d_temp_storage)
 
template<typename KeyIterator , typename ValueIterator , typename OutputKeyIterator , typename OutputValueIterator , typename ReductionOp >
uint32 cugar::cuda::reduce_by_key (const uint32 n, KeyIterator d_keys_in, ValueIterator d_values_in, OutputKeyIterator d_keys_out, OutputValueIterator d_values_out, ReductionOp reduction_op, thrust::device_vector< uint8 > &d_temp_storage)
 

Function Documentation

◆ all()

template<typename PredicateIterator >
bool cugar::cuda::all ( const uint32  n,
const PredicateIterator  pred 
)

return true if all items in the range [0,n) evaluate to true

◆ alloc_temp_storage()

template<typename VectorType >
void cugar::cuda::alloc_temp_storage ( VectorType &  vec,
const uint64  size 
)

make sure the given buffer is at least as big as size; note: if this triggers a reallocation, the previous contents of the buffer are invalidated

◆ any()

template<typename PredicateIterator >
bool cugar::cuda::any ( const uint32  n,
const PredicateIterator  pred 
)

return true if any item in the range [0,n) evaluates to true

◆ copy_flagged()

template<typename InputIterator , typename FlagsIterator , typename OutputIterator >
uint32 cugar::cuda::copy_flagged ( const uint32  n,
InputIterator  d_in,
FlagsIterator  d_flags,
OutputIterator  d_out,
thrust::device_vector< uint8 > &  d_temp_storage 
)

device-wide copy of flagged items

Parameters
n: number of input items
d_in: a device input iterator
d_flags: a device flags iterator
d_out: a device output iterator
d_temp_storage: some temporary storage
Returns
the number of copied items

◆ copy_if()

template<typename InputIterator , typename OutputIterator , typename Predicate >
uint32 cugar::cuda::copy_if ( const uint32  n,
InputIterator  d_in,
OutputIterator  d_out,
const Predicate  pred,
thrust::device_vector< uint8 > &  d_temp_storage 
)

device-wide copy of predicated items

Parameters
n: number of input items
d_in: a device input iterator
d_out: a device output iterator
pred: a unary predicate functor
d_temp_storage: some temporary storage
Returns
the number of copied items

◆ exclusive_scan()

template<typename InputIterator , typename OutputIterator , typename BinaryOp , typename Identity >
void cugar::cuda::exclusive_scan ( const uint32  n,
InputIterator  d_in,
OutputIterator  d_out,
BinaryOp  op,
Identity  identity,
thrust::device_vector< uint8 > &  d_temp_storage 
)

device-wide exclusive scan

Parameters
n: number of items to scan
d_in: a device input iterator
d_out: a device output iterator
op: the binary scan operator
identity: the identity element
d_temp_storage: some temporary storage

◆ inclusive_scan()

template<typename InputIterator , typename OutputIterator , typename BinaryOp >
void cugar::cuda::inclusive_scan ( const uint32  n,
InputIterator  d_in,
OutputIterator  d_out,
BinaryOp  op,
thrust::device_vector< uint8 > &  d_temp_storage 
)

device-wide inclusive scan

Parameters
n: number of items to scan
d_in: a device input iterator
d_out: a device output iterator
op: the binary scan operator
d_temp_storage: some temporary storage

◆ is_segment_sorted()

template<typename Iterator , typename Headflags >
bool cugar::cuda::is_segment_sorted ( const uint32  n,
const Iterator  values,
const Headflags  flags 
)

return true if the items in the range [0,n) are sorted by segment, where the beginning of each segment is identified by a set head flag

◆ is_sorted()

template<typename Iterator >
bool cugar::cuda::is_sorted ( const uint32  n,
const Iterator  values 
)

return true if the items in the range [0,n) are sorted

◆ reduce()

template<typename InputIterator , typename BinaryOp >
std::iterator_traits< InputIterator >::value_type cugar::cuda::reduce ( const uint32  n,
InputIterator  d_in,
BinaryOp  op,
thrust::device_vector< uint8 > &  d_temp_storage 
)

device-wide reduce

Parameters
n: number of items to reduce
d_in: a device input iterator
op: the binary reduction operator
d_temp_storage: some temporary storage

◆ reduce_by_key()

template<typename KeyIterator , typename ValueIterator , typename OutputKeyIterator , typename OutputValueIterator , typename ReductionOp >
uint32 cugar::cuda::reduce_by_key ( const uint32  n,
KeyIterator  d_keys_in,
ValueIterator  d_values_in,
OutputKeyIterator  d_keys_out,
OutputValueIterator  d_values_out,
ReductionOp  reduction_op,
thrust::device_vector< uint8 > &  d_temp_storage 
)

device-wide reduce-by-key

Parameters
n: number of input items
d_keys_in: a device input iterator over the keys
d_values_in: a device input iterator over the values
d_keys_out: a device output iterator for the keys
d_values_out: a device output iterator for the reduced values
reduction_op: a reduction operator
d_temp_storage: some temporary storage
Returns
the number of output items written (one key and one reduced value per run of equal keys)

◆ runlength_encode()

template<typename InputIterator , typename OutputIterator , typename CountIterator >
uint32 cugar::cuda::runlength_encode ( const uint32  n,
InputIterator  d_in,
OutputIterator  d_out,
CountIterator  d_counts,
thrust::device_vector< uint8 > &  d_temp_storage 
)

device-wide run-length encode

Parameters
n: number of input items
d_in: a device input iterator
d_out: a device output iterator
d_counts: a device output count iterator
d_temp_storage: some temporary storage
Returns
the number of runs written to the output