CUB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups
Namespaces | Functions
util_ptx.cuh File Reference
#include "util_type.cuh"
#include "util_arch.cuh"
#include "util_namespace.cuh"
#include "util_debug.cuh"

Namespaces

 cub
 Optional outer namespace(s)
 

Functions

__device__ __forceinline__
unsigned int 
cub::SHR_ADD (unsigned int x, unsigned int shift, unsigned int addend)
 Shift-right then add. Returns (x >> shift) + addend.
 
__device__ __forceinline__
unsigned int 
cub::SHL_ADD (unsigned int x, unsigned int shift, unsigned int addend)
 Shift-left then add. Returns (x << shift) + addend.
 
template<typename UnsignedBits >
__device__ __forceinline__
unsigned int 
cub::BFE (UnsignedBits source, unsigned int bit_start, unsigned int num_bits)
 Bitfield-extract. Extracts num_bits from source starting at bit-offset bit_start. The input source may be an 8b, 16b, 32b, or 64b unsigned integer type.
 
__device__ __forceinline__ void cub::BFI (unsigned int &ret, unsigned int x, unsigned int y, unsigned int bit_start, unsigned int num_bits)
 Bitfield insert. Inserts the num_bits least significant bits of y into x at bit-offset bit_start.
 
__device__ __forceinline__
unsigned int 
cub::IADD3 (unsigned int x, unsigned int y, unsigned int z)
 Three-operand add. Returns x + y + z.
 
__device__ __forceinline__ int cub::PRMT (unsigned int a, unsigned int b, unsigned int index)
 Byte-permute. Pick four arbitrary bytes from two 32-bit registers, and reassemble them into a 32-bit destination register. For SM2.0 or later. More...
 
__device__ __forceinline__ void cub::ThreadExit ()
 Terminates the calling thread.
 
__device__ __forceinline__ void cub::ThreadTrap ()
 Abort execution and generate an interrupt to the host CPU.
 
__device__ __forceinline__ int cub::RowMajorTid (int block_dim_x, int block_dim_y, int block_dim_z)
 Returns the row-major linear thread identifier for a multidimensional thread block.
 
__device__ __forceinline__
unsigned int 
cub::LaneId ()
 Returns the warp lane ID of the calling thread.
 
__device__ __forceinline__
unsigned int 
cub::WarpId ()
 Returns the warp ID of the calling thread. Warp ID is guaranteed to be unique among warps, but may not correspond to a zero-based ranking within the thread block.
 
__device__ __forceinline__
unsigned int 
cub::LaneMaskLt ()
 Returns the warp lane mask of all lanes less than the calling thread.
 
__device__ __forceinline__
unsigned int 
cub::LaneMaskLe ()
 Returns the warp lane mask of all lanes less than or equal to the calling thread.
 
__device__ __forceinline__
unsigned int 
cub::LaneMaskGt ()
 Returns the warp lane mask of all lanes greater than the calling thread.
 
__device__ __forceinline__
unsigned int 
cub::LaneMaskGe ()
 Returns the warp lane mask of all lanes greater than or equal to the calling thread.
 
template<int LOGICAL_WARP_THREADS, typename T >
__device__ __forceinline__ T cub::ShuffleUp (T input, int src_offset, int first_thread, unsigned int member_mask)
 Shuffle-up for any data type. Each warp lane i obtains the value input contributed by warp lane (i - src_offset). For thread lanes i < src_offset, the thread's own input is returned to the thread.

shfl_up_logo.png
.
More...
 
template<int LOGICAL_WARP_THREADS, typename T >
__device__ __forceinline__ T cub::ShuffleDown (T input, int src_offset, int last_thread, unsigned int member_mask)
 Shuffle-down for any data type. Each warp lane i obtains the value input contributed by warp lane (i + src_offset). For thread lanes i where (i + src_offset) >= WARP_THREADS, the thread's own input is returned to the thread.

shfl_down_logo.png
.
More...
 
template<int LOGICAL_WARP_THREADS, typename T >
__device__ __forceinline__ T cub::ShuffleIndex (T input, int src_lane, unsigned int member_mask)
 Shuffle-broadcast for any data type. Each warp lane i obtains the value input contributed by warp lane src_lane. If src_lane < 0 or src_lane >= WARP_THREADS, the thread's own input is returned to the thread.

shfl_broadcast_logo.png
.
More...
 
template<int LABEL_BITS>
__device__ unsigned int cub::MatchAny (unsigned int label)
 

Detailed Description

PTX intrinsics