This module implements device ping-pong queues: a pair of input / output queues built on top of ping-pong device memory storage, whose roles are swapped at each iteration.
At a Glance
- PingPongQueues
- PingPongQueuesView
Example
// Copies the even elements of the input queue into the output queue.
//
// Each thread inspects at most one input element, selected by its global
// 1D thread index; threads whose index is not below queues.in_size do nothing.
// An output slot is reserved by atomically incrementing the counter that
// queues.out_size points to, and the element is stored at that slot.
__global__ void copy_even_kernel(PingPongQueuesView<uint32> queues)
{
    const uint32 tid = threadIdx.x + blockIdx.x * blockDim.x;

    // only threads that map to a valid input element do any work
    if (tid < queues.in_size)
    {
        // fetch this thread's element from the input queue
        const uint32 value = queues.in_queue[tid];

        // an element is even when its lowest bit is clear
        const bool is_even = (value & 1) == 0;
        if (is_even)
        {
            // atomicAdd returns the previous counter value, which becomes
            // the index of the slot this thread writes to
            const uint32 dst = atomicAdd( queues.out_size, 1u );
            queues.out_queue[ dst ] = value;
        }
    }
}
// build a pair of ping-pong queues holding uint32 elements
PingPongQueues<uint32> queues;

// size the backing storage and the input queue for 1000 elements
// (presumably resize_arena reserves storage and resize sets the input
// queue size - confirm against the PingPongQueues reference)
queues.resize_arena( 1000 );
queues.resize( 1000 );

// fill the input queue with the numbers 0, 1, ..., 999
thrust::copy(
    thrust::make_counting_iterator<uint32>(0),
    thrust::make_counting_iterator<uint32>(1000),
    thrust::device_ptr<uint32>( queues.raw_input_queue() ) );

// run the kernel for as long as the input queue is non-empty
// NOTE(review): copy_even_kernel forwards even values unchanged, so after
// the first pass every remaining element is even and would be copied again;
// unless swap() / in_size behave differently than they appear to, this loop
// does not look like it terminates - confirm the intended stopping condition.
while (queues.in_size)
{
    // reset the output queue before appending to it
    queues.clear_output();

    // a single block of 1000 threads covers the initial 1000-element input
    copy_even_kernel<<<1,1000>>>(
        plain_view( queues ) );

    // wait for the kernel to finish before swapping the queues on the host
    cudaDeviceSynchronize();

    // swap the input and output queues for the next iteration
    queues.swap();
}
Technical Overview
See the Ping-Pong Queues module documentation.