nvbio/ping_pong_queues_page.html

// Filter kernel: every thread inspects one input-queue entry and, when that
// entry is even, appends it to the output queue.
//
// Indexing uses only the x dimension of the launch, so a 1D grid covering at
// least queues.in_size threads is expected; surplus threads do nothing.
// Output slots are reserved with an atomic increment of *queues.out_size, so
// the order of entries in the output queue is not guaranteed to follow the
// input order.
__global__ void copy_even_kernel(PingPongQueuesView<uint32> queues)
{
    const uint32 thread_id = blockIdx.x * blockDim.x + threadIdx.x;

    // only threads that map onto a valid input entry have work to do
    if (thread_id < queues.in_size)
    {
        // read this thread's entry from the input queue
        const uint32 value = queues.in_queue[thread_id];

        const bool is_even = (value % 2u) == 0u;
        if (is_even)
        {
            // atomicAdd returns the counter's previous value, which is the
            // slot reserved for this thread
            const uint32 out_pos = atomicAdd( queues.out_size, 1u );
            queues.out_queue[out_pos] = value;
        }
    }
}


// Host-side driver: fill a ping-pong queue with 0,...,n_items-1 and repeatedly
// filter it through copy_even_kernel, swapping input and output after each pass.
PingPongQueues<uint32> queues;

const uint32 n_items    = 1000u;   // number of entries initially queued
const uint32 block_size = 256u;    // threads per block (multiple of the warp size)

// reserve some storage
queues.resize_arena( n_items );

// resize the input queue
queues.resize( n_items );

// fill the input queue with numbers 0,...,n_items-1
thrust::copy(
    thrust::make_counting_iterator<uint32>(0),
    thrust::make_counting_iterator<uint32>(n_items),
    thrust::device_ptr<uint32>( queues.raw_input_queue() ) );

while (queues.in_size)
{
    // remember how many entries this pass starts with
    const uint32 prev_size = queues.in_size;

    // clear the output queue
    queues.clear_output();

    // run our kernel with enough blocks to cover every input entry
    // (ceil-div, so sizes that are not a multiple of block_size still work)
    const uint32 n_blocks = (prev_size + block_size - 1u) / block_size;
    copy_even_kernel<<<n_blocks,block_size>>>( plain_view( queues ) );

    // a bad launch configuration is only reported through cudaGetLastError()
    if (cudaGetLastError() != cudaSuccess)
        break;

    // wait for the kernel to finish; execution errors surface here
    if (cudaDeviceSynchronize() != cudaSuccess)
        break;

    // swap the input & output queues
    queues.swap();

    // The kernel copies even values unchanged, so once a pass removes nothing
    // every later pass would reproduce the same queue: stop here instead of
    // looping forever.
    // NOTE(review): relies on swap() publishing the output count as the new
    // in_size - confirm against the PingPongQueues implementation.
    if (queues.in_size == prev_size)
        break;
}