33 #include <cub/cub.cuh>
34 #include <thrust/copy.h>
54 typedef WorkUnitT WorkUnit;
65 for (
uint32 stream_begin = 0; stream_begin <
stream_end; stream_begin += grid_threads)
69 if (work_id < stream_end)
72 stream.get( work_id, &unit, make_uint2( thread_id, 0u ) );
97 template <
typename WorkStream,
typename WorkMover>
104 wq::inplace_work_queue_kernel<BLOCKDIM,WorkUnit,WorkStream> <<<n_blocks,
BLOCKDIM>>>(
stream,
view( stats ) );
113 if (
valid() ==
false)
116 const uint32 active_mask = __ballot(
true);
117 const uint32 active_count = __popc(active_mask);
118 if (__popc(active_mask >> warp_tid()) == 1u)
130 if (
valid() ==
false)