30 #include <nih/basic/utils.h> 31 #include <nih/basic/functors.h> 32 #include <nih/basic/cuda/scan.h> 33 #include <nih/basic/cuda_config.h> 34 #include <nih/thrust/iterator_wrapper.h> 35 #include <thrust/scan.h> 36 #include <thrust/sort.h> 37 #include <thrust/iterator/constant_iterator.h> 38 #include <thrust/iterator/counting_iterator.h> 39 #include <thrust/iterator/reverse_iterator.h> 40 #include <thrust/iterator/transform_iterator.h> 47 inline void start_timer(
const cudaEvent_t start,
const cudaEvent_t stop)
49 cudaEventRecord( start, 0 );
51 inline float stop_timer(
const cudaEvent_t start,
const cudaEvent_t stop)
54 cudaEventRecord( stop, 0 );
55 cudaEventSynchronize( stop );
56 cudaEventElapsedTime( &dtime, start, stop );
60 FORCE_INLINE NIH_HOST_DEVICE uint32
largest_axis(
const float3 edge)
62 return edge.x > edge.y ?
63 (edge.x > edge.z ? 0 : 2) :
64 (edge.y > edge.z ? 1 : 2);
72 typedef thrust::tuple<uint32,Bbox4f> argument_type;
73 typedef uint2 result_type;
77 FORCE_INLINE NIH_HOST_DEVICE uint2 operator() (
const argument_type op)
const 79 const uint32 key = thrust::get<0>( op );
80 if (key == uint32(-1))
81 return make_uint2(0,0);
83 const Bbox4f bbox = thrust::get<1>( op );
85 const float4 bmin = m_bins[ key-1 ].bmin;
86 const float4 bmax = m_bins[ key-1 ].bmax;
88 const float3 delta = make_float3( bmax.x - bmin.x, bmax.y - bmin.y, bmax.z - bmin.z );
93 l = delta.x < 1.0e-8f ? 0u :
quantize( (bbox[0][0] - bmin.x) / delta.x, 1024 );
94 l |= delta.y < 1.0e-8f ? 0u :
quantize( (bbox[0][1] - bmin.y) / delta.y, 1024 ) << 10;
95 l |= delta.z < 1.0e-8f ? 0u :
quantize( (bbox[0][2] - bmin.z) / delta.z, 1024 ) << 20;
99 r = delta.x < 1.0e-8f ? 0u :
quantize( (bbox[1][0] - bmin.x) / delta.x, 1024 );
100 r |= delta.y < 1.0e-8f ? 0u :
quantize( (bbox[1][1] - bmin.y) / delta.y, 1024 ) << 10;
101 r |= delta.z < 1.0e-8f ? 0u :
quantize( (bbox[1][2] - bmin.z) / delta.z, 1024 ) << 20;
103 return make_uint2( l, r );
111 const uint32 n_nodes,
112 uint32* split_planes,
113 const float* split_costs,
114 const uint32* segment_heads,
116 const uint32 max_leaf_size,
117 const float max_cost);
121 const uint32 n_objects,
122 const uint32 n_leaves,
124 const uint32* segment_keys,
125 const uint32* split_index,
126 const uint32* allocation_map,
132 const uint32 n_segments,
133 const uint32 n_nodes,
134 const uint32 n_leaves,
135 const uint32 input_node_offset,
136 const uint32* split_index,
137 const uint32* allocation_map,
139 const uint2* bounds_l,
140 const uint2* bounds_r,
142 Bvh_node* bvh_nodes);
146 const uint32 n_objects,
147 const uint32* leaf_ids,
153 template <
typename Iterator>
156 const Iterator bbox_begin,
157 const Iterator bbox_end,
158 const uint32 max_leaf_size,
159 const float max_cost)
161 const uint32 n_objects = uint32( bbox_end - bbox_begin );
163 need_space( *m_nodes, n_objects*2 );
164 need_space( *m_leaves, n_objects );
165 need_space( *m_index, n_objects );
169 need_space( m_segment_heads, n_objects+1 ); storage += (n_objects+1) *
sizeof(uint32);
170 need_space( m_segment_keys, n_objects ); storage += n_objects *
sizeof(uint32);
172 need_space( m_queue_bins[0], n_objects / max_leaf_size ); storage += (n_objects / max_leaf_size) *
sizeof(
Bin);
173 need_space( m_queue_bins[1], n_objects / max_leaf_size ); storage += (n_objects / max_leaf_size) *
sizeof(
Bin);
175 need_space( m_segment_ids, n_objects ); storage += n_objects *
sizeof(uint32);
176 need_space( m_node_ids, n_objects ); storage += n_objects *
sizeof(uint32);
178 need_space( m_scan_bounds, n_objects * 3 ); storage += n_objects*3 *
sizeof(uint2);
179 need_space( m_split_costs, n_objects ); storage += n_objects *
sizeof(float);
180 need_space( m_split_index, n_objects ); storage += n_objects *
sizeof(uint32);
183 thrust::fill( m_node_ids.begin(), m_node_ids.begin() + n_objects, 0 );
185 thrust::fill( m_segment_keys.begin(), m_segment_keys.begin() + n_objects, 1u );
186 thrust::fill( m_segment_ids.begin(), m_segment_ids.begin() + n_objects, 1u );
188 m_segment_heads[0] = 0;
189 m_segment_heads[1] = n_objects;
191 thrust::device_vector<float>::iterator centroids = m_split_costs.begin();
199 bin.bmin = make_float4( bbox[0][0],bbox[0][1],bbox[0][2],binary_cast<float>(n_objects) );
200 bin.bmax = make_float4( bbox[1][0],bbox[1][1],bbox[1][2],binary_cast<float>(Bvh_node::kInvalid) );
201 m_queue_bins[ in_queue ][0] = bin;
204 m_counters.resize(2);
208 uint32 input_node_offset = 0;
211 uint32 out_nodes = 1;
213 cudaEvent_t start, stop;
214 cudaEventCreate( &start );
215 cudaEventCreate( &stop );
217 thrust::device_vector<uint2>::iterator bounds_l = m_scan_bounds.begin() + n_objects*0;
218 thrust::device_vector<uint2>::iterator bounds_r = m_scan_bounds.begin() + n_objects*1;
219 thrust::device_vector<uint2>::iterator bounds = m_scan_bounds.begin() + n_objects*2;
223 uint32 n_active_objects = n_objects;
231 m_levels[ level++ ] = n_nodes;
235 sah::start_timer( start, stop );
238 thrust::raw_pointer_cast( &m_queue_bins[ in_queue ].front() ) );
241 thrust::make_zip_iterator( thrust::make_tuple( m_segment_ids.begin(), bbox_begin ) ),
242 thrust::make_zip_iterator( thrust::make_tuple( m_segment_ids.begin(), bbox_begin ) ) + n_objects,
246 m_compression_time += sah::stop_timer( start, stop );
249 uint32 n_segments = out_nodes;
256 thrust::device_vector<uint32>::iterator order = m_index->begin();
258 sah::start_timer( start, stop );
262 m_segment_ids.begin(),
263 m_segment_keys.begin(),
271 m_sorting_time += sah::stop_timer( start, stop );
273 sah::start_timer( start, stop );
278 m_segment_keys.begin(),
279 m_segment_heads.begin(),
283 m_split_costs.begin(),
284 m_split_index.begin() );
286 thrust::device_ptr<uint32> allocation_map( (uint32*)thrust::raw_pointer_cast( &*bounds ) );
291 thrust::raw_pointer_cast( &m_split_index.front() ),
292 thrust::raw_pointer_cast( &m_split_costs.front() ),
293 thrust::raw_pointer_cast( &m_segment_heads.front() ),
294 thrust::raw_pointer_cast( &*allocation_map ),
298 m_sah_split_time += sah::stop_timer( start, stop );
300 sah::start_timer( start, stop );
305 const uint32 n_splits = allocation_map[ n_segments-1 ];
306 out_nodes = n_splits*2;
312 thrust::raw_pointer_cast( &*order ),
313 thrust::raw_pointer_cast( &m_segment_keys.front() ),
314 thrust::raw_pointer_cast( &m_split_index.front() ),
315 thrust::raw_pointer_cast( &*allocation_map ),
316 thrust::raw_pointer_cast( &m_segment_ids.front() ),
317 thrust::raw_pointer_cast( &m_node_ids.front() ) );
320 storage -=
sizeof(
Bin) * m_queue_bins[ out_queue ].size();
321 need_space( m_queue_bins[ out_queue ], out_nodes );
322 storage +=
sizeof(
Bin) * m_queue_bins[ out_queue ].size();
330 thrust::raw_pointer_cast( &m_split_index.front() ),
331 thrust::raw_pointer_cast( &*allocation_map ),
332 thrust::raw_pointer_cast( &m_queue_bins[ in_queue ].front() ),
333 thrust::raw_pointer_cast( &*bounds_l ),
334 thrust::raw_pointer_cast( &*bounds_r ),
335 thrust::raw_pointer_cast( &m_queue_bins[ out_queue ].front() ),
336 thrust::raw_pointer_cast( &m_nodes->front() ) );
338 m_distribute_objects_time += sah::stop_timer( start, stop );
340 input_node_offset = n_nodes;
342 n_nodes += out_nodes;
343 n_leaves += (n_segments - n_splits);
345 std::swap( in_queue, out_queue );
348 m_level_count = level;
349 for (; level < 128; ++level)
350 m_levels[ level ] = n_nodes;
353 thrust::copy( thrust::make_counting_iterator(0u), thrust::make_counting_iterator(0u) + n_objects, m_index->begin() );
354 thrust::sort_by_key( m_node_ids.begin(), m_node_ids.begin() + n_objects, m_index->begin() );
359 thrust::raw_pointer_cast( &m_node_ids.front() ),
360 thrust::raw_pointer_cast( &m_leaves->front() ) );
362 m_leaf_count = n_leaves;
363 m_node_count = n_nodes;
365 cudaEventDestroy( start );
366 cudaEventDestroy( stop );
368 m_temp_storage = nih::max( storage, m_temp_storage );
void transform(const uint32 n, const Iterator in, const Output out, const Functor functor)
Definition: primitives_inl.h:357
CUGAR_HOST_DEVICE uint32 quantize(const float x, const uint32 n)
Definition: numbers.h:600
Definition: sah_builder_inline.h:70
Definition: sah_builder.h:37
Definition: sah_builder.h:46
void build(const uint32 BINS, const Bbox3f bbox, const Iterator bbox_begin, const Iterator bbox_end, const uint32 max_leaf_size, const float max_cost=1.8f)
build a bvh given a set of bboxes
Definition: sah_builder_inline.h:217
void inclusive_scan(const uint32 n, InputIterator d_in, OutputIterator d_out, BinaryOp op, thrust::device_vector< uint8 > &d_temp_storage)
Definition: primitives_inl.h:228
Definition: sah_builder.h:80
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE size_t largest_axis(const Bbox< Vector_t > &bbox)
Definition: bbox_inline.h:103