Fermat
kd_builder_inline.h
1 /*
2  * Copyright (c) 2010-2018, NVIDIA Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of NVIDIA Corporation nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include <cugar/bits/morton.h>
29 #include <cugar/basic/cuda/sort.h>
30 #include <cugar/basic/utils.h>
31 #include <cugar/radixtree/cuda/radixtree.h>
32 
33 namespace cugar {
34 namespace cuda {
35 
36 namespace kd {
37 
38  template <typename integer, uint32 DIM>
39  struct Morton_bits {};
40 
41  template <>
42  struct Morton_bits<uint32, 2u>
43  {
44  static const uint32 value = 32u;
45 
46  CUGAR_HOST_DEVICE static inline float convert(float a, float b, const uint32 i)
47  {
48  const float x = float(i) / float(1u << 16u);
49  return a + (b - a) * x;
50  }
51  };
52 
53  template <>
54  struct Morton_bits<uint64, 2u>
55  {
56  static const uint32 value = 64u;
57 
58  CUGAR_HOST_DEVICE static inline float convert(float a, float b, const uint64 i)
59  {
60  const float x = float(i) / float(0xFFFFFFFFu);
61  return a + (b - a) * x;
62  }
63  };
64 
65  template <>
66  struct Morton_bits<uint32, 3u>
67  {
68  static const uint32 value = 30u;
69 
70  CUGAR_HOST_DEVICE static inline float convert(float a, float b, const uint64 i)
71  {
72  const float x = float(i) / float(1u << 10u);
73  return a + (b - a) * x;
74  }
75  };
76 
77  template <>
78  struct Morton_bits<uint64, 3u>
79  {
80  static const uint32 value = 60u;
81 
82  CUGAR_HOST_DEVICE static inline float convert(float a, float b, const uint64 i)
83  {
84  const float x = float(i) / float(1u << 20u);
85  return a + (b - a) * x;
86  }
87  };
88 
91 template <uint32 DIM, typename BboxType, typename Integer, typename OutputTree>
92 struct Kd_context
93 {
94  typedef typename OutputTree::Context BaseContext;
95 
97  struct Context
98  {
99  CUGAR_HOST_DEVICE Context() {}
100  CUGAR_HOST_DEVICE Context(const BaseContext context, const Integer* codes, BboxType bbox) :
101  m_context( context ), m_codes( codes ), m_bbox( bbox ) {}
102 
105  CUGAR_HOST_DEVICE void write_node(const uint32 node, const uint32 parent, bool p1, bool p2, const uint32 offset, const uint32 skip_node, const uint32 level, const uint32 begin, const uint32 end, const uint32 split_index)
106  {
107  //if (m_parents)
108  // m_parents[node] = parent;
109 
110  if (p1)
111  {
112  // fetch the Morton code corresponding to the split plane
113  Integer code = m_codes[ split_index ];
114  const uint32 split_dim = level % DIM;
115 
116  // extract the selected coordinate
117  Integer split_coord = 0;
118 
119  if (level)
120  {
121  code >>= level-1;
122  code <<= level-1;
123  }
124 
125  for (int i = 0; code; i++)
126  {
127  split_coord |= (((code >> split_dim) & 1u) << i);
128  code >>= DIM;
129  }
130 
131  // convert to floating point
132  const float split_plane = Morton_bits<Integer,DIM>::convert( m_bbox[0][split_dim], m_bbox[1][split_dim], split_coord );
133 
134  // and output the split node
135  m_context.write_node(
136  node,
137  offset,
138  skip_node,
139  begin,
140  end,
141  split_index,
142  split_dim,
143  split_plane );
144  }
145  else
146  {
147  // output a leaf node
148  m_context.write_node(
149  node,
150  offset,
151  skip_node,
152  begin,
153  end );
154  }
155  }
158  CUGAR_HOST_DEVICE void write_leaf(const uint32 leaf_index, const uint32 node_index, const uint32 begin, const uint32 end)
159  {
160  m_context.write_leaf( leaf_index, begin, end );
161  }
162 
163  BaseContext m_context;
164  const Integer* m_codes;
165  BboxType m_bbox;
166  };
167 
170  OutputTree context,
171  const Integer* codes,
172  BboxType bbox) :
173  m_context( context ), m_codes( codes ), m_bbox( bbox ) {}
174 
176  void reserve_nodes(const uint32 n) { m_context.reserve_nodes(n); }
177 
179  void reserve_leaves(const uint32 n) { m_context.reserve_leaves(n); }
180 
183  {
184  return Context(
185  m_context.get_context(),
186  m_codes,
187  m_bbox );
188  }
189 
190  OutputTree m_context;
191  const Integer* m_codes;
192  BboxType m_bbox;
193 };
194 
195 // a small kernel to calculate Morton codes
196 template <typename PointIterator, typename Integer, typename MortonFunctor>
197 __global__
198 void morton_kernel(const uint32 n_points, const PointIterator points_begin, Integer* out, const MortonFunctor morton)
199 {
200  const uint32 thread_id = threadIdx.x + blockIdx.x * blockDim.x;
201 
202  if (thread_id < n_points)
203  {
204  typedef typename std::iterator_traits<PointIterator>::value_type VectorType;
205 
206  const VectorType p = points_begin[thread_id];
207  out[thread_id] = morton(p);
208  }
209 }
210 
211 }; // namespace kd
212 
213 // build a k-d tree given a set of points
214 template <typename Integer>
215 template <typename OutputTree, typename Iterator, typename BboxType>
217  OutputTree& tree,
219  const BboxType bbox,
220  const Iterator points_begin,
221  const Iterator points_end,
222  const uint32 max_leaf_size)
223 {
224  const uint32 DIM = BboxType::vector_type::DIMENSION;
225  const uint32 n_points = uint32( points_end - points_begin );
226 
227  need_space( m_codes, n_points );
228  need_space( index, n_points );
229  need_space( m_temp_codes, n_points );
230  need_space( m_temp_index, n_points );
231 
232  // compute the Morton code for each point
233  #if 1
234  {
235  const uint32 blockSize = (uint32)cugar::cuda::max_blocksize_with_highest_occupancy(kd::morton_kernel< Iterator,Integer,morton_functor<Integer,DIM,BboxType> >, 0u);
236  const dim3 gridSize(cugar::divide_ri(n_points, blockSize));
237  kd::morton_kernel<<< gridSize, blockSize >>> (n_points, points_begin, raw_pointer(m_codes), morton_functor<Integer,DIM,BboxType>( bbox ));
238  }
239  //cuda::sync_and_check_error("morton codes");
240  #else
242  points_begin,
243  points_begin + n_points,
244  m_codes.begin(),
246  #endif
247 
248  // setup the point indices, from 0 to n_points-1
249  thrust::copy(
250  thrust::counting_iterator<uint32>(0),
251  thrust::counting_iterator<uint32>(0) + n_points,
252  index.begin() );
253 
254  //cuda::sync_and_check_error("copy");
255 
256  if (n_points > 1)
257  {
258  // sort the indices by Morton code
259  SortBuffers<Integer*, uint32*> sort_buffers;
260  sort_buffers.keys[0] = raw_pointer(m_codes);
261  sort_buffers.keys[1] = raw_pointer(m_temp_codes);
262  sort_buffers.values[0] = raw_pointer(index);
263  sort_buffers.values[1] = raw_pointer(m_temp_index);
264 
265  SortEnactor sort_enactor;
266  sort_enactor.sort(n_points, sort_buffers);
267 
268  // check whether we need to copy the sort results back in place
269  if (sort_buffers.selector)
270  {
271  thrust::copy(m_temp_codes.begin(), m_temp_codes.begin() + n_points, m_codes.begin());
272  thrust::copy(m_temp_index.begin(), m_temp_index.begin() + n_points, index.begin());
273  }
274  }
275 
276  // generate a kd-tree
277  kd::Kd_context<DIM,BboxType,Integer,OutputTree> bintree_context( tree, thrust::raw_pointer_cast( &m_codes.front() ), bbox );
278 
279  const uint32 bits = kd::Morton_bits<Integer,DIM>::value;
280 
282  m_kd_context,
283  n_points,
284  raw_pointer(m_codes),
285  bits,
286  max_leaf_size,
287  false,
288  false,
289  bintree_context );
290 
291  m_leaf_count = m_kd_context.m_leaves;
292  m_node_count = m_kd_context.m_nodes;
293 }
294 
295 } // namespace cuda
296 } // namespace cugar
void reserve_leaves(const uint32 n)
reserve space for more leaves
Definition: kd_builder_inline.h:179
void transform(const uint32 n, const Iterator in, const Output out, const Functor functor)
Definition: primitives_inl.h:357
Defines some general purpose algorithms.
Definition: kd_builder_inline.h:39
Kd_context(OutputTree context, const Integer *codes, BboxType bbox)
constructor
Definition: kd_builder_inline.h:169
thrust::device_vector< T >::iterator begin(thrust::device_vector< T > &vec)
Definition: thrust_view.h:89
void reserve_nodes(const uint32 n)
reserve space for more nodes
Definition: kd_builder_inline.h:176
Definition: sort.h:103
T * raw_pointer(thrust::device_vector< T, Alloc > &vec)
Definition: thrust_view.h:69
Definition: kd_builder_inline.h:92
Define CUDA based sort primitives.
Definition: morton.h:160
Define CUDA utilities.
CUGAR_HOST_DEVICE L divide_ri(const L x, const R y)
Definition: numbers.h:180
Context get_context()
return a cuda context
Definition: kd_builder_inline.h:182
Cuda accessor struct.
Definition: kd_builder_inline.h:97
Definition: vector.h:117
Define a vector_view POD type and plain_view() for std::vector.
Definition: diff.h:38
void build(OutputTree &out_tree, vector< device_tag, uint32 > &out_index, const BboxType bbox, const Iterator points_begin, const Iterator points_end, const uint32 max_leaf_size)
Definition: kd_builder_inline.h:216
void generate_radix_tree(const uint32 n_codes, const Integer *codes, const uint32 bits, const uint32 max_leaf_size, const bool keep_singletons, const bool middle_splits, Tree_writer &tree)
Definition: radixtree_inline.h:381
Definition: sort.h:160
CUGAR_HOST_DEVICE void write_leaf(const uint32 leaf_index, const uint32 node_index, const uint32 begin, const uint32 end)
Definition: kd_builder_inline.h:158
CUGAR_HOST_DEVICE void write_node(const uint32 node, const uint32 parent, bool p1, bool p2, const uint32 offset, const uint32 skip_node, const uint32 level, const uint32 begin, const uint32 end, const uint32 split_index)
Definition: kd_builder_inline.h:105