28 #include <cugar/basic/cuda/arch.h> 32 #define KD_KNN_STATS_DEF(type,name,value) type name = value; 33 #define KD_KNN_STATS_ADD(name,value) name += value; 35 #define KD_KNN_STATS_DEF(type,name,value) 36 #define KD_KNN_STATS_ADD(name,value) 44 #define KNN_LOOKUP_BLOCK_SIZE 64 45 #define KNN_LOOKUP_CTA_BLOCKS 32 47 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
48 float comp(
const float2 v,
const uint32 i)
50 return i == 0 ? v.x : v.y;
52 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
53 void set_comp(float2& v,
const uint32 i,
const float t)
58 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
59 float sq_length(
const float2 v)
61 return v.x*v.x + v.y*v.y;
64 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
65 float comp(
const float3 v,
const uint32 i)
72 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
73 void set_comp(float3& v,
const uint32 i,
const float t)
76 else if (i == 1) v.y = t;
79 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
80 float sq_length(
const float3 v)
82 return v.x*v.x + v.y*v.y + v.z*v.z;
85 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
86 float comp(
const float4 v,
const uint32 i)
89 (i == 0 ? v.x : v.y) :
92 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
93 void set_comp(float4& v,
const uint32 i,
const float t)
96 else if (i == 1) v.y = t;
97 else if (i == 2) v.z = t;
100 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
101 float sq_length(
const float4 v)
103 return v.x*v.x + v.y*v.y + v.z*v.z+ v.w*v.w;
106 template <
typename VectorType,
typename Po
intIterator>
107 __device__
void lookup_2d(
108 const VectorType query,
109 const Kd_node* kd_nodes,
110 const uint2* kd_ranges,
111 const uint2* kd_leaves,
112 const PointIterator kd_points,
113 Kd_knn<2>::Result* results)
121 float dist2 = 1.0e16f;
124 uint32 node_index = 0;
127 uint32 first_leaf = 0;
131 const Kd_node node = Kd_node::load_ldg( kd_nodes + node_index );
136 const uint2 leaf = __ldg( &kd_leaves[ node.get_leaf_index() ] );
137 for (uint32 i = leaf.x; i < leaf.y; ++i)
139 const VectorType delta = kd_points[i] - query;
140 const float d2 = delta[0]*delta[0] + delta[1]*delta[1];
149 first_leaf = node_index;
154 const uint32 split_dim = node.get_split_dim();
155 const float split_plane = node.get_split_plane();
157 node_index = node.get_child_offset() + (
comp( query, split_dim ) < split_plane ? 0u : 1u);
169 stack[0] = make_float4( 0.0f, 0.0f, 0.0f, binary_cast<float>(uint32(-1)) );
174 float2 cdist = make_float2( 0.0f, 0.0f );
176 while (node_index != uint32(-1))
178 const Kd_node node = Kd_node::load_ldg( kd_nodes + node_index );
182 if (first_leaf != node_index)
185 const uint2 leaf = __ldg( &kd_leaves[ node.get_leaf_index() ] );
186 for (uint32 i = leaf.x; i < leaf.y; ++i)
188 const VectorType delta = kd_points[i] - query;
189 const float d2 = delta[0]*delta[0] + delta[1]*delta[1];
201 const float4 stack_node = stack[ --stackp ];
203 cdist = make_float2( stack_node.x, stack_node.y );
205 if (sq_length( cdist ) < dist2)
211 const uint32 split_dim = node.get_split_dim();
212 const float split_plane = node.get_split_plane();
214 const float split_dist =
comp( query, split_dim ) - split_plane;
216 const uint32 select = split_dist <= 0.0f ? 0u : 1u;
218 node_index = node.get_child_offset() + select;
221 float2 cdist_far = cdist;
222 set_comp( cdist_far, split_dim, split_dist );
225 const float dist_far2 = sq_length( cdist_far );
227 if (dist_far2 < dist2)
229 stack[ stackp++ ] = make_float4(
233 binary_cast<float>( node.get_child_offset() + 1u - select ) );
239 results->index = idx;
240 results->dist2 = dist2;
243 template <
typename VectorType,
typename Po
intIterator>
244 __device__
void lookup_3d(
245 const VectorType query,
246 const Kd_node* kd_nodes,
247 const uint2* kd_ranges,
248 const uint2* kd_leaves,
249 const PointIterator kd_points,
250 Kd_knn<3>::Result* results)
258 float dist2 = 1.0e16f;
261 uint32 node_index = 0;
264 uint32 first_leaf = 0;
268 const Kd_node node = Kd_node::load_ldg( kd_nodes + node_index );
273 const uint2 leaf = __ldg( &kd_leaves[ node.get_leaf_index() ]);
274 for (uint32 i = leaf.x; i < leaf.y; ++i)
276 const VectorType delta = kd_points[i] - query;
277 const float d2 = delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2];
286 first_leaf = node_index;
291 const uint32 split_dim = node.get_split_dim();
292 const float split_plane = node.get_split_plane();
294 node_index = node.get_child_offset() + (
comp( query, split_dim ) < split_plane ? 0u : 1u);
306 stack[0] = make_float4( 0.0f, 0.0f, 0.0f, binary_cast<float>(uint32(-1)) );
311 float3 cdist = make_float3( 0.0f, 0.0f, 0.0f );
313 while (node_index != uint32(-1))
315 const Kd_node node = Kd_node::load_ldg( kd_nodes + node_index );
319 if (first_leaf != node_index)
322 const uint2 leaf = __ldg( &kd_leaves[ node.get_leaf_index() ] );
323 for (uint32 i = leaf.x; i < leaf.y; ++i)
325 const VectorType delta = kd_points[i] - query;
326 const float d2 = delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2];
338 const float4 stack_node = stack[ --stackp ];
340 cdist = make_float3( stack_node.x, stack_node.y, stack_node.z );
342 if (sq_length( cdist ) < dist2)
348 const uint32 split_dim = node.get_split_dim();
349 const float split_plane = node.get_split_plane();
351 const float split_dist =
comp( query, split_dim ) - split_plane;
353 const uint32 select = split_dist <= 0.0f ? 0u : 1u;
355 node_index = node.get_child_offset() + select;
358 float3 cdist_far = cdist;
359 set_comp( cdist_far, split_dim, split_dist );
362 const float dist_far2 = sq_length( cdist_far );
364 if (dist_far2 < dist2)
366 stack[ stackp++ ] = make_float4(
370 binary_cast<float>( node.get_child_offset() + 1u - select ) );
376 results->index = idx;
377 results->dist2 = dist2;
382 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
bool operator() (
const float2 op1,
const float2 op2)
const 384 return (op1.y == op2.y) ?
391 CUGAR_FORCEINLINE CUGAR_HOST_DEVICE
float norm3(
const float4 v) {
return v.x*v.x + v.y*v.y + v.z*v.z; }
393 #define KNN_REORDER_STACK(LhsIndex, RhsIndex) \ 394 if (norm3( stack[stackp - (LhsIndex)] ) < \ 395 norm3( stack[stackp - (RhsIndex)] )) \ 397 const float4 tmpv = stack[stackp - (LhsIndex)]; \ 398 stack[stackp - (LhsIndex)] = stack[stackp - (RhsIndex)]; \ 399 stack[stackp - (RhsIndex)] = tmpv; \ 402 #define KNN_REORDER_STACK(LhsIndex, RhsIndex) 408 const static uint32 M = 8;
411 template <u
int32 K,
typename VectorType,
typename Po
intIterator>
412 __device__
void lookup_2d(
413 const VectorType query,
415 const uint2* kd_ranges,
416 const uint2* kd_leaves,
417 const PointIterator kd_points,
420 KD_KNN_STATS_DEF(uint32, leaf_tests, 0u);
421 KD_KNN_STATS_DEF(uint32, point_tests, 0u);
422 KD_KNN_STATS_DEF(uint32, node_pops, 0u);
423 KD_KNN_STATS_DEF(uint32, node_pushes, 0u);
430 float2 queue_storage[K+1];
431 queue_vector_type queue_vector(0,queue_storage);
433 float max_dist2 = 1.0e16f;
436 uint32 node_index = 0;
439 uint32 entry_subtree = 0;
443 const Kd_node node = Kd_node::load_ldg( kd_nodes + node_index );
444 const uint2 range = kd_ranges[ node_index ];
446 if (range.y - range.x < K)
449 entry_subtree = node_index;
474 stack[0] = make_float4( 1.0e8f, 1.0e8f, 1.0e8f, binary_cast<float>(uint32(-1)) );
479 const uint2 range = kd_ranges[ entry_subtree ];
480 for (uint32 i = range.x; i < range.y; ++i)
482 const VectorType delta = kd_points[i] - query;
483 const float d2 = delta[0]*delta[0] + delta[1]*delta[1];
486 if (queue.
size() == K && d2 < queue.
top().y)
489 if (queue.
size() < K)
490 queue.
push( make_float2(binary_cast<float>(i), d2) );
493 max_dist2 = queue.
top().y;
498 node_index = entry_subtree;
503 float2 cdist = make_float2( 0.0f, 0.0f );
504 while (node_index != uint32(-1))
506 KD_KNN_STATS_ADD( node_tests, 1u );
507 const Kd_node node = Kd_node::load_ldg( kd_nodes + node_index );
508 const uint2 range = kd_ranges[ node_index ];
510 if (node.
is_leaf() || range.y - range.x <= M)
513 KD_KNN_STATS_ADD( leaf_tests, 1u );
514 KD_KNN_STATS_ADD( point_tests, range.y - range.x );
515 for (uint32 i = range.x; i < range.y; ++i)
517 const VectorType delta = kd_points[i] - query;
518 const float d2 = delta[0]*delta[0] + delta[1]*delta[1];
521 max_dist2 = cugar::max( max_dist2, d2 );
531 KD_KNN_STATS_ADD( node_pops, 1u );
532 const float4 stack_node = stack[ --stackp ];
534 cdist = make_float2( stack_node.x, stack_node.y );
536 if (sq_length( cdist ) < max_dist2)
545 const float split_dist =
comp( query, split_dim ) - split_plane;
547 const uint32 select = split_dist <= 0.0f ? 0u : 1u;
552 float2 cdist_far = cdist;
553 set_comp( cdist_far, split_dim, split_dist );
556 const float dist_far2 = sq_length( cdist_far );
558 if (dist_far2 < max_dist2)
560 KD_KNN_STATS_ADD( node_pushes, 1u );
566 const float4 last_cdist = stack[ stackp-1 ];
567 const float last_dist2 = sq_length( last_cdist );
570 last_dist2 < dist_far2 ? stackp-1 : stackp;
572 if (last_dist2 < dist_far2)
573 stack[ stackp ] = last_cdist;
575 stack[ index ] = make_float4(
584 stack[ stackp++ ] = make_float4(
592 KNN_REORDER_STACK(4, 3);
593 KNN_REORDER_STACK(2, 1);
594 KNN_REORDER_STACK(4, 2);
595 KNN_REORDER_STACK(3, 1);
596 KNN_REORDER_STACK(3, 2);
604 entry_subtree = uint32(-1);
617 float2 cdist = make_float2( 0.0f, 0.0f );
619 while (node_index != uint32(-1))
621 KD_KNN_STATS_ADD( node_tests, 1u );
622 const Kd_node node = Kd_node::load_ldg( kd_nodes + node_index );
624 if (node.
is_leaf() || entry_subtree == node_index)
626 if (entry_subtree != node_index)
631 KD_KNN_STATS_ADD( leaf_tests, 1u );
632 KD_KNN_STATS_ADD( point_tests, range.y - range.x );
633 for (uint32 i = range.x; i < range.y; ++i)
635 const VectorType delta = kd_points[i] - query;
636 const float d2 = delta[0]*delta[0] + delta[1]*delta[1];
639 KD_KNN_STATS_ADD( point_pushes, 1u );
642 if (queue.
size() == K && d2 < queue.
top().y)
645 if (queue.
size() < K)
646 queue.
push( make_float2( binary_cast<float>(i), d2 ) );
649 if (queue.
size() == K)
650 max_dist2 = queue.
top().y;
658 KD_KNN_STATS_ADD( node_pops, 1u );
659 const float4 stack_node = stack[ --stackp ];
661 cdist = make_float2( stack_node.x, stack_node.y );
663 if (sq_length( cdist ) < max_dist2)
672 const float split_dist =
comp( query, split_dim ) - split_plane;
674 const uint32 select = split_dist <= 0.0f ? 0u : 1u;
679 float2 cdist_far = cdist;
680 set_comp( cdist_far, split_dim, split_dist );
683 const float dist_far2 = sq_length( cdist_far );
685 if (dist_far2 < max_dist2)
687 KD_KNN_STATS_ADD( node_pushes, 1u );
693 const float4 last_cdist = stack[ stackp-1 ];
694 const float last_dist2 = sq_length( last_cdist );
697 last_dist2 < dist_far2 ? stackp-1 : stackp;
699 if (last_dist2 < dist_far2)
700 stack[ stackp ] = last_cdist;
702 stack[ index ] = make_float4(
711 stack[ stackp++ ] = make_float4(
719 KNN_REORDER_STACK(4, 3);
720 KNN_REORDER_STACK(2, 1);
721 KNN_REORDER_STACK(4, 2);
722 KNN_REORDER_STACK(3, 1);
723 KNN_REORDER_STACK(3, 2);
731 for (uint32 i = 0; i < K; ++i)
732 ((float2*)results)[i] = queue[i];
735 template <u
int32 K,
typename VectorType,
typename Po
intIterator>
736 __device__
void lookup_3d(
737 const VectorType query,
739 const uint2* kd_ranges,
740 const uint2* kd_leaves,
741 const PointIterator kd_points,
744 KD_KNN_STATS_DEF(uint32, leaf_tests, 0u);
745 KD_KNN_STATS_DEF(uint32, point_tests, 0u);
746 KD_KNN_STATS_DEF(uint32, node_pops, 0u);
747 KD_KNN_STATS_DEF(uint32, node_pushes, 0u);
754 float2 queue_storage[K+1];
757 queue_vector_type queue_vector(0,queue_storage);
759 float max_dist2 = 1.0e16f;
762 uint32 node_index = 0;
765 uint32 entry_subtree = 0;
769 const Kd_node node = Kd_node::load_ldg( kd_nodes + node_index );
770 const uint2 range = kd_ranges[ node_index ];
772 if (range.y - range.x < K)
775 entry_subtree = node_index;
800 stack[0] = make_float4( 1.0e8f, 1.0e8f, 1.0e8f, binary_cast<float>(uint32(-1)) );
805 const uint2 range = kd_ranges[ entry_subtree ];
806 for (uint32 i = range.x; i < range.y; ++i)
808 const VectorType delta = kd_points[i] - query;
809 const float d2 = delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2];
812 if (queue.
size() == K && d2 < queue.
top().y)
815 if (queue.
size() < K)
816 queue.
push( make_float2(binary_cast<float>(i), d2) );
819 max_dist2 = queue.
top().y;
824 node_index = entry_subtree;
829 float3 cdist = make_float3( 0.0f, 0.0f, 0.0f );
830 while (node_index != uint32(-1))
832 KD_KNN_STATS_ADD( node_tests, 1u );
833 const Kd_node node = Kd_node::load_ldg( kd_nodes + node_index );
834 const uint2 range = kd_ranges[ node_index ];
836 if (node.
is_leaf() || range.y - range.x <= M)
839 KD_KNN_STATS_ADD( leaf_tests, 1u );
840 KD_KNN_STATS_ADD( point_tests, range.y - range.x );
841 for (uint32 i = range.x; i < range.y; ++i)
843 const VectorType delta = kd_points[i] - query;
844 const float d2 = delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2];
847 max_dist2 = cugar::max( max_dist2, d2 );
857 KD_KNN_STATS_ADD( node_pops, 1u );
858 const float4 stack_node = stack[ --stackp ];
860 cdist = make_float3( stack_node.x, stack_node.y, stack_node.z );
862 if (sq_length( cdist ) < max_dist2)
871 const float split_dist =
comp( query, split_dim ) - split_plane;
873 const uint32 select = split_dist <= 0.0f ? 0u : 1u;
878 float3 cdist_far = cdist;
879 set_comp( cdist_far, split_dim, split_dist );
882 const float dist_far2 = sq_length( cdist_far );
884 if (dist_far2 < max_dist2)
886 KD_KNN_STATS_ADD( node_pushes, 1u );
892 const float4 last_cdist = stack[ stackp-1 ];
893 const float last_dist2 = sq_length( last_cdist );
896 last_dist2 < dist_far2 ? stackp-1 : stackp;
898 if (last_dist2 < dist_far2)
899 stack[ stackp ] = last_cdist;
901 stack[ index ] = make_float4(
910 stack[ stackp++ ] = make_float4(
918 KNN_REORDER_STACK(4, 3);
919 KNN_REORDER_STACK(2, 1);
920 KNN_REORDER_STACK(4, 2);
921 KNN_REORDER_STACK(3, 1);
922 KNN_REORDER_STACK(3, 2);
930 entry_subtree = uint32(-1);
943 float3 cdist = make_float3( 0.0f, 0.0f, 0.0f );
945 while (node_index != uint32(-1))
947 KD_KNN_STATS_ADD( node_tests, 1u );
948 const Kd_node node = Kd_node::load_ldg( kd_nodes + node_index );
950 if (node.
is_leaf() || entry_subtree == node_index)
952 if (entry_subtree != node_index)
957 KD_KNN_STATS_ADD( leaf_tests, 1u );
958 KD_KNN_STATS_ADD( point_tests, range.y - range.x );
959 for (uint32 i = range.x; i < range.y; ++i)
961 const VectorType delta = kd_points[i] - query;
962 const float d2 = delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2];
965 KD_KNN_STATS_ADD( point_pushes, 1u );
968 if (queue.
size() == K && d2 < queue.
top().y)
971 if (queue.
size() < K)
972 queue.
push( make_float2( binary_cast<float>(i), d2 ) );
975 if (queue.
size() == K)
976 max_dist2 = queue.
top().y;
984 KD_KNN_STATS_ADD( node_pops, 1u );
985 const float4 stack_node = stack[ --stackp ];
987 cdist = make_float3( stack_node.x, stack_node.y, stack_node.z );
989 if (sq_length( cdist ) < max_dist2)
998 const float split_dist =
comp( query, split_dim ) - split_plane;
1000 const uint32 select = split_dist <= 0.0f ? 0u : 1u;
1005 float3 cdist_far = cdist;
1006 set_comp( cdist_far, split_dim, split_dist );
1009 const float dist_far2 = sq_length( cdist_far );
1011 if (dist_far2 < max_dist2)
1013 KD_KNN_STATS_ADD( node_pushes, 1u );
1019 const float4 last_cdist = stack[ stackp-1 ];
1020 const float last_dist2 = sq_length( last_cdist );
1022 const uint32 index =
1023 last_dist2 < dist_far2 ? stackp-1 : stackp;
1025 if (last_dist2 < dist_far2)
1026 stack[ stackp ] = last_cdist;
1028 stack[ index ] = make_float4(
1037 stack[ stackp++ ] = make_float4(
1045 KNN_REORDER_STACK(4, 3);
1046 KNN_REORDER_STACK(2, 1);
1047 KNN_REORDER_STACK(4, 2);
1048 KNN_REORDER_STACK(3, 1);
1049 KNN_REORDER_STACK(3, 2);
1057 for (uint32 i = 0; i < K; ++i)
1058 ((float2*)results)[i] = queue[i];
1061 template <u
int32 DIM>
1067 template <
typename VectorType,
typename Po
intIterator>
1068 __device__
static void lookup(
1069 const VectorType query,
1071 const uint2* kd_ranges,
1072 const uint2* kd_leaves,
1073 const PointIterator kd_points,
1076 lookup_2d( query, kd_nodes, kd_ranges, kd_leaves, kd_points, results );
1079 template <u
int32 K,
typename VectorType,
typename Po
intIterator>
1080 __device__
static void lookup(
1081 const VectorType query,
1083 const uint2* kd_ranges,
1084 const uint2* kd_leaves,
1085 const PointIterator kd_points,
1088 lookup_2d<K>( query, kd_nodes, kd_ranges, kd_leaves, kd_points, results );
1095 template <
typename VectorType,
typename Po
intIterator>
1096 __device__
static void lookup(
1097 const VectorType query,
1099 const uint2* kd_ranges,
1100 const uint2* kd_leaves,
1101 const PointIterator kd_points,
1104 lookup_3d( query, kd_nodes, kd_ranges, kd_leaves, kd_points, results );
1107 template <u
int32 K,
typename VectorType,
typename Po
intIterator>
1108 __device__
static void lookup(
1109 const VectorType query,
1111 const uint2* kd_ranges,
1112 const uint2* kd_leaves,
1113 const PointIterator kd_points,
1116 lookup_3d<K>( query, kd_nodes, kd_ranges, kd_leaves, kd_points, results );
1120 template <u
int32 DIM,
typename QueryIterator,
typename Po
intIterator>
1121 __global__
void lookup_kernel_1(
1122 const uint32 n_points,
1123 const QueryIterator points_begin,
1125 const uint2* kd_ranges,
1126 const uint2* kd_leaves,
1127 const PointIterator kd_points,
1130 const uint32 grid_size = gridDim.x * blockDim.x;
1133 for (uint32 base_idx = blockIdx.x * blockDim.x;
1134 base_idx < n_points;
1135 base_idx += grid_size)
1137 const uint32 index = threadIdx.x + base_idx;
1138 if (index >= n_points)
1142 points_begin[ index ],
1151 template <u
int32 DIM, u
int32 K,
typename QueryIterator,
typename Po
intIterator>
1153 __global__
void lookup_kernel(
1154 const uint32 n_points,
1155 const QueryIterator points_begin,
1157 const uint2* kd_ranges,
1158 const uint2* kd_leaves,
1159 const PointIterator kd_points,
1162 const uint32 grid_size = gridDim.x * blockDim.x;
1165 for (uint32 base_idx = blockIdx.x * blockDim.x;
1166 base_idx < n_points;
1167 base_idx += grid_size)
1169 const uint32 index = threadIdx.x + base_idx;
1170 if (index >= n_points)
1174 points_begin[ index ],
1179 results + index*K );
1193 template <u
int32 DIM>
1194 template <
typename QueryIterator,
typename Po
intIterator>
1196 const QueryIterator points_begin,
1197 const QueryIterator points_end,
1199 const uint2* kd_ranges,
1200 const uint2* kd_leaves,
1201 const PointIterator kd_points,
1204 const uint32 n_points = uint32( points_end - points_begin );
1206 const uint32 BLOCK_SIZE = 128;
1207 const uint32 max_blocks = (uint32)cuda::max_active_blocks(
1208 kd_knn::lookup_kernel_1<DIM,QueryIterator,PointIterator>, BLOCK_SIZE, 0);
1209 const uint32 n_blocks = cugar::min( max_blocks, uint32(n_points + BLOCK_SIZE-1) / BLOCK_SIZE );
1211 kd_knn::lookup_kernel_1<DIM> <<<n_blocks,BLOCK_SIZE>>> (
1231 template <u
int32 DIM>
1232 template <u
int32 K,
typename QueryIterator,
typename Po
intIterator>
1234 const QueryIterator points_begin,
1235 const QueryIterator points_end,
1237 const uint2* kd_ranges,
1238 const uint2* kd_leaves,
1239 const PointIterator kd_points,
1242 const uint32 n_points = uint32( points_end - points_begin );
1244 const uint32 BLOCK_SIZE = KNN_LOOKUP_BLOCK_SIZE;
1245 const uint32 max_blocks = (uint32)cuda::max_active_blocks(
1246 kd_knn::lookup_kernel<DIM,K,QueryIterator,PointIterator>, BLOCK_SIZE, 0);
1247 const uint32 n_blocks = cugar::min( max_blocks, uint32(n_points + BLOCK_SIZE-1) / BLOCK_SIZE );
1249 kd_knn::lookup_kernel<DIM,K> <<<n_blocks,BLOCK_SIZE>>> (
Definition: vector_view.h:87
Definition: knn_inline.h:1062
void run(const QueryIterator points_begin, const QueryIterator points_end, const Kd_node *kd_nodes, const uint2 *kd_ranges, const uint2 *kd_leaves, const PointIterator kd_points, Result *results)
Definition: knn_inline.h:1195
CUGAR_HOST_DEVICE uint32 is_leaf() const
Definition: kd_node.h:142
Definition: knn_inline.h:380
Definition: kd_node.h:110
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 size() const
Definition: priority_queue_inline.h:51
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE Key & top()
Definition: priority_queue_inline.h:106
Definition: knn_inline.h:406
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void push(const Key key)
Definition: priority_queue_inline.h:59
CUGAR_HOST_DEVICE uint32 get_split_dim() const
Definition: kd_node.h:187
Definition: priority_queue.h:89
Define a vector_view POD type and plain_view() for std::vector.
Definition: diff.h:38
CUGAR_HOST_DEVICE uint32 get_leaf_index() const
Definition: kd_node.h:154
CUGAR_HOST_DEVICE float get_split_plane() const
Definition: kd_node.h:191
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE Out binary_cast(const In in)
Definition: types.h:288
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint8 comp(const uchar2 a, const char c)
Definition: numbers.h:218
CUGAR_HOST_DEVICE uint32 get_child_offset() const
Definition: kd_node.h:148
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void pop()
Definition: priority_queue_inline.h:81