31 #include <cugar/basic/cuda/pointers.h> 35 FERMAT_DEVICE FERMAT_FORCEINLINE
45 FERMAT_DEVICE FERMAT_FORCEINLINE
56 float_pointer cdf = cdfs + cell_slot * cluster_count;
57 const uint32 cluster_idx =
cugar::upper_bound_index( cugar::min( z, one ) * cdf[cluster_count-1], cdf, cluster_count );
58 const float cdf_begin = cluster_idx ? cdf[cluster_idx - 1] : 0.0f;
59 const float cdf_end = cdf[cluster_idx];
60 const float cluster_pdf = cdf_end - cdf_begin;
63 const float cluster_z = (z - cdf_begin) / cluster_pdf;
64 const uint32 cluster_offset = uint_pointer(cluster_offsets)[cluster_idx];
65 const uint32 cluster_size = uint_pointer(cluster_offsets)[cluster_idx+1] - cluster_offset;
66 const uint32 index = cluster_offset +
cugar::quantize( cugar::min( cluster_z, one ), cluster_size );
69 *pdf = cluster_pdf / float(cluster_size);
71 *out_cluster_idx = cluster_idx;
78 FERMAT_DEVICE FERMAT_FORCEINLINE
82 const float* cdf = cdfs + cell_slot * cluster_count;
84 const float cdf_begin = cluster_idx ? cdf[cluster_idx - 1] : 0.0f;
85 const float cdf_end = cdf[cluster_idx];
86 const float cluster_pdf = cdf_end - cdf_begin;
89 const uint32 cluster_offset = cluster_offsets[cluster_idx];
90 const uint32 cluster_size = cluster_offsets[cluster_idx+1] - cluster_offset;
93 return cluster_pdf / float(cluster_size);
98 FERMAT_DEVICE FERMAT_FORCEINLINE
101 const float*
pdf = pdfs + cell_slot * cluster_count;
104 for (uint32 i = 0; i < 5; ++i)
106 const float cur = cugar::cuda::load<cugar::cuda::LOAD_VOLATILE>(pdf + cluster_idx);
107 const float val = cur * (1.0f - alpha) + update_val * alpha;
109 if (atomicCAS((uint32*)pdf + cluster_idx,
cugar::binary_cast<uint32>(cur), cugar::binary_cast<uint32>(val)) == cugar::binary_cast<uint32>(cur))
116 FERMAT_DEVICE FERMAT_FORCEINLINE
126 FERMAT_DEVICE FERMAT_FORCEINLINE
132 const uint32 cluster_count = cluster_counts[cell_slot];
133 const uint32* ends = cluster_ends + cell_slot * init_cluster_count;
136 const float* cdf = cdfs + cell_slot * init_cluster_count;
137 const uint32 cluster_idx =
cugar::upper_bound_index( cugar::min( z, one ) * cdf[cluster_count-1], cdf, cluster_count );
138 const float cdf_begin = cluster_idx ? cdf[cluster_idx - 1] : 0.0f;
139 const float cdf_end = cdf[cluster_idx];
140 const float cluster_pdf = cdf_end - cdf_begin;
143 const float cluster_z = (z - cdf_begin) / cluster_pdf;
144 const uint32 cluster_offset = cluster_idx ? ends[cluster_idx-1] : 0u;
145 const uint32 cluster_size = ends[cluster_idx] - cluster_offset;
146 const uint32 index = cluster_offset +
cugar::quantize( cugar::min( cluster_z, one ), cluster_size );
149 *pdf = cluster_pdf / float(cluster_size);
151 *out_cluster_idx = cluster_idx;
158 FERMAT_DEVICE FERMAT_FORCEINLINE
162 const uint32 cluster_count = cluster_counts[cell_slot];
163 const uint32* ends = cluster_ends + cell_slot * init_cluster_count;
166 const float* cdf = cdfs + cell_slot * init_cluster_count;
168 const float cdf_begin = cluster_idx ? cdf[cluster_idx - 1] : 0.0f;
169 const float cdf_end = cdf[cluster_idx];
170 const float cluster_pdf = cdf_end - cdf_begin;
173 const uint32 cluster_offset = cluster_idx ? ends[cluster_idx-1] : 0u;
174 const uint32 cluster_size = ends[cluster_idx] - cluster_offset;
177 return cluster_pdf / float(cluster_size);
182 FERMAT_DEVICE FERMAT_FORCEINLINE
185 const float*
pdf = pdfs + cell_slot * init_cluster_count;
188 for (uint32 i = 0; i < 5; ++i)
190 const float cur = cugar::cuda::load<cugar::cuda::LOAD_VOLATILE>(pdf + cluster_idx);
191 const float val = cur * (1.0f - alpha) + update_val * alpha;
193 if (atomicCAS((uint32*)pdf + cluster_idx,
cugar::binary_cast<uint32>(cur), cugar::binary_cast<uint32>(val)) == cugar::binary_cast<uint32>(cur))
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE index_type upper_bound_index(const Value x, Iterator begin, const index_type n)
Definition: algorithms.h:193
FERMAT_DEVICE float pdf(const uint32 cell_slot, const uint32 index) const
Definition: clustered_rl_inline.h:159
CUGAR_HOST_DEVICE uint32 quantize(const float x, const uint32 n)
Definition: numbers.h:600
Definition: pointers.h:125
FERMAT_DEVICE uint32 find_slot(const uint64 key)
Definition: clustered_rl_inline.h:117
CUGAR_DEVICE void insert(const KeyT key, const HashT hash_code)
Definition: hash.h:636
FERMAT_DEVICE uint32 sample(const uint32 cell_slot, const float z, float *pdf, uint32 *cluster_idx) const
Definition: clustered_rl_inline.h:127
FERMAT_DEVICE float pdf(const uint32 cell_slot, const uint32 index) const
Definition: clustered_rl_inline.h:79
FERMAT_DEVICE void update(const uint32 cell_slot, const uint32 cluster_idx, const float val, const float alpha=0.05f)
Definition: clustered_rl_inline.h:99
FERMAT_DEVICE uint32 sample(const uint32 cell_slot, const float z, float *pdf, uint32 *cluster_idx) const
Definition: clustered_rl_inline.h:46
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE Out binary_cast(const In in)
Definition: types.h:288
FERMAT_DEVICE void update(const uint32 cell_slot, const uint32 cluster_idx, const float val, const float alpha=0.05f)
Definition: clustered_rl_inline.h:183
FERMAT_DEVICE uint32 find_slot(const uint64 key)
Definition: clustered_rl_inline.h:36
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 hash(uint32 a)
Definition: numbers.h:649