Fermat
clustered_rl_inline.h
1 /*
2  * Fermat
3  *
4  * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #pragma once
30 
31 #include <cugar/basic/cuda/pointers.h>
32 
35 FERMAT_DEVICE FERMAT_FORCEINLINE
36 uint32 ClusteredRLView::find_slot(const uint64 key)
37 {
38  uint32 slot;
39  hashmap.insert(key, cugar::hash(key/HashMap::BUCKET_SIZE), &slot);
40  return slot;
41 }
42 
43 // given a cell and a random number, sample an item
44 //
45 FERMAT_DEVICE FERMAT_FORCEINLINE
46 uint32 ClusteredRLView::sample(const uint32 cell_slot, const float z, float* pdf, uint32* out_cluster_idx) const
47 {
48  const float one = cugar::binary_cast<float>(FERMAT_ALMOST_ONE_AS_INT);
49 
50  //typedef const float* float_pointer;
51  //typedef const uint32* uint_pointer;
54 
55  // 1. sample a cluster according to the CDF
56  float_pointer cdf = cdfs + cell_slot * cluster_count;
57  const uint32 cluster_idx = cugar::upper_bound_index( cugar::min( z, one ) * cdf[cluster_count-1], cdf, cluster_count );
58  const float cdf_begin = cluster_idx ? cdf[cluster_idx - 1] : 0.0f;
59  const float cdf_end = cdf[cluster_idx];
60  const float cluster_pdf = cdf_end - cdf_begin;
61 
62  // 2. select a VTL uniformly within the cluster and sample that uniformly
63  const float cluster_z = (z - cdf_begin) / cluster_pdf;
64  const uint32 cluster_offset = uint_pointer(cluster_offsets)[cluster_idx];
65  const uint32 cluster_size = uint_pointer(cluster_offsets)[cluster_idx+1] - cluster_offset;
66  const uint32 index = cluster_offset + cugar::quantize( cugar::min( cluster_z, one ), cluster_size );
67 
68  // 3. compute the pdf
69  *pdf = cluster_pdf / float(cluster_size);
70 
71  *out_cluster_idx = cluster_idx;
72 
73  return index;
74 }
75 
76 // given a cell and an item's index, return the sampling pdf of that item
77 //
78 FERMAT_DEVICE FERMAT_FORCEINLINE
79 float ClusteredRLView::pdf(const uint32 cell_slot, const uint32 index) const
80 {
81  // 1. find the cluster containing this index
82  const float* cdf = cdfs + cell_slot * cluster_count;
83  const uint32 cluster_idx = cugar::upper_bound_index( index, cluster_offsets + 1, cluster_count );
84  const float cdf_begin = cluster_idx ? cdf[cluster_idx - 1] : 0.0f;
85  const float cdf_end = cdf[cluster_idx];
86  const float cluster_pdf = cdf_end - cdf_begin;
87 
88  // 2. compute the cluster size
89  const uint32 cluster_offset = cluster_offsets[cluster_idx];
90  const uint32 cluster_size = cluster_offsets[cluster_idx+1] - cluster_offset;
91 
92  // 3. compute the pdf
93  return cluster_pdf / float(cluster_size);
94 }
95 
96 // update the value corresponding to sampled cluster
97 //
98 FERMAT_DEVICE FERMAT_FORCEINLINE
99 void ClusteredRLView::update(const uint32 cell_slot, const uint32 cluster_idx, const float update_val, const float alpha)
100 {
101  const float* pdf = pdfs + cell_slot * cluster_count;
102 
103  //while (1)
104  for (uint32 i = 0; i < 5; ++i) // perform 5 attempts only!
105  {
106  const float cur = cugar::cuda::load<cugar::cuda::LOAD_VOLATILE>(pdf + cluster_idx);
107  const float val = cur * (1.0f - alpha) + update_val * alpha;
108 
109  if (atomicCAS((uint32*)pdf + cluster_idx, cugar::binary_cast<uint32>(cur), cugar::binary_cast<uint32>(val)) == cugar::binary_cast<uint32>(cur))
110  break;
111  }
112 }
113 
116 FERMAT_DEVICE FERMAT_FORCEINLINE
117 uint32 AdaptiveClusteredRLView::find_slot(const uint64 key)
118 {
119  uint32 slot;
120  hashmap.insert(key, cugar::hash(key/HashMap::BUCKET_SIZE), &slot);
121  return slot;
122 }
123 
124 // given a cell and a random number, sample an item
125 //
126 FERMAT_DEVICE FERMAT_FORCEINLINE
127 uint32 AdaptiveClusteredRLView::sample(const uint32 cell_slot, const float z, float* pdf, uint32* out_cluster_idx) const
128 {
129  const float one = cugar::binary_cast<float>(FERMAT_ALMOST_ONE_AS_INT);
130 
131  // find the actual cluster count
132  const uint32 cluster_count = cluster_counts[cell_slot];
133  const uint32* ends = cluster_ends + cell_slot * init_cluster_count;
134 
135  // 1. sample a cluster according to the CDF
136  const float* cdf = cdfs + cell_slot * init_cluster_count;
137  const uint32 cluster_idx = cugar::upper_bound_index( cugar::min( z, one ) * cdf[cluster_count-1], cdf, cluster_count );
138  const float cdf_begin = cluster_idx ? cdf[cluster_idx - 1] : 0.0f;
139  const float cdf_end = cdf[cluster_idx];
140  const float cluster_pdf = cdf_end - cdf_begin;
141 
142  // 2. select a VTL uniformly within the cluster and sample that uniformly
143  const float cluster_z = (z - cdf_begin) / cluster_pdf;
144  const uint32 cluster_offset = cluster_idx ? ends[cluster_idx-1] : 0u;
145  const uint32 cluster_size = ends[cluster_idx] - cluster_offset;
146  const uint32 index = cluster_offset + cugar::quantize( cugar::min( cluster_z, one ), cluster_size );
147 
148  // 3. compute the pdf
149  *pdf = cluster_pdf / float(cluster_size);
150 
151  *out_cluster_idx = cluster_idx;
152 
153  return index;
154 }
155 
156 // given a cell and an item's index, return the sampling pdf of that item
157 //
158 FERMAT_DEVICE FERMAT_FORCEINLINE
159 float AdaptiveClusteredRLView::pdf(const uint32 cell_slot, const uint32 index) const
160 {
161  // find the actual cluster count
162  const uint32 cluster_count = cluster_counts[cell_slot];
163  const uint32* ends = cluster_ends + cell_slot * init_cluster_count;
164 
165  // 1. find the cluster containing this index
166  const float* cdf = cdfs + cell_slot * init_cluster_count;
167  const uint32 cluster_idx = cugar::upper_bound_index( index, ends, cluster_count );
168  const float cdf_begin = cluster_idx ? cdf[cluster_idx - 1] : 0.0f;
169  const float cdf_end = cdf[cluster_idx];
170  const float cluster_pdf = cdf_end - cdf_begin;
171 
172  // 2. compute the cluster size
173  const uint32 cluster_offset = cluster_idx ? ends[cluster_idx-1] : 0u;
174  const uint32 cluster_size = ends[cluster_idx] - cluster_offset;
175 
176  // 3. compute the pdf
177  return cluster_pdf / float(cluster_size);
178 }
179 
180 // update the value corresponding to sampled cluster
181 //
182 FERMAT_DEVICE FERMAT_FORCEINLINE
183 void AdaptiveClusteredRLView::update(const uint32 cell_slot, const uint32 cluster_idx, const float update_val, const float alpha)
184 {
185  const float* pdf = pdfs + cell_slot * init_cluster_count;
186 
187  //while (1)
188  for (uint32 i = 0; i < 5; ++i) // perform 5 attempts only!
189  {
190  const float cur = cugar::cuda::load<cugar::cuda::LOAD_VOLATILE>(pdf + cluster_idx);
191  const float val = cur * (1.0f - alpha) + update_val * alpha;
192 
193  if (atomicCAS((uint32*)pdf + cluster_idx, cugar::binary_cast<uint32>(cur), cugar::binary_cast<uint32>(val)) == cugar::binary_cast<uint32>(cur))
194  break;
195  }
196 }
197 
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE index_type upper_bound_index(const Value x, Iterator begin, const index_type n)
Definition: algorithms.h:193
FERMAT_DEVICE float pdf(const uint32 cell_slot, const uint32 index) const
Definition: clustered_rl_inline.h:159
CUGAR_HOST_DEVICE uint32 quantize(const float x, const uint32 n)
Definition: numbers.h:600
Definition: pointers.h:125
FERMAT_DEVICE uint32 find_slot(const uint64 key)
Definition: clustered_rl_inline.h:117
CUGAR_DEVICE void insert(const KeyT key, const HashT hash_code)
Definition: hash.h:636
FERMAT_DEVICE uint32 sample(const uint32 cell_slot, const float z, float *pdf, uint32 *cluster_idx) const
Definition: clustered_rl_inline.h:127
FERMAT_DEVICE float pdf(const uint32 cell_slot, const uint32 index) const
Definition: clustered_rl_inline.h:79
FERMAT_DEVICE void update(const uint32 cell_slot, const uint32 cluster_idx, const float val, const float alpha=0.05f)
Definition: clustered_rl_inline.h:99
FERMAT_DEVICE uint32 sample(const uint32 cell_slot, const float z, float *pdf, uint32 *cluster_idx) const
Definition: clustered_rl_inline.h:46
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE Out binary_cast(const In in)
Definition: types.h:288
FERMAT_DEVICE void update(const uint32 cell_slot, const uint32 cluster_idx, const float val, const float alpha=0.05f)
Definition: clustered_rl_inline.h:183
FERMAT_DEVICE uint32 find_slot(const uint64 key)
Definition: clustered_rl_inline.h:36
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 hash(uint32 a)
Definition: numbers.h:649