Fermat
tiled_sampling.h
1 /*
2  * Fermat
3  *
4  * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #pragma once
30 
31 #include <types.h>
32 #include <cugar/basic/numbers.h>
33 #include <algorithm>
34 #include <vector>
35 
38 
41 
44 inline float random()
45 {
46  return rand() / float(RAND_MAX);
47 }
48 
51 inline uint32 irandom(const uint32 N)
52 {
53  const float r = random();
54  return cugar::min(uint32(r * N), N - 1);
55 }
56 
58 {
59  FERMAT_HOST_DEVICE strided_vec() {}
60  FERMAT_HOST_DEVICE strided_vec(float* _ptr, const uint32 _off, const uint32 _stride) : base(_ptr), off(_off), stride(_stride) {}
61 
62  FERMAT_HOST_DEVICE const float& operator[] (const uint32 i) const { return base[off + i*stride]; }
63  FERMAT_HOST_DEVICE float& operator[] (const uint32 i) { return base[off + i*stride]; }
64 
65  float* base;
66  uint32 off;
67  uint32 stride;
68 };
69 
70 //
71 // Represents a 3d lattice of 3d points.
72 // The samples are arranged in a SOA layout, so that the point (x,y,z).i coordinates are located at (x + y*X + z*X*Y*3 + i*X*Y)
73 // (i.e. if the set has Z slices of X*Y points, there will be Z*3 consecutive arrays of X*Y floats).
74 //
76 {
77  sample_set_3d(const uint32 _x, const uint32 _y, float* _samples) :
78  X(_x), Y(_y), samples(_samples) {}
79 
80  uint32 size() const { return X*Y; }
81 
82  strided_vec operator() (const uint32 x, const uint32 y, const uint32 z) { return strided_vec(samples, z*X*Y * 3 + X*y + x, X*Y); }
83 
84  uint32 X;
85  uint32 Y;
86  float* samples;
87 };
88 
92 inline void mj_3d(uint32 X, uint32 Y, uint32 Z, sample_set_3d p, const uint32 slice)
93 {
94 #if 1
95  for (uint32 k = 0; k < Z; ++k)
96  {
97  for (uint32 j = 0; j < Y; ++j)
98  {
99  for (uint32 i = 0; i < X; ++i)
100  {
101  p(i, j, k)[slice + 0] = (i + (j + (k + random()) / Z) / Y) / X;
102  p(i, j, k)[slice + 1] = (j + (k + (i + random()) / X) / Z) / Y;
103  p(i, j, k)[slice + 2] = (k + (i + (j + random()) / Y) / X) / Z;
104  }
105  }
106  }
107 
108 #if 1
109  //
110  // Exchange the points among Z slices
111  //
112  for (uint32 k = 0; k < Z; ++k)
113  {
114  for (uint32 j = 0; j < Y; ++j)
115  {
116  for (uint32 i = 0; i < X; ++i)
117  {
118  const uint32 r = k + irandom(Z - k);
119 
120  std::swap(p(i, j, k)[slice+0], p(i, j, r)[slice+0]);
121  std::swap(p(i, j, k)[slice+1], p(i, j, r)[slice+1]);
122  std::swap(p(i, j, k)[slice+2], p(i, j, r)[slice+2]);
123  }
124  }
125  }
126  //
127  // Process Z slices
128  //
129  for (uint32 k = 0; k < Z; ++k)
130  {
131  //
132  // Exchange the X components among Y columns (Z/Y/X)
133  //
134  for (uint32 j = 0; j < Y; ++j)
135  {
136  const uint32 r = j + irandom(Y - j); // use correlated sampling
137 
138  for (uint32 i = 0; i < X; ++i) // x is the fastest variable
139  {
140  //const uint32 r = j + irandom(Y - j);
141  std::swap(p(i, j, k)[slice],
142  p(i, r, k)[slice]);
143  }
144  }
145 
146  //
147  // Exchange the Y components among X rows (Z/Y/X)
148  //
149  for (uint32 i = 0; i < X; ++i)
150  {
151  const uint32 r = i + irandom(X - i); // use correlated sampling
152 
153  for (uint32 j = 0; j < Y; ++j) // y is the fastest variable
154  {
155  //const uint r = i + irandom(X - i);
156  std::swap(p(i, j, k)[slice+1],
157  p(r, j, k)[slice+1]);
158  }
159  }
160  }
161 #else
162  //
163  // Exchange the X components among Z slices (Z/Y/X)
164  //
165  for (uint32 k = 0; k < Z; ++k)
166  {
167  const uint32 r = k + irandom(Z - k);
168 
169  for (uint32 j = 0; j < Y; ++j)
170  {
171  //const uint32 r = k + irandom(Z - k);
172 
173  for (uint32 i = 0; i < X; ++i) // x is the fastest variable
174  {
175  //const uint32 r = k + irandom(Z - k);
176  std::swap(p(i, j, k)[slice],
177  p(i, j, r)[slice]);
178  }
179  }
180  }
181  //
182  // Exchange the Y components among X rows (X/Z/Y)
183  //
184  for (uint32 i = 0; i < X; ++i)
185  {
186  const uint32 r = i + irandom(X - i);
187 
188  for (uint32 k = 0; k < Z; ++k)
189  {
190  //const uint32 r = i + irandom(X - i);
191 
192  for (uint32 j = 0; j < Y; ++j) // y is the fastest variable
193  {
194  //const uint r = i + irandom(X - i);
195  std::swap(p(i, j, k)[slice + 1],
196  p(r, j, k)[slice + 1]);
197  }
198  }
199  }
200  //
201  // Exchange the Z components among Y columns (Y/X/Z)
202  //
203  for (uint32 j = 0; j < Y; ++j)
204  {
205  const uint32 r = j + irandom(Y - j);
206 
207  for (uint32 i = 0; i < X; ++i)
208  {
209  //const uint32 r = j + irandom(Y - j);
210 
211  for (uint32 k = 0; k < Z; ++k) // z is the fastest variable
212  {
213  //const uint32 r = j + irandom(Y - j);
214  std::swap(p(i, j, k)[slice + 2],
215  p(i, r, k)[slice + 2]);
216  }
217  }
218  }
219 #endif
220 
221 #else
222  //
223  // Build Z independent 2d-multi-jittered grids of (x,y) points, together with some randomly shuffled z components
224  //
225  for (uint32 k = 0; k < Z; ++k)
226  {
227  for (uint32 j = 0; j < Y; ++j)
228  {
229  for (uint32 i = 0; i < X; ++i)
230  {
231  p(i, j, k)[slice + 0] = (i + (j + random()) / Y) / X;
232  p(i, j, k)[slice + 1] = (j + (i + random()) / X) / Y;
233  p(i, j, k)[slice + 2] = (i + (j + random()) / Y) / X;
234  }
235  }
236  }
237  //
238  // Process Z slices
239  //
240  for (uint32 k = 0; k < Z; ++k)
241  {
242  //
243  // Exchange the X components among Y columns (Z/Y/X)
244  //
245  for (uint32 j = 0; j < Y; ++j)
246  {
247  const uint32 r = j + irandom(Y - j); // use correlated sampling
248 
249  for (uint32 i = 0; i < X; ++i) // x is the fastest variable
250  {
251  //const uint32 r = j + irandom(Y - j);
252  std::swap(p(i, j, k)[slice],
253  p(i, r, k)[slice]);
254  }
255  }
256  //
257  // Exchange the Y components among X rows (Z/Y/X)
258  //
259  for (uint32 i = 0; i < X; ++i)
260  {
261  const uint r = i + irandom(X - i); // use correlated sampling
262 
263  for (uint32 j = 0; j < Y; ++j) // y is the fastest variable
264  {
265  //const uint r = i + irandom(X - i);
266  std::swap(p(i, j, k)[slice + 1],
267  p(r, j, k)[slice + 1]);
268  }
269  }
270  // Exchange the Z components randomly
271  for (uint32 n = 0; n < X*Y; ++n)
272  {
273  const uint r = n + irandom(X*Y - n);
274 
275  std::swap(p(n % X, n / X, k)[slice + 2],
276  p(r % X, r / X, k)[slice + 2]);
277  }
278  }
279 #endif
280 }
281 
287 inline void build_tiled_samples_3d(const uint32 X, const uint32 Y, const uint32 Z, float* samples)
288 {
289  // build the sample set
290  sample_set_3d set( X, Y, samples );
291 
292  mj_3d( X, Y, Z, set, 0);
293 
294  // randomize the order of the samples within each 3d slice
295  const uint32 SLICE_SIZE = X*Y;
296 
297  for (uint32 z = 0; z < Z; ++z)
298  {
299  for (uint32 i = 0; i < SLICE_SIZE; ++i)
300  {
301  const uint32 r = i + irandom(SLICE_SIZE - i);
302 
303  std::swap(samples[z * SLICE_SIZE * 3 + i + 0 * SLICE_SIZE], samples[z * SLICE_SIZE * 3 + r + 0 * SLICE_SIZE]);
304  std::swap(samples[z * SLICE_SIZE * 3 + i + 1 * SLICE_SIZE], samples[z * SLICE_SIZE * 3 + r + 1 * SLICE_SIZE]);
305  std::swap(samples[z * SLICE_SIZE * 3 + i + 2 * SLICE_SIZE], samples[z * SLICE_SIZE * 3 + r + 2 * SLICE_SIZE]);
306  }
307  }
308 }
309 
312 inline void load_samples(const char* nameradix, const uint32 X, const uint32 Y, const uint32 Z, float* samples)
313 {
314  for (uint32 z = 0; z < Z; ++z)
315  {
316  char slicename[256];
317  sprintf(slicename, "%s-%u.dat", nameradix, z);
318 
319  FILE* file = fopen(slicename, "rb");
320  if (file == NULL)
321  break;
322 
323  std::vector<float3> slice(X*Y);
324  if (fread(&slice[0], sizeof(float3), X*Y, file) != X*Y)
325  break;
326  fclose(file);
327 
328  // convert from AoS to SoA
329  for (uint32 i = 0; i < X*Y; ++i)
330  {
331  samples[z * X*Y * 3 + i + 0 * X*Y] = slice[i].x;
332  samples[z * X*Y * 3 + i + 1 * X*Y] = slice[i].y;
333  samples[z * X*Y * 3 + i + 2 * X*Y] = slice[i].z;
334  }
335  fprintf(stderr, " loaded sample slice %u (%u x %u)\n", z, X, Y);
336  }
337 }
338 
Definition: tiled_sampling.h:57
void build_tiled_samples_3d(const uint32 X, const uint32 Y, const uint32 Z, float *samples)
Definition: tiled_sampling.h:287
float random()
Definition: tiled_sampling.h:44
Definition: tiled_sampling.h:75
void load_samples(const char *nameradix, const uint32 X, const uint32 Y, const uint32 Z, float *samples)
Definition: tiled_sampling.h:312
void mj_3d(uint32 X, uint32 Y, uint32 Z, sample_set_3d p, const uint32 slice)
Definition: tiled_sampling.h:92
uint32 irandom(const uint32 N)
Definition: tiled_sampling.h:51