Fermat
psfpt_vertex_processor.h
/*
 * Fermat
 *
 * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  * Neither the name of the NVIDIA CORPORATION nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#pragma once

#include <pathtracer_core.h>
#include <spatial_hash.h>
#include <cugar/basic/cuda/hash.h>

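/// The path vertex processor used by the path-space filtering path tracer (PSFPT):
/// besides accumulating samples into the framebuffer, it caches path contributions
/// into a spatial hashmap (context.psf_hashmap / context.psf_values), so that nearby
/// paths can share, i.e. filter, their contributions.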
struct PSFPTVertexProcessor
50 {
    static const uint32 DIFFUSE_COMP = 0x1u;
    static const uint32 GLOSSY_COMP  = 0x2u;
    static const uint32 ALL_COMPS    = 0x3u;

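    /// a small union packing, in a single 32-bit word, the cache slot (or pixel) associated with
    /// a path vertex, the BSDF components it refers to, and a flag marking newly created entries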
    union CacheInfo
    {
        const static uint32 INVALID      = 0xFFFFFFFFu;
        const static uint32 INVALID_SLOT = 0xFFFFFFFFu & ((1u << 29) - 1u);

        FERMAT_HOST_DEVICE CacheInfo() : packed(INVALID) {}
        FERMAT_HOST_DEVICE CacheInfo(const uint32 _packed) : packed(_packed) {}
        FERMAT_HOST_DEVICE CacheInfo(const uint32 _pixel, const uint32 _comp, const uint32 _new_entry) : pixel(_pixel), comp(_comp), new_entry(_new_entry) {}

        FERMAT_HOST_DEVICE
        bool is_invalid() const { return pixel == INVALID_SLOT; }

        FERMAT_HOST_DEVICE
        bool is_valid() const { return pixel != INVALID_SLOT; }

        uint32 packed;
        struct
        {
            uint32 pixel     : 29;
            uint32 comp      : 2;
            uint32 new_entry : 1;
        };

        FERMAT_HOST_DEVICE operator uint32() const { return packed; }
    };

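    /// constructor
    ///
    /// \param _firefly_filter   clamping threshold applied to samples in order to suppress fireflies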
    FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
    PSFPTVertexProcessor(float _firefly_filter = 1000.0f) : firefly_filter(_firefly_filter) {}

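    /// clamp a sample value against the firefly filter threshold, zeroing out non-finite samples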
    FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
    cugar::Vector3f clamp_sample(const cugar::Vector3f v)
    {
        return cugar::is_finite(v) ? cugar::min( v, firefly_filter ) : cugar::Vector3f(0.0f);
    }

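    /// preprocess a vertex sampled along an eye path and return the packed CacheInfo to be
    /// propagated to the next vertex: if no cache entry has been created along the path so far,
    /// the bounce index is at least options.psf_depth and the previous sampling probability is
    /// below options.psf_max_prob, an entry is looked up (and possibly created) in the spatial
    /// hashmap, and two weighted "references" to it are appended to the reference queue.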
    template <typename TPTContext>
    FERMAT_DEVICE
    uint32 preprocess_vertex(
        TPTContext&                  context,
        const RenderingContextView&  renderer,
        const PixelInfo              pixel_info,
        const EyeVertex&             ev,
        const float                  cone_radius,
        const cugar::Bbox3f          scene_bbox,
        const uint32                 prev_vertex_info,
        const cugar::Vector3f        w,
        const float                  p_prev)
    {
        // access the vertex info we returned at the previous vertex along this path (sampled from the eye)
        CacheInfo prev_cache_info(prev_vertex_info);

        // determine the cache slot
        uint32 new_cache_slot  = prev_cache_info.pixel;
        bool   new_cache_entry = false;

        // We should create a new cache entry if and only if:
        // 1. none has been created so far along this path
        // 2. the depth is sufficient
        // 3. other conditions hold, e.g. the hit is at a minimum distance and the sampling
        //    probability is low enough (indicating a rough-enough interaction)
        if (prev_cache_info.is_invalid() && context.in_bounce >= context.options.psf_depth /*&& hit.t >= context.options.psf_min_dist*/ && p_prev < context.options.psf_max_prob)
        {
            // use decorrelated samples
          #if 1
            const uint32 pixel_hash = pixel_info.pixel + renderer.instance * renderer.res_x * renderer.res_y;

            const float jitter[6] = {
                cugar::randfloat( 0u, pixel_hash ),
                cugar::randfloat( 1u, pixel_hash ),
                cugar::randfloat( 2u, pixel_hash ),
                cugar::randfloat( 3u, pixel_hash ),
                cugar::randfloat( 4u, pixel_hash ),
                cugar::randfloat( 5u, pixel_hash ),
            };
          #else
            const float jitter[6] = { 0 };
          #endif

            // compute a spatial hash
            const float cone_scale   = context.options.psf_width;
            const float filter_scale = (context.in_bounce == 0 ? 2.0f : 1.0f);

            // compute a hash key based on jittered hashing of the position and normal coordinates
            const uint64 shading_key = spatial_hash(
                pixel_info.pixel,
                ev.geom.position,
                dot(ev.in, ev.geom.normal_s) > 0.0f ? ev.geom.normal_s : -ev.geom.normal_s,
                ev.geom.tangent,
                ev.geom.binormal,
                context.bbox,
                jitter,
                cone_radius * cone_scale,
                filter_scale);

            // insert into the hashmap using the computed hash key
            if (context.psf_hashmap.insert(shading_key, cugar::hash(shading_key), &new_cache_slot))
            {
                FERMAT_ASSERT(new_cache_slot < cugar::cuda::load<cugar::cuda::LOAD_VOLATILE>(context.psf_hashmap.count));
                // initialize the cache entry
                context.psf_values[new_cache_slot] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
            }
            FERMAT_ASSERT(new_cache_slot < cugar::cuda::load<cugar::cuda::LOAD_VOLATILE>(context.psf_hashmap.count));

            // increment the sample counter
            cugar::atomic_add(&context.psf_values[new_cache_slot].w, 1.0f);

            // add two "references" to this sample, weighted by modulate( w, ev.material.diffuse )
            const cugar::Vector4f w_mod = modulate(cugar::Vector4f(w, 0.0f), ev.material.diffuse);

            context.ref_queue.warp_append(
                pixel_info,
                CacheInfo(new_cache_slot, ALL_COMPS, 0),
                (pixel_info.comp & Bsdf::kDiffuseMask)                     ? w_mod : cugar::Vector4f(0.0f),
                (pixel_info.comp & Bsdf::kGlossyMask) && context.in_bounce ? w_mod : cugar::Vector4f(0.0f)
                // at the first bounce, cache entries are only accumulated into the diffuse channel
            );

            new_cache_entry = true;
        }
        return CacheInfo(new_cache_slot, 0, new_cache_entry);
    }

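    /// compute the next-event estimation weights at this vertex, splitting the contribution between
    /// the framebuffer and the path-space filtering cache: if this vertex created a new cache entry,
    /// the diffuse component is demodulated by the diffuse albedo (compensated by the weighted
    /// reference pushed in preprocess_vertex) and only the diffuse component is cached, otherwise
    /// both components are weighted by the path weight as usual. The packed CacheInfo describing
    /// where the shadow ray's contribution should go is written to out_vertex_info.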
    template <typename TPTContext>
    FERMAT_DEVICE
    void compute_nee_weights(
        const TPTContext&            context,
        const RenderingContextView&  renderer,
        const PixelInfo              pixel_info,
        const uint32                 prev_vertex_info,
        const uint32                 vertex_info,
        const EyeVertex&             ev,
        const cugar::Vector3f&       f_d,
        const cugar::Vector3f&       f_g,
        const cugar::Vector3f&       w,
        const cugar::Vector3f&       f_L,
        cugar::Vector3f&             out_w_d,
        cugar::Vector3f&             out_w_g,
        uint32&                      out_vertex_info)
    {
        const CacheInfo new_cache_info(vertex_info);

        const bool new_cache_entry = new_cache_info.new_entry;

        const CacheInfo out_cache_info =
            context.in_bounce < context.options.psf_depth ? CacheInfo(CacheInfo::INVALID) :
            new_cache_entry ?
                CacheInfo(new_cache_info.pixel, DIFFUSE_COMP, 0) :  // cache the diffuse component only
                CacheInfo(new_cache_info.pixel, ALL_COMPS, 0);      // cache both the diffuse and glossy components

        out_vertex_info = out_cache_info;

        // Three cases:
        // 1. we are not doing any caching:
        //    1.a: bounce = 0: we will accumulate the diffuse and glossy components separately to the framebuffer
        //    1.b: bounce > 0: we will accumulate the sum of the components to a single channel of the framebuffer
        // 2. this is a new cache entry (i.e. this is the first D vertex along the path): we demodulate the diffuse BSDF:
        //       out_w_d = f_d * f_L * G * mis_w
        //    and accumulate the glossy component to the glossy framebuffer
        // 3. we are caching both the diffuse and glossy components:
        //       out_w_d = w * f_d * f_L * G * mis_w,
        //       out_w_g = w * f_g * f_L * G * mis_w;
        //    or rather, we could perform a single accumulation using (out_w_d + out_w_g) * f_L * G * mis_w
        //
        // Since in practice 1. and 3. end up with the same weights, this reduces to two cases:
        // 1. this is a new and valid cache entry
        // 2. all of the others
        if (new_cache_entry && out_cache_info.is_valid())
        {
            out_w_d = demodulate( f_d, cugar::Vector4f(ev.material.diffuse).xyz() ) * f_L;
            out_w_g = f_g * w.xyz() * f_L;
        }
        else
        {
            out_w_d = f_d * w.xyz() * f_L;
            out_w_g = f_g * w.xyz() * f_L;
        }
    }

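    /// compute the weight associated with the scattering ray leaving this vertex: if the ray starts
    /// a new "diffuse cache entry", the BSDF weight is demodulated by the diffuse albedo (again
    /// compensated by the weighted reference pushed in preprocess_vertex), otherwise the plain
    /// product of the BSDF value and the path weight is used. The packed CacheInfo to be carried
    /// along the scattered ray is written to out_vertex_info.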
    template <typename TPTContext>
    FERMAT_DEVICE
    void compute_scattering_weights(
        const TPTContext&            context,
        const RenderingContextView&  renderer,
        const PixelInfo              pixel_info,
        const uint32                 prev_vertex_info,
        const uint32                 vertex_info,
        const EyeVertex&             ev,
        const uint32                 out_comp,
        const cugar::Vector3f&       g,
        const cugar::Vector3f&       w,
        cugar::Vector3f&             out_w,
        uint32&                      out_vertex_info)
    {
        const CacheInfo prev_cache_info(prev_vertex_info);
        const CacheInfo new_cache_info(vertex_info);

        const uint32 new_cache_slot  = new_cache_info.pixel;
        const bool   new_cache_entry = new_cache_info.new_entry;

        const CacheInfo out_cache_info =
            prev_cache_info.is_invalid() && (out_comp & Bsdf::kGlossyMask) ?
                prev_cache_info :                           // retain the invalid cache location
                CacheInfo(new_cache_slot, ALL_COMPS, 0);    // cache both the diffuse and glossy components

        out_vertex_info = out_cache_info;

        // if this is a new "diffuse cache ray", i.e. if new_cache_entry && (out_comp & Bsdf::kDiffuseMask),
        // we have to demodulate the BSDF weight. This will be compensated by a correctly weighted reference to the queue entry.
        // The proper solution would be to use SH to encode the incoming radiance.
        if (new_cache_entry && (out_comp & Bsdf::kDiffuseMask))
            out_w = demodulate(g, cugar::Vector4f(ev.material.diffuse).xyz());
        else
            out_w = g * w.xyz();
    }

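    /// accumulate an emissive contribution found along the path: if the previous vertex did not
    /// reference a cache entry, the (clamped) sample is accumulated directly into the framebuffer
    /// channels, otherwise it is atomically added to the referenced cache cell.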
    template <typename TPTContext>
    FERMAT_DEVICE
    void accumulate_emissive(
        const TPTContext&            context,
        RenderingContextView&        renderer,
        const PixelInfo              pixel_info,
        const uint32                 prev_vertex_info,
        const uint32                 vertex_info,
        const EyeVertex&             ev,
        const cugar::Vector3f&       out_w)
    {
        FBufferView& fb = renderer.fb;
        FBufferChannelView& composited_channel = fb(FBufferDesc::COMPOSITED_C);
        FBufferChannelView& direct_channel     = fb(FBufferDesc::DIRECT_C);
        FBufferChannelView& diffuse_channel    = fb(FBufferDesc::DIFFUSE_C);
        FBufferChannelView& specular_channel   = fb(FBufferDesc::SPECULAR_C);

        // access the vertex info from the previous vertex
        const CacheInfo prev_cache_info(prev_vertex_info);

        // clamp the sample value to avoid extreme fire-flies
        const cugar::Vector3f clamped_out_w = clamp_sample( out_w );

        // unpack the pixel index & sampling component
        const uint32 pixel_index  = pixel_info.pixel;
        const uint32 pixel_comp   = pixel_info.comp;
        const float  frame_weight = context.frame_weight;

        // accumulate to the image only if prev_cache_info is invalid
        if (prev_cache_info.is_invalid())
        {
            add_in<false>(composited_channel, pixel_index, clamped_out_w, frame_weight);

            // accumulate the per-component value to the proper output channel
            if (context.in_bounce == 0)
                add_in<false>(direct_channel, pixel_index, clamped_out_w, frame_weight);
            else
            {
                if (pixel_comp & Bsdf::kDiffuseMask) add_in<true>(diffuse_channel,  pixel_index, clamped_out_w, frame_weight);
                if (pixel_comp & Bsdf::kGlossyMask)  add_in<true>(specular_channel, pixel_index, clamped_out_w, frame_weight);
            }
        }
        else
        {
            // accumulate to the cache entry
            cugar::atomic_add(&context.psf_values[prev_cache_info.pixel].x, clamped_out_w.x);
            cugar::atomic_add(&context.psf_values[prev_cache_info.pixel].y, clamped_out_w.y);
            cugar::atomic_add(&context.psf_values[prev_cache_info.pixel].z, clamped_out_w.z);
        }
    }

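    /// accumulate a next-event estimation sample once its shadow ray has been resolved: if the ray
    /// is unoccluded, the diffuse and glossy weights are either atomically added to the referenced
    /// cache cell or accumulated (clamped) into the framebuffer channels, depending on the packed
    /// CacheInfo produced by compute_nee_weights.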
    template <typename TPTContext>
    FERMAT_DEVICE
    void accumulate_nee(
        const TPTContext&            context,
        RenderingContextView&        renderer,
        const PixelInfo              pixel_info,
        const uint32                 vertex_info,
        const bool                   shadow_hit,
        const cugar::Vector3f&       w_d,
        const cugar::Vector3f&       w_g)
    {
        FBufferView& fb = renderer.fb;
        FBufferChannelView& composited_channel = fb(FBufferDesc::COMPOSITED_C);
        FBufferChannelView& direct_channel     = fb(FBufferDesc::DIRECT_C);
        FBufferChannelView& diffuse_channel    = fb(FBufferDesc::DIFFUSE_C);
        FBufferChannelView& specular_channel   = fb(FBufferDesc::SPECULAR_C);

        // unpack the pixel index & sampling component
        const uint32 pixel_index  = pixel_info.pixel;
        const uint32 pixel_comp   = pixel_info.comp;
        const float  frame_weight = context.frame_weight;

        // access the packed vertex info
        const CacheInfo cache_info(vertex_info);

        if (shadow_hit == false)
        {
            // check if the cache cell is valid
            if (cache_info.is_valid())
            {
                const uint32 cache_slot = cache_info.pixel;

                // check whether to add both components to the cache or just the diffuse one
                const cugar::Vector3f w = (cache_info.comp == DIFFUSE_COMP) ? w_d : w_d + w_g;

                cugar::atomic_add(&context.psf_values[cache_slot].x, w.x);
                cugar::atomic_add(&context.psf_values[cache_slot].y, w.y);
                cugar::atomic_add(&context.psf_values[cache_slot].z, w.z);

                // if the glossy component was left out, we need to add it to the framebuffer
                if (cache_info.comp == DIFFUSE_COMP)
                {
                    add_in<false>(composited_channel, pixel_index, clamp_sample( w_g ), frame_weight);

                    // select the right channel
                    FBufferChannelView& fb_channel = context.in_bounce == 0 || (pixel_comp & Bsdf::kGlossyMask) ?
                        specular_channel :
                        diffuse_channel;

                    add_in<true>(fb_channel, pixel_index, clamp_sample( w_g ), context.frame_weight);
                }
            }
            else //if (context.pass_type == PSFPT::kFinalPass)
            {
                add_in<false>(composited_channel, pixel_index, clamp_sample( w_d + w_g ), frame_weight);

                if (context.in_bounce == 0)
                {
                    // accumulate the per-component values to the respective output channels
                    add_in<true>(diffuse_channel,  pixel_index, clamp_sample( w_d ), context.frame_weight);
                    add_in<true>(specular_channel, pixel_index, clamp_sample( w_g ), context.frame_weight);
                }
                else
                {
                    // accumulate the aggregate value to the proper output channel (only one will be true)
                    if (pixel_comp & Bsdf::kDiffuseMask) add_in<true>(diffuse_channel,  pixel_index, clamp_sample( w_d + w_g ), frame_weight);
                    if (pixel_comp & Bsdf::kGlossyMask)  add_in<true>(specular_channel, pixel_index, clamp_sample( w_d + w_g ), frame_weight);
                }
            }
        }
    }

    float firefly_filter;   // this value biases the render, but is needed to avoid extreme fireflies...
};