fermat/psfpt__vertex__processor_8h_source.html

 /*
  * Fermat
  *
  * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  *    * Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *    * Redistributions in binary form must reproduce the above copyright
  *      notice, this list of conditions and the following disclaimer in the
  *      documentation and/or other materials provided with the distribution.
  *    * Neither the name of the NVIDIA CORPORATION nor the
  *      names of its contributors may be used to endorse or promote products
  *      derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 #pragma once

 #include <pathtracer_core.h>
 #include <spatial_hash.h>
 #include <cugar/basic/cuda/hash.h>


 // A path-tracer "vertex processor" policy that routes the samples produced along an eye path
 // either directly to the frame-buffer, or to the entries of a spatial-hash cache
 // (context.psf_values, indexed through context.psf_hashmap).
 //
 // The per-vertex state threaded along a path is a single uint32, interpreted through the
 // CacheInfo union below; an "invalid" CacheInfo means that no cache entry has been selected
 // for the path yet, in which case samples are accumulated straight to the frame-buffer.
 struct PSFPTVertexProcessor
 {
     // values stored in CacheInfo::comp, selecting which BSDF components are accumulated to a cache entry
     static const uint32 DIFFUSE_COMP = 0x1u;
     static const uint32 GLOSSY_COMP  = 0x2u;
     static const uint32 ALL_COMPS    = 0x3u;

     // The packed per-vertex info: a 32-bit word that can be viewed either as a whole (packed)
     // or as three bit-fields (29 + 2 + 1 bits).
     //
     // Although the widest field is called 'pixel', throughout this struct it holds the index of
     // a cache slot (see preprocess_vertex(), accumulate_emissive() and accumulate_nee()).
     //
     // NOTE(review): reading 'packed' after writing the bit-fields (and vice versa) relies on the
     // compiler's union / bit-field layout; presumably fine with the toolchains in use - confirm.
     union CacheInfo
     {
         static const uint32 INVALID = 0xFFFFFFFFu;                             // packed word with all bits set
         static const uint32 INVALID_SLOT = 0xFFFFFFFFu & ((1u << 29) - 1u);    // all 29 bits of the 'pixel' field set

         // build an invalid cache info
         FERMAT_HOST_DEVICE CacheInfo() : packed(INVALID) {}

         // build a cache info from its packed representation
         FERMAT_HOST_DEVICE CacheInfo(const uint32 _packed) : packed(_packed) {}

         // build a cache info from its individual fields (each value is truncated to the width of its bit-field)
         FERMAT_HOST_DEVICE CacheInfo(const uint32 _pixel, const uint32 _comp, const uint32 _new_entry) : pixel(_pixel), comp(_comp), new_entry(_new_entry){}

         // return true iff the slot field holds the reserved INVALID_SLOT value
         FERMAT_HOST_DEVICE
         bool is_invalid() const { return pixel == INVALID_SLOT; }

         // return true iff the slot field holds anything but the reserved INVALID_SLOT value
         FERMAT_HOST_DEVICE
         bool is_valid() const { return pixel != INVALID_SLOT; }

         uint32  packed;
         struct
         {
             uint32 pixel        : 29;  // cache slot index, or INVALID_SLOT
             uint32 comp         : 2;   // one of DIFFUSE_COMP / GLOSSY_COMP / ALL_COMPS (or 0)
             uint32 new_entry    : 1;   // set by preprocess_vertex() when the slot was selected at this very vertex
         };

         // implicit conversion to the packed representation
         FERMAT_HOST_DEVICE operator uint32() const { return packed; }
     };

     // constructor
     //
     // _firefly_filter  the upper bound applied by clamp_sample()
     FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
     PSFPTVertexProcessor(float _firefly_filter = 1000.0f) : firefly_filter(_firefly_filter) {}

     // clamp a sample value to avoid extreme fire-flies:
     // returns cugar::min( v, firefly_filter ) if cugar::is_finite(v) holds, and a zero vector otherwise.
     //
     // The method only reads firefly_filter, hence it is const-qualified.
     FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
     cugar::Vector3f clamp_sample(const cugar::Vector3f v) const
     {
         return cugar::is_finite(v) ? cugar::min( v, firefly_filter ) : cugar::Vector3f(0.0f);
     }

     // preprocess a new path vertex, deciding whether a cache entry should be selected for this path.
     //
     // context           the path tracing context (read: in_bounce, options, bbox; written: psf_hashmap, psf_values, ref_queue)
     // renderer          the rendering context view (read: instance, res_x, res_y)
     // pixel_info        the packed pixel index and sampling component of this path
     // ev                the current eye vertex
     // cone_radius       the radius used to size the spatial hash cells (scaled by options.psf_width)
     // scene_bbox        NOTE(review): currently unused - the hash is computed against context.bbox; confirm which one is intended
     // prev_vertex_info  the packed CacheInfo produced at the previous vertex of this path
     // w                 the current path weight
     // p_prev            the probability with which this vertex was sampled from the previous one
     //
     // returns the packed CacheInfo for this vertex: the selected cache slot (or the one inherited
     // from prev_vertex_info), comp = 0, and new_entry = 1 iff the slot was selected by this call.
     template <typename TPTContext>
     FERMAT_DEVICE
     uint32 preprocess_vertex(
               TPTContext&           context,
         const RenderingContextView& renderer,
         const PixelInfo             pixel_info,
         const EyeVertex&            ev,
         const float                 cone_radius,
         const cugar::Bbox3f         scene_bbox,
         const uint32                prev_vertex_info,
         const cugar::Vector3f       w,
         const float                 p_prev)
     {
         // access the vertex info we returned at the previous vertex along this path (sampled from the eye)
         CacheInfo prev_cache_info(prev_vertex_info);

         // determine the cache slot: by default, inherit the one from the previous vertex
         uint32 new_cache_slot = prev_cache_info.pixel;
         bool   new_cache_entry = false;

         // We should create a new cache entry if and only if:
         //  1. none has been created so far along this path
         //  2. the depth is sufficient
         //  3. other conditions like the hit being at a minimum distance and the sampling probability being low enough (indicating a rough-enough interaction) hold
         if (prev_cache_info.is_invalid() && context.in_bounce >= context.options.psf_depth /*&& hit.t >= context.options.psf_min_dist*/ && p_prev < context.options.psf_max_prob)
         {
             // use decorrelated samples
             #if 1
             // a per-pixel, per-instance seed (unsigned wrap-around is harmless here, as the value is only used for hashing)
             const uint32 pixel_hash = pixel_info.pixel + renderer.instance * renderer.res_x * renderer.res_y;

             const float jitter[6] = {
                 cugar::randfloat( 0u, pixel_hash ),
                 cugar::randfloat( 1u, pixel_hash ),
                 cugar::randfloat( 2u, pixel_hash ),
                 cugar::randfloat( 3u, pixel_hash ),
                 cugar::randfloat( 4u, pixel_hash ),
                 cugar::randfloat( 5u, pixel_hash ),
             };
             #else
             const float jitter[6] = {0};
             #endif

             // compute a spatial hash
             const float cone_scale   = context.options.psf_width;
             // the bounce counter is compared against an integer literal, consistently with the rest of this struct
             const float filter_scale = (context.in_bounce == 0 ? 2.0f : 1.0f);

             // compute a hash key based on jittered hashing of the position and normal coordinates
             // (the shading normal is flipped so as to lie on the same side as the incoming direction)
             const uint64 shading_key = spatial_hash(
                 pixel_info.pixel,
                 ev.geom.position,
                 dot(ev.in, ev.geom.normal_s) > 0.0f ? ev.geom.normal_s : -ev.geom.normal_s,
                 ev.geom.tangent,
                 ev.geom.binormal,
                 context.bbox,
                 jitter,
                 cone_radius * cone_scale,
                 filter_scale);

             // insert into the hashmap using the computed hash key
             if (context.psf_hashmap.insert(shading_key, cugar::hash(shading_key), &new_cache_slot) == true)
             {
                 FERMAT_ASSERT(new_cache_slot < cugar::cuda::load<cugar::cuda::LOAD_VOLATILE>(context.psf_hashmap.count));
                 // initialize the cache entry
                 // NOTE(review): presumably insert() returns true only to the thread that created the slot - confirm
                 context.psf_values[new_cache_slot] = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
             }
             FERMAT_ASSERT(new_cache_slot < cugar::cuda::load<cugar::cuda::LOAD_VOLATILE>(context.psf_hashmap.count));

             // increment the sample counter, kept in the .w component of the cache entry
             cugar::atomic_add(&context.psf_values[new_cache_slot].w, 1.0f);

             // append a single "reference" to this cache entry, carrying two weights (one for the diffuse
             // and one for the glossy output), both obtained as modulate( w, ev.material.diffuse )
             const cugar::Vector4f w_mod = modulate(cugar::Vector4f(w,0.0f), ev.material.diffuse);

             // NOTE(review): warp_append() is invoked from within a per-thread conditional; presumably it copes
             // with partially active warps - confirm against its implementation
             context.ref_queue.warp_append(
                 pixel_info,
                 CacheInfo(new_cache_slot, ALL_COMPS, 0),
                 (pixel_info.comp & Bsdf::kDiffuseMask)                       ? w_mod : cugar::Vector4f(0.0f),
                 ((pixel_info.comp & Bsdf::kGlossyMask) && context.in_bounce) ? w_mod : cugar::Vector4f(0.0f)
                     // at the first bounce, cache entries are only accumulated into the diffuse channel
             );

             new_cache_entry = true;
         }
         return CacheInfo(new_cache_slot, 0, new_cache_entry);
     }

     // compute the weights of a next-event estimation (NEE) sample, together with the packed
     // CacheInfo telling accumulate_nee() where the sample should end up.
     //
     // context           the path tracing context (read: in_bounce, options.psf_depth)
     // renderer          the rendering context view (unused here)
     // pixel_info        the packed pixel info (unused here)
     // prev_vertex_info  the packed CacheInfo of the previous vertex (unused here)
     // vertex_info       the packed CacheInfo returned by preprocess_vertex() for this vertex
     // ev                the current eye vertex
     // f_d               the diffuse BSDF term
     // f_g               the glossy BSDF term
     // w                 the current path weight
     // f_L               the light sample term
     // out_w_d           the output diffuse weight
     // out_w_g           the output glossy weight
     // out_vertex_info   the output packed CacheInfo attached to the NEE sample
     template <typename TPTContext>
     FERMAT_DEVICE
     void compute_nee_weights(
         const TPTContext&           context,
         const RenderingContextView& renderer,
         const PixelInfo             pixel_info,
         const uint32                prev_vertex_info,
         const uint32                vertex_info,
         const EyeVertex&            ev,
         const cugar::Vector3f&      f_d,
         const cugar::Vector3f&      f_g,
         const cugar::Vector3f&      w,
         const cugar::Vector3f&      f_L,
               cugar::Vector3f&      out_w_d,
               cugar::Vector3f&      out_w_g,
               uint32&               out_vertex_info)
     {
         const CacheInfo new_cache_info(vertex_info);

         const bool new_cache_entry = new_cache_info.new_entry;

         // below the minimum caching depth the sample is flagged as uncached; otherwise it targets the current slot
         const CacheInfo out_cache_info = context.in_bounce < context.options.psf_depth ? CacheInfo(CacheInfo::INVALID) :
             new_cache_entry ?
                 CacheInfo(new_cache_info.pixel, DIFFUSE_COMP, 0) :  // cache the diffuse component only
                 CacheInfo(new_cache_info.pixel, ALL_COMPS, 0);      // cache both diffuse and glossy components

         out_vertex_info = out_cache_info;

         // Three cases:
         //   1. we are not doing any caching:
         //      1.a: bounce = 0: we will accumulate the diffuse and glossy components separately to the frame-buffer
         //      1.b: bounce > 0: we will accumulate the sum of the components to a single channel of the frame-buffer
         //   2. this is a new cache entry (i.e. this is the first D vertex along a path), we demodulate the diffuse BSDF:
         //          out_w_d = f_d * f_L * G * mis_w
         //      and accumulate the glossy component to the glossy framebuffer
         //   3. we are caching both the diffuse and glossy components:
         //          out_w_d = w * f_d * f_L * G * mis_w,
         //          out_w_g = w * f_g * f_L * G * mis_w;
         //      or rather, we could perform a single accumulation using (out_w_d + out_w_g) * f_L * G * mis_w
         //
         // Since in practice 1. and 3. end up in the same weights, this reduces to two cases:
         //   1. this is a new and valid cache entry
         //   2. all of the others
         if (new_cache_entry && out_cache_info.is_valid())
         {
             // the path weight is left out of the diffuse term: it is carried by the reference queued in preprocess_vertex()
             out_w_d = demodulate( f_d, cugar::Vector4f(ev.material.diffuse).xyz() ) * f_L;
             out_w_g = f_g * w.xyz() * f_L;
         }
         else
         {
             out_w_d = f_d * w.xyz() * f_L;
             out_w_g = f_g * w.xyz() * f_L;
         }
     }

     // compute the weight of a scattering (BSDF sampling) event, together with the packed
     // CacheInfo to be propagated to the next vertex along the path.
     //
     // context           the path tracing context (unused here)
     // renderer          the rendering context view (unused here)
     // pixel_info        the packed pixel info (unused here)
     // prev_vertex_info  the packed CacheInfo of the previous vertex
     // vertex_info       the packed CacheInfo returned by preprocess_vertex() for this vertex
     // ev                the current eye vertex
     // out_comp          the BSDF component that was sampled (tested against Bsdf::kDiffuseMask / Bsdf::kGlossyMask)
     // g                 the sampled BSDF weight
     // w                 the current path weight
     // out_w             the output path weight
     // out_vertex_info   the output packed CacheInfo
     template <typename TPTContext>
     FERMAT_DEVICE
     void compute_scattering_weights(
         const TPTContext&           context,
         const RenderingContextView& renderer,
         const PixelInfo             pixel_info,
         const uint32                prev_vertex_info,
         const uint32                vertex_info,
         const EyeVertex&            ev,
         const uint32                out_comp,
         const cugar::Vector3f&      g,
         const cugar::Vector3f&      w,
               cugar::Vector3f&      out_w,
               uint32&               out_vertex_info)
     {
         const CacheInfo prev_cache_info(prev_vertex_info);
         const CacheInfo new_cache_info(vertex_info);

         const uint32 new_cache_slot  = new_cache_info.pixel;
         const bool   new_cache_entry = new_cache_info.new_entry;

         // a glossy bounce leaving a path which had no cache entry before this vertex keeps the path uncached
         const CacheInfo out_cache_info = (prev_cache_info.is_invalid() && (out_comp & Bsdf::kGlossyMask)) ?
             prev_cache_info :                           // retain the invalid cache location
             CacheInfo(new_cache_slot, ALL_COMPS, 0);    // cache both diffuse and glossy components

         out_vertex_info = out_cache_info;

         // if this is a new "diffuse cache ray", i.e. if new_cache_entry && (out_comp & Bsdf::kDiffuseMask),
         // we have to demodulate the BSDF weight. This will be compensated by a correctly weighted reference to the queue entry.
         // The proper solution would be to use SH to encode incoming radiance.
         if (new_cache_entry && (out_comp & Bsdf::kDiffuseMask))
             out_w = demodulate(g, cugar::Vector4f(ev.material.diffuse).xyz());
         else
             out_w = g * w.xyz();
     }

     // accumulate an emissive surface hit, either to the frame-buffer (when the previous vertex
     // carried no valid cache entry) or to the cache entry selected earlier along the path.
     // The sample is passed through clamp_sample() in both cases.
     //
     // context           the path tracing context (read: in_bounce, frame_weight; written: psf_values)
     // renderer          the rendering context view (written: fb)
     // pixel_info        the packed pixel index and sampling component of this path
     // prev_vertex_info  the packed CacheInfo of the previous vertex
     // vertex_info       the packed CacheInfo of the current vertex (unused here)
     // ev                the current eye vertex (unused here)
     // out_w             the sample value
     template <typename TPTContext>
     FERMAT_DEVICE
     void accumulate_emissive(
         const TPTContext&           context,
               RenderingContextView& renderer,
         const PixelInfo             pixel_info,
         const uint32                prev_vertex_info,
         const uint32                vertex_info,
         const EyeVertex&            ev,
         const cugar::Vector3f&      out_w)
     {
         FBufferView& fb = renderer.fb;
         FBufferChannelView& composited_channel = fb(FBufferDesc::COMPOSITED_C);
         FBufferChannelView& direct_channel     = fb(FBufferDesc::DIRECT_C);
         FBufferChannelView& diffuse_channel    = fb(FBufferDesc::DIFFUSE_C);
         FBufferChannelView& specular_channel   = fb(FBufferDesc::SPECULAR_C);

         // access the vertex info from the previous vertex
         const CacheInfo prev_cache_info(prev_vertex_info);

         // clamp the sample value to avoid extreme fire-flies
         const cugar::Vector3f clamped_out_w = clamp_sample( out_w );

         // unpack the pixel index & sampling component
         const uint32 pixel_index = pixel_info.pixel;
         const uint32 pixel_comp  = pixel_info.comp;
         const float frame_weight = context.frame_weight;

         // accumulate to the image only if prev_cache_info is invalid
         if (prev_cache_info.is_invalid())
         {
             add_in<false>(composited_channel, pixel_index, clamped_out_w, frame_weight);

             // accumulate the per-component value to the proper output channel
             if (context.in_bounce == 0)
                 add_in<false>(direct_channel, pixel_index, clamped_out_w, frame_weight);
             else
             {
                 if (pixel_comp & Bsdf::kDiffuseMask) add_in<true>(diffuse_channel,  pixel_index, clamped_out_w, frame_weight);
                 if (pixel_comp & Bsdf::kGlossyMask)  add_in<true>(specular_channel, pixel_index, clamped_out_w, frame_weight);
             }
         }
         else
         {
             // accumulate to the cache entry (atomically, one color component at a time)
             cugar::atomic_add(&context.psf_values[prev_cache_info.pixel].x, clamped_out_w.x);
             cugar::atomic_add(&context.psf_values[prev_cache_info.pixel].y, clamped_out_w.y);
             cugar::atomic_add(&context.psf_values[prev_cache_info.pixel].z, clamped_out_w.z);
         }
     }

     // accumulate a next-event estimation (NEE) sample, either to a cache entry or to the frame-buffer,
     // depending on the packed CacheInfo produced by compute_nee_weights(). Nothing is done if the
     // shadow ray was occluded.
     //
     // context           the path tracing context (read: in_bounce, frame_weight; written: psf_values)
     // renderer          the rendering context view (written: fb)
     // pixel_info        the packed pixel index and sampling component of this path
     // vertex_info       the packed CacheInfo attached to the sample
     // shadow_hit        true if the shadow ray hit an occluder
     // w_d               the diffuse sample weight
     // w_g               the glossy sample weight
     template <typename TPTContext>
     FERMAT_DEVICE
     void accumulate_nee(
         const TPTContext&           context,
               RenderingContextView& renderer,
         const PixelInfo             pixel_info,
         const uint32                vertex_info,
         const bool                  shadow_hit,
         const cugar::Vector3f&      w_d,
         const cugar::Vector3f&      w_g)
     {
         // NOTE: the direct-lighting channel is not touched by this method, hence it is not fetched
         FBufferView& fb = renderer.fb;
         FBufferChannelView& composited_channel = fb(FBufferDesc::COMPOSITED_C);
         FBufferChannelView& diffuse_channel    = fb(FBufferDesc::DIFFUSE_C);
         FBufferChannelView& specular_channel   = fb(FBufferDesc::SPECULAR_C);

         // unpack the pixel index & sampling component
         const uint32 pixel_index = pixel_info.pixel;
         const uint32 pixel_comp  = pixel_info.comp;
         const float frame_weight = context.frame_weight;

         // access the packed vertex info
         const CacheInfo cache_info(vertex_info);

         if (shadow_hit == false)
         {
             // check if the cache cell is valid
             if (cache_info.is_valid())
             {
                 const uint32 cache_slot = cache_info.pixel;

                 // check whether to add both components to the cache or just the diffuse one
                 const cugar::Vector3f w = (cache_info.comp == DIFFUSE_COMP) ? w_d : w_d + w_g;

                 // NOTE(review): unlike accumulate_emissive(), the value added to the cache is not passed
                 // through clamp_sample() - confirm this is intentional, as a non-finite sample would
                 // contaminate the whole cache entry
                 cugar::atomic_add(&context.psf_values[cache_slot].x, w.x);
                 cugar::atomic_add(&context.psf_values[cache_slot].y, w.y);
                 cugar::atomic_add(&context.psf_values[cache_slot].z, w.z);

                 // if the glossy component was left out, we need to add it to the framebuffer
                 if (cache_info.comp == DIFFUSE_COMP)
                 {
                     // clamp once, and reuse the result for both accumulations
                     const cugar::Vector3f clamped_w_g = clamp_sample( w_g );

                     add_in<false>(composited_channel, pixel_index, clamped_w_g, frame_weight);

                     // select the right channel
                     FBufferChannelView& fb_channel = (context.in_bounce == 0 || (pixel_comp & Bsdf::kGlossyMask)) ?
                         specular_channel :
                         diffuse_channel;

                     add_in<true>(fb_channel, pixel_index, clamped_w_g, frame_weight);
                 }
             }
             else //if (context.pass_type == PSFPT::kFinalPass)
             {
                 // clamp the aggregate value once, and reuse the result below
                 const cugar::Vector3f clamped_w_sum = clamp_sample( w_d + w_g );

                 add_in<false>(composited_channel, pixel_index, clamped_w_sum, frame_weight);

                 if (context.in_bounce == 0)
                 {
                     // accumulate the per-component values to the respective output channels
                     add_in<true>(diffuse_channel,  pixel_index, clamp_sample( w_d ), frame_weight);
                     add_in<true>(specular_channel, pixel_index, clamp_sample( w_g ), frame_weight);
                 }
                 else
                 {
                     // accumulate the aggregate value to the proper output channel (only one will be true)
                     if (pixel_comp & Bsdf::kDiffuseMask) add_in<true>(diffuse_channel,  pixel_index, clamped_w_sum, frame_weight);
                     if (pixel_comp & Bsdf::kGlossyMask)  add_in<true>(specular_channel, pixel_index, clamped_w_sum, frame_weight);
                 }
             }
         }
     }

     float firefly_filter; // this value biases the render, but is needed to avoid extreme fireflies...
 };

PSFPTVertexProcessor::accumulate_nee
FERMAT_DEVICE void accumulate_nee(const TPTContext &context, RenderingContextView &renderer, const PixelInfo pixel_info, const uint32 vertex_info, const bool shadow_hit, const cugar::Vector3f &w_d, const cugar::Vector3f &w_g)
[PSFPTVertexProcessor::accumulate_emissive]
Definition: psfpt_vertex_processor.h:395

PSFPTVertexProcessor::firefly_filter
float firefly_filter
[PSFPTVertexProcessor::accumulate_nee]
Definition: psfpt_vertex_processor.h:472

PSFPTVertexProcessor::preprocess_vertex
FERMAT_DEVICE uint32 preprocess_vertex(TPTContext &context, const RenderingContextView &renderer, const PixelInfo pixel_info, const EyeVertex &ev, const float cone_radius, const cugar::Bbox3f scene_bbox, const uint32 prev_vertex_info, const cugar::Vector3f w, const float p_prev)
Definition: psfpt_vertex_processor.h:103

PSFPTVertexProcessor::PSFPTVertexProcessor
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE PSFPTVertexProcessor(float _firefly_filter=1000.0f)
[PSFPTVertexProcessor::CacheInfo]
Definition: psfpt_vertex_processor.h:84

PSFPTVertexProcessor
Definition: psfpt_vertex_processor.h:49

PSFPTVertexProcessor::compute_scattering_weights
FERMAT_DEVICE void compute_scattering_weights(const TPTContext &context, const RenderingContextView &renderer, const PixelInfo pixel_info, const uint32 prev_vertex_info, const uint32 vertex_info, const EyeVertex &ev, const uint32 out_comp, const cugar::Vector3f &g, const cugar::Vector3f &w, cugar::Vector3f &out_w, uint32 &out_vertex_info)
[PSFPTVertexProcessor::compute_nee_weights]
Definition: psfpt_vertex_processor.h:285

cugar::atomic_add
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE float atomic_add(float *value, const float op)
Definition: atomics.h:100

spatial_hash
FERMAT_FORCEINLINE FERMAT_HOST_DEVICE uint64 spatial_hash(const cugar::Vector3f P, const cugar::Vector3f N, const cugar::Bbox3f bbox, const float samples[6], const float cone_radius, const uint32 normal_bits=4)
Definition: spatial_hash.h:44

cugar::Bbox< Vector3f >

FBufferView
Definition: framebuffer.h:274

PSFPTVertexProcessor::accumulate_emissive
FERMAT_DEVICE void accumulate_emissive(const TPTContext &context, RenderingContextView &renderer, const PixelInfo pixel_info, const uint32 prev_vertex_info, const uint32 vertex_info, const EyeVertex &ev, const cugar::Vector3f &out_w)
[PSFPTVertexProcessor::compute_scattering_weights]
Definition: psfpt_vertex_processor.h:332

EyeVertex
Definition: bpt_utils.h:583

cugar::Vector< float, 3 >

FBufferChannelView
Definition: framebuffer.h:125

PixelInfo
Definition: pathtracer_core.h:527

RenderingContextView
Definition: renderer_view.h:80

PSFPTVertexProcessor::CacheInfo
[PSFPTVertexProcessor::CacheInfo]
Definition: psfpt_vertex_processor.h:56

PSFPTVertexProcessor::compute_nee_weights
FERMAT_DEVICE void compute_nee_weights(const TPTContext &context, const RenderingContextView &renderer, const PixelInfo pixel_info, const uint32 prev_vertex_info, const uint32 vertex_info, const EyeVertex &ev, const cugar::Vector3f &f_d, const cugar::Vector3f &f_g, const cugar::Vector3f &w, const cugar::Vector3f &f_L, cugar::Vector3f &out_w_d, cugar::Vector3f &out_w_g, uint32 &out_vertex_info)
[PSFPTVertexProcessor::preprocess_vertex]
Definition: psfpt_vertex_processor.h:215

cugar::hash
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 hash(uint32 a)
Definition: numbers.h:649

cugar::randfloat
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE float randfloat(unsigned i, unsigned p)
Definition: numbers.h:753