Fermat
pathtracer_core.h
1 /*
2  * Fermat
3  *
4  * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #pragma once
30 
31 #include <pathtracer.h>
32 #include <tiled_sequence.h>
33 #include <bsdf.h>
34 #include <edf.h>
35 #include <mis_utils.h>
36 #include <bpt_utils.h>
37 #include <eaw.h>
38 #include <direct_lighting_mesh.h>
39 #include <direct_lighting_rl.h>
40 
469 
// MIS combination strategy handed to mis_heuristic<> when next-event estimation
// and BSDF sampling are weighted against each other.
#define MIS_HEURISTIC POWER_HEURISTIC

// 512 * 1024 = 524288 (not referenced in this portion of the file).
#define VTL_RL_HASH_SIZE (512u * 1024u)

// DEVICE_TIME(x) expands to `x` only when DEVICE_TIMING is defined and non-zero;
// otherwise the wrapped timing statement is compiled out entirely.
#if !defined(DEVICE_TIMING) || (DEVICE_TIMING == 0)
#define DEVICE_TIME(x)
#else
#define DEVICE_TIME(x) x
#endif
479 
// Slots of the per-context device timer array (context.device_timers); each
// enumerator is used as an offset into that array by the DEVICE_TIME(...) calls.
enum PTDeviceTimers
{
    SETUP_TIME              = 0,    // eye vertex setup
    BRDF_EVAL_TIME          = 1,    // BSDF evaluation for light connections
    DIRLIGHT_SAMPLE_TIME    = 2,    // directional light sampling
    DIRLIGHT_EVAL_TIME      = 3,    // directional light EDF evaluation
    LIGHTS_PREPROCESS_TIME  = 4,    // context.dl.preprocess_vertex
    LIGHTS_SAMPLE_TIME      = 5,    // context.dl.sample
    LIGHTS_EVAL_TIME        = 6,    // EDF evaluation (NEE and emissive hits)
    LIGHTS_MAPPING_TIME     = 7,    // context.dl.map
    LIGHTS_UPDATE_TIME      = 8,    // context.dl.update
    TRACE_SHADOW_TIME       = 9,    // shadow ray enqueueing
    TRACE_SHADED_TIME       = 10,   // bounce ray enqueueing
    BRDF_SAMPLE_TIME        = 11,   // scattering event sampling
    FBUFFER_WRITES_TIME     = 12,   // gbuffer / framebuffer accumulation
    PREPROCESS_VERTEX_TIME  = 13,   // vertex_processor.preprocess_vertex
    NEE_WEIGHTS_TIME        = 14,   // vertex_processor.compute_nee_weights
    SCATTERING_WEIGHTS_TIME = 15,   // vertex_processor.compute_scattering_weights
    TOTAL_TIME              = 16    // not written by the code in this portion of the file
};
500 
501 // a simple device-side timer
502 //
504 {
505  FERMAT_DEVICE void start() { last = clock64(); }
506  FERMAT_DEVICE void restart() { last = clock64(); }
507  FERMAT_DEVICE uint64 take() { int64 first = last; last = clock64(); return uint64( last - first ); }
508 
509  int64 last;
510 };
511 
514 
517 
521 
528 {
529  FERMAT_HOST_DEVICE PixelInfo() {}
530  FERMAT_HOST_DEVICE PixelInfo(const uint32 _packed) : packed(_packed) {}
531  FERMAT_HOST_DEVICE PixelInfo(const uint32 _pixel, const uint32 _comp, const uint32 _diffuse = 0) : pixel(_pixel), comp(_comp), diffuse(_diffuse) {}
532 
533  FERMAT_HOST_DEVICE operator uint32() const { return packed; }
534 
535  uint32 packed;
536  struct
537  {
538  uint32 pixel : 27;
539  uint32 comp : 4;
540  uint32 diffuse : 1;
541  };
542 };
543 
544 FERMAT_DEVICE FERMAT_FORCEINLINE
545 void per_warp_atomic_add(uint64* ptr, uint64 val) // NOTE: ptr needs to be the same across the warp!
546 {
547 #if __CUDA_ARCH__ > 700
548  const unsigned int lane_id = threadIdx.x & 31;
549 
550  int pred;
551  int mask = __match_all_sync(__activemask(), val, &pred);
552  int leader = __ffs(mask) - 1; // select a leader
553 
554  if (lane_id == leader) // only the leader does the update
555  atomicAdd(ptr, val);
556 #else
557  const unsigned int lane_id = threadIdx.x & 31;
558 
559  int mask = __ballot_sync(__activemask(), true);
560  int leader = __ffs(mask) - 1; // select a leader
561 
562  if (lane_id == leader) // only the leader does the update
563  atomicAdd(ptr, val);
564 #endif
565 }
566 
// Per-bounce state shared by the path tracing routines in this file: the user
// options, the sampling sequence, and the flags computed by
// compute_per_bounce_options().
// NOTE(review): the line carrying the `struct <name>` declaration (listing line 570)
// is missing from this extract - presumably `struct PTContextBase`; confirm against
// the upstream header before restoring it.
569 template <typename TPTOptions>
571 {
572  TPTOptions options; // the path tracing options (max_path_length, lighting / scattering toggles, ...)
573  TiledSequenceView sequence; // the sampling sequence queried through sample_2d(x, y, dimension)
574  float frame_weight; // scale factor applied to the values accumulated into the framebuffer
575 
576  uint32 in_bounce : 27; // the index of the bounce being processed
577  uint32 do_nee : 1; // set by compute_per_bounce_options(): perform next-event estimation
578  uint32 do_accumulate_emissive : 1; // set by compute_per_bounce_options(): accumulate emission at hits
579  uint32 do_scatter : 1; // set by compute_per_bounce_options(): sample a scattering event
580 
581  cugar::Bbox3f bbox; // bounding box forwarded to the preprocess_vertex() calls
582 
583  uint64* device_timers; // timer array indexed by PTDeviceTimers, updated inside DEVICE_TIME(...)
584 };
585 
586 //------------------------------------------------------------------------------
587 
594 template <typename TPTContext>
595 FERMAT_HOST_DEVICE
597  TPTContext& context,
598  const RenderingContextView& renderer)
599 {
600  // decide whether to perform next-event estimation
601  context.do_nee =
602  renderer.mesh_vpls.n_vpls &&
603  ((context.in_bounce + 2 <= context.options.max_path_length) &&
604  ((context.in_bounce == 0 && context.options.direct_lighting_nee && context.options.direct_lighting) ||
605  (context.in_bounce > 0 && context.options.indirect_lighting_nee)));
606 
607  // decide whether to evaluate and accumulate emissive surfaces
608  context.do_accumulate_emissive =
609  ((context.in_bounce == 0 && context.options.visible_lights) ||
610  (context.in_bounce == 1 && context.options.direct_lighting_bsdf && context.options.direct_lighting) ||
611  (context.in_bounce > 1 && context.options.indirect_lighting_bsdf));
612 
613  // compute the number of path vertices we want to generate from the eye
614  const uint32 max_path_vertices = context.options.max_path_length +
615  ((context.options.max_path_length == 2 && context.options.direct_lighting_bsdf) ||
616  (context.options.max_path_length > 2 && context.options.indirect_lighting_bsdf) ? 1 : 0);
617 
618  // decide whether to perform scattering
619  context.do_scatter = (context.in_bounce + 2 < max_path_vertices);
620 }
621 
622 //------------------------------------------------------------------------------
633 template <typename TPTContext>
634 FERMAT_DEVICE
636  TPTContext& context,
637  RenderingContextView& renderer,
638  const uint2 pixel,
639  cugar::Vector3f U,
640  cugar::Vector3f V,
641  cugar::Vector3f W)
642 {
643  // use an optimized sampling pattern to rotate a Halton sequence
644  const cugar::Vector2f uv(
645  context.sequence.sample_2d(pixel.x, pixel.y, 0),
646  context.sequence.sample_2d(pixel.x, pixel.y, 1));
647 
648  const float2 d = make_float2(
649  (pixel.x + uv.x) / float(renderer.res_x),
650  (pixel.y + uv.y) / float(renderer.res_y)) * 2.f - 1.f;
651 
652  float3 ray_origin = renderer.camera.eye;
653  float3 ray_direction = d.x*U + d.y*V + W;
654 
655  return make_ray( ray_origin, ray_direction, 0u, 1e34f );
656 }
657 
658 //------------------------------------------------------------------------------
// Generate a primary ray through the given pixel, delegating the computation of
// the direction to renderer.camera_sampler.
//
// The pixel is jittered with dimensions 0 and 1 of the sampling sequence and
// normalized by the resolution (no further remapping is applied here) before
// being passed to camera_sampler.sample_direction(); the ray is then built by
// make_ray() with its origin at the camera eye.
//
// NOTE(review): the line carrying the return type and the function name (listing
// line 668) is missing from this extract - presumably an overload of
// generate_primary_ray returning MaskedRay; confirm against the upstream header.
666 template <typename TPTContext>
667 FERMAT_DEVICE
669  TPTContext& context,
670  RenderingContextView& renderer,
671  const uint2 pixel)
672 {
673  // use an optimized sampling pattern to rotate a Halton sequence
674  const cugar::Vector2f uv(
675  context.sequence.sample_2d(pixel.x, pixel.y, 0),
676  context.sequence.sample_2d(pixel.x, pixel.y, 1));
677 
678  const float2 d = make_float2(
679  (pixel.x + uv.x) / float(renderer.res_x),
680  (pixel.y + uv.y) / float(renderer.res_y));
681 
682  float3 ray_origin = renderer.camera.eye;
683  float3 ray_direction = renderer.camera_sampler.sample_direction( d );
684 
685  return make_ray( ray_origin, ray_direction, 0u, 1e34f );
686 }
687 
688 //------------------------------------------------------------------------------
689 
705 template <typename TPTContext, typename TPTVertexProcessor>
706 FERMAT_DEVICE
708  TPTContext& context,
709  TPTVertexProcessor& vertex_processor,
710  RenderingContextView& renderer,
711  const bool shadow_hit,
712  const PixelInfo pixel_info,
713  const cugar::Vector3f w,
714  const cugar::Vector3f w_d,
715  const cugar::Vector3f w_g,
716  const uint32 vertex_info = uint32(-1),
717  const uint32 nee_vertex_id = uint32(-1),
718  const uint32 nee_sample_id = uint32(-1))
719 {
720  DEVICE_TIME( DeviceTimer timer );
721  DEVICE_TIME( timer.start() );
722 
723  // update the DL sampler state
724  context.dl.update( nee_vertex_id, nee_sample_id, w, shadow_hit == true );
725 
726  DEVICE_TIME( per_warp_atomic_add( context.device_timers + LIGHTS_UPDATE_TIME, timer.take() ) );
727 
728  vertex_processor.accumulate_nee(
729  context,
730  renderer,
731  pixel_info,
732  vertex_info,
733  shadow_hit,
734  w_d,
735  w_g );
736 
737  DEVICE_TIME( per_warp_atomic_add( context.device_timers + FBUFFER_WRITES_TIME, timer.take() ) );
738 }
739 
740 //------------------------------------------------------------------------------
741 
744 template <typename TPTContext>
745 FERMAT_DEVICE
746 float vertex_sample(const uint2 pixel, TPTContext& context, const uint32 i)
747 {
748  return context.sequence.sample_2d(pixel.x, pixel.y, (context.in_bounce + 1) * 6 + i);
749 }
750 
751 //------------------------------------------------------------------------------
771 template <typename TPTContext, typename TPTVertexProcessor>
772 FERMAT_DEVICE
774  TPTContext& context,
775  TPTVertexProcessor& vertex_processor,
776  RenderingContextView& renderer,
777  const uint32 bounce,
778  const PixelInfo pixel_info,
779  const uint2 pixel,
780  const MaskedRay& ray,
781  const Hit hit,
782  const cugar::Vector4f w,
783  const uint32 prev_vertex_info = uint32(-1),
784  const uint32 prev_nee_vertex_id = uint32(-1),
785  const cugar::Vector2f cone = cugar::Vector2f(0))
786 {
787  const float p_prev = w.w;
788 
789  const uint32 pixel_index = pixel_info.pixel;
790 
791  if (hit.t > 0.0f && hit.triId >= 0)
792  {
793  DEVICE_TIME( DeviceTimer timer );
794  DEVICE_TIME( timer.start() );
795 
796  EyeVertex ev;
797  ev.setup(ray, hit, w.xyz(), cugar::Vector4f(0.0f), bounce, renderer);
798 
799  DEVICE_TIME( per_warp_atomic_add( context.device_timers + SETUP_TIME, timer.take() ) );
800 
801  // write out gbuffer information
802  if (bounce == 0)
803  {
804  renderer.fb.gbuffer.geo(pixel_index) = GBufferView::pack_geometry(ev.geom.position, ev.geom.normal_s);
805  renderer.fb.gbuffer.uv(pixel_index) = make_float4(hit.u, hit.v, ev.geom.texture_coords.x, ev.geom.texture_coords.y);
806  renderer.fb.gbuffer.tri(pixel_index) = hit.triId;
807  renderer.fb.gbuffer.depth(pixel_index) = hit.t;
808 
809  // write surface albedos
810  renderer.fb(FBufferDesc::DIFFUSE_A, pixel_index) += cugar::Vector4f(ev.material.diffuse) * context.frame_weight;
811  renderer.fb(FBufferDesc::SPECULAR_A, pixel_index) += (cugar::Vector4f(ev.material.specular) + cugar::Vector4f(1.0f))*0.5f * context.frame_weight;
812  }
813 
814  DEVICE_TIME( per_warp_atomic_add( context.device_timers + FBUFFER_WRITES_TIME, timer.take() ) );
815 
816  // in order to select the footprint at the intersection, we use the formulation proposed by Bekaert:
817  // R(x_k) = h/sqrtf(p(x_k|x_[k-1])) + R(x_[k-1])
818  const float area_prob = cugar::rsqrtf(cone.y * ev.prev_G_prime);
819  const float cone_radius = cone.x + area_prob;
820 
821  // lookup / insert an NEE RL entry
822  uint32 nee_vertex_id = uint32(-1);
823  if (context.do_nee)
824  {
825  bool is_secondary_diffuse = pixel_info.diffuse;
826 
827  nee_vertex_id = context.dl.preprocess_vertex(
828  renderer,
829  ev,
830  pixel_info.pixel,
831  context.in_bounce,
832  is_secondary_diffuse,
833  cone_radius,
834  context.bbox );
835 
836  #if 0
837  // debug visualization
838  {
839  cugar::Vector3f c;
840  c.x = cugar::randfloat(0, nee_vertex_id);
841  c.y = cugar::randfloat(1, nee_vertex_id);
842  c.z = cugar::randfloat(2, nee_vertex_id);
843  add_in<false>(renderer.fb(FBufferDesc::COMPOSITED_C), pixel_info.pixel, c, context.frame_weight);
844  return;
845  }
846  #endif
847  }
848 
849  DEVICE_TIME( per_warp_atomic_add( context.device_timers + LIGHTS_PREPROCESS_TIME, timer.take() ) );
850 
851  const uint32 vertex_info = vertex_processor.preprocess_vertex(
852  context,
853  renderer,
854  pixel_info,
855  ev,
856  cone_radius,
857  context.bbox,
858  prev_vertex_info,
859  w.xyz(),
860  p_prev );
861 
862  DEVICE_TIME( per_warp_atomic_add( context.device_timers + PREPROCESS_VERTEX_TIME, timer.take() ) );
863 
864  // initialize our shifted sampling sequence
865  float samples[6];
866  for (uint32 i = 0; i < 6; ++i)
867  samples[i] = vertex_sample(pixel, context, i);
868 
869  // directional-lighting
870  if ((context.in_bounce + 2 <= context.options.max_path_length) &&
871  (context.in_bounce > 0 || context.options.direct_lighting) &&
872  renderer.dir_lights_count)
873  {
874  DEVICE_TIME( timer.restart() );
875 
876  // fetch the sampling dimensions
877  const float z[3] = { samples[0], samples[1], samples[2] }; // use dimensions 0,1,2
878 
879  VertexGeometryId light_vertex;
880  VertexGeometry light_vertex_geom;
881  float light_pdf;
882  Edf light_edf;
883 
884  // use the third dimension to select a light source
885  const uint32 light_idx = cugar::quantize( z[2], renderer.dir_lights_count );
886 
887  // sample the light source surface
888  renderer.dir_lights[ light_idx ].sample(ev.geom.position, z, &light_vertex.prim_id, &light_vertex.uv, &light_vertex_geom, &light_pdf, &light_edf);
889 
890  // multiply by the light selection probability
891  light_pdf /= renderer.dir_lights_count;
892 
893  DEVICE_TIME( per_warp_atomic_add( context.device_timers + DIRLIGHT_SAMPLE_TIME, timer.take() ) );
894 
895  // join the light sample with the current vertex
896  cugar::Vector3f out = (light_vertex_geom.position - ev.geom.position);
897 
898  const float d2 = fmaxf(1.0e-8f, cugar::square_length(out));
899 
900  // normalize the outgoing direction
901  out *= rsqrtf(d2);
902 
903  cugar::Vector3f f_s_comp[Bsdf::kNumComponents];
904  float p_s_comp[Bsdf::kNumComponents];
905 
906  ev.bsdf.f_and_p(ev.geom, ev.in, out, f_s_comp, p_s_comp, cugar::kProjectedSolidAngle);
907 
908  // check which paths are enabled
909  const bool eval_diffuse = context.options.diffuse_scattering;
910  const bool eval_glossy = context.options.glossy_scattering; // TODO: handle the indirect_glossy toggle here
911 
912  #if 0
913  cugar::Vector3f f_s(0.0f);
914  float p_s(0.0f);
915 
916  if (eval_diffuse)
917  {
918  f_s += f_s_comp[Bsdf::kDiffuseReflectionIndex] + f_s_comp[Bsdf::kDiffuseTransmissionIndex];
919  p_s += p_s_comp[Bsdf::kDiffuseReflectionIndex] + p_s_comp[Bsdf::kDiffuseTransmissionIndex];
920  }
921  if (eval_glossy)
922  {
923  f_s += f_s_comp[Bsdf::kGlossyReflectionIndex] + f_s_comp[Bsdf::kGlossyTransmissionIndex];
924  p_s += p_s_comp[Bsdf::kGlossyReflectionIndex] + p_s_comp[Bsdf::kGlossyTransmissionIndex];
925  }
926  #endif
927  DEVICE_TIME( per_warp_atomic_add( context.device_timers + BRDF_EVAL_TIME, timer.take() ) );
928 
929  // evaluate the light's EDF and the surface BSDF
930  const cugar::Vector3f f_L = light_edf.f(light_vertex_geom, light_vertex_geom.position, -out) / light_pdf;
931 
932  DEVICE_TIME( per_warp_atomic_add( context.device_timers + DIRLIGHT_EVAL_TIME, timer.take() ) );
933 
934  // evaluate the geometric term
935  const float G = fabsf(cugar::dot(out, ev.geom.normal_s) * cugar::dot(out, light_vertex_geom.normal_s)) / d2;
936 
937  // TODO: perform MIS with the possibility of directly hitting the light source
938  const float mis_w = 1.0f;
939 
940  // calculate the output weights
941  cugar::Vector3f out_w_d;
942  cugar::Vector3f out_w_g;
943  uint32 out_vertex_info;
944 
945  vertex_processor.compute_nee_weights(
946  context,
947  renderer,
948  pixel_info,
949  prev_vertex_info,
950  vertex_info,
951  ev,
952  eval_diffuse ? f_s_comp[Bsdf::kDiffuseReflectionIndex] + f_s_comp[Bsdf::kDiffuseTransmissionIndex] : cugar::Vector3f(0.0f),
953  eval_glossy ? f_s_comp[Bsdf::kGlossyReflectionIndex] + f_s_comp[Bsdf::kGlossyTransmissionIndex] : cugar::Vector3f(0.0f),
954  w.xyz(),
955  f_L * G * mis_w,
956  out_w_d,
957  out_w_g,
958  out_vertex_info );
959 
960  DEVICE_TIME( per_warp_atomic_add( context.device_timers + NEE_WEIGHTS_TIME, timer.take() ) );
961 
962  #if 0
963  // calculate the cumulative sample weight, equal to f_L * f_s * G / p
964  const cugar::Vector3f out_w = w.xyz() * f_L * f_s * G * mis_w;
965  #else
966  // calculate the cumulative sample weight
967  const cugar::Vector3f out_w = out_w_d + out_w_g;
968  #endif
969 
970  if (cugar::max_comp(out_w) > 0.0f && cugar::is_finite(out_w))
971  {
972  DEVICE_TIME( timer.restart() );
973 
974  // find the right side of the normal
975  const cugar::Vector3f N = dot(ev.geom.normal_s,ray.dir) > 0.0f ? -ev.geom.normal_s : ev.geom.normal_s;
976 
977  // enqueue the output ray
978  MaskedRay out_ray;
979  out_ray.origin = ev.geom.position - ray.dir * 1.0e-3f; // shift back in space along the viewing direction
980  out_ray.dir = (light_vertex_geom.position - out_ray.origin); //out;
981  out_ray.mask = 0x1u; // shadow flag
982  out_ray.tmax = 0.9999f; //d * 0.9999f;
983 
984  context.trace_shadow_ray( vertex_processor, renderer, pixel_info, out_ray, out_w, out_w_d, out_w_g, vertex_info );
985 
986  DEVICE_TIME( per_warp_atomic_add( context.device_timers + TRACE_SHADOW_TIME, timer.take() ) );
987  }
988  }
989 
990  // perform next-event estimation to compute direct lighting
991  if (context.do_nee)
992  {
993  DEVICE_TIME( timer.restart() );
994 
995  // fetch the sampling dimensions
996  const float z[3] = { samples[0], samples[1], samples[2] }; // use dimensions 0,1,2
997  //const float z[3] = {
998  // vertex_sample(pixel, bounce, 0u),
999  // vertex_sample(pixel, bounce, 1u),
1000  // vertex_sample(pixel, bounce, 2u)
1001  //}; // use dimensions 0,1,2
1002 
1003  VertexGeometryId light_vertex;
1004  VertexGeometry light_vertex_geom;
1005  float light_pdf;
1006  Edf light_edf;
1007 
1008  // sample the light source surface
1009  const uint32 nee_sample_id = context.dl.sample( nee_vertex_id, z, &light_vertex, &light_vertex_geom, &light_pdf, &light_edf );
1010 
1011  DEVICE_TIME( per_warp_atomic_add( context.device_timers + LIGHTS_SAMPLE_TIME, timer.take() ) );
1012 
1013  // join the light sample with the current vertex
1014  cugar::Vector3f out = (light_vertex_geom.position - ev.geom.position);
1015 
1016  const float d2 = fmaxf(1.0e-8f, cugar::square_length(out));
1017 
1018  // normalize the outgoing direction
1019  out *= rsqrtf(d2);
1020 
1021  cugar::Vector3f f_s_comp[Bsdf::kNumComponents];
1022  float p_s_comp[Bsdf::kNumComponents];
1023 
1024  ev.bsdf.f_and_p(ev.geom, ev.in, out, f_s_comp, p_s_comp, cugar::kProjectedSolidAngle);
1025 
1026  // check which paths are enabled
1027  const bool eval_diffuse = context.options.diffuse_scattering;
1028  const bool eval_glossy = context.options.glossy_scattering; // TODO: handle the indirect_glossy toggle here
1029 
1030  cugar::Vector3f f_s(0.0f);
1031  float p_s(0.0f);
1032 
1033  if (eval_diffuse)
1034  {
1035  f_s += f_s_comp[Bsdf::kDiffuseReflectionIndex] + f_s_comp[Bsdf::kDiffuseTransmissionIndex];
1036  p_s += p_s_comp[Bsdf::kDiffuseReflectionIndex] + p_s_comp[Bsdf::kDiffuseTransmissionIndex];
1037  }
1038  if (eval_glossy)
1039  {
1040  f_s += f_s_comp[Bsdf::kGlossyReflectionIndex] + f_s_comp[Bsdf::kGlossyTransmissionIndex];
1041  p_s += p_s_comp[Bsdf::kGlossyReflectionIndex] + p_s_comp[Bsdf::kGlossyTransmissionIndex];
1042  }
1043 
1044  DEVICE_TIME( per_warp_atomic_add( context.device_timers + BRDF_EVAL_TIME, timer.take() ) );
1045 
1046  // evaluate the light's EDF and the surface BSDF
1047  const cugar::Vector3f f_L = light_edf.f(light_vertex_geom, light_vertex_geom.position, -out) / light_pdf;
1048 
1049  DEVICE_TIME( per_warp_atomic_add( context.device_timers + LIGHTS_EVAL_TIME, timer.take() ) );
1050 
1051  // evaluate the geometric term
1052  const float G = fabsf(cugar::dot(out, ev.geom.normal_s) * cugar::dot(out, light_vertex_geom.normal_s)) / d2;
1053 
1054  // TODO: perform MIS with the possibility of directly hitting the light source
1055  const float p1 = light_pdf;
1056  const float p2 = p_s * G;
1057  const float mis_w =
1058  (bounce == 0 && context.options.direct_lighting_bsdf) ||
1059  (bounce > 0 && context.options.indirect_lighting_bsdf) ? mis_heuristic<MIS_HEURISTIC>(p1, p2) : 1.0f;
1060 
1061  // calculate the output weights
1062  cugar::Vector3f out_w_d;
1063  cugar::Vector3f out_w_g;
1064  uint32 out_vertex_info;
1065 
1066  vertex_processor.compute_nee_weights(
1067  context,
1068  renderer,
1069  pixel_info,
1070  prev_vertex_info,
1071  vertex_info,
1072  ev,
1073  eval_diffuse ? f_s_comp[Bsdf::kDiffuseReflectionIndex] + f_s_comp[Bsdf::kDiffuseTransmissionIndex] : cugar::Vector3f(0.0f),
1074  eval_glossy ? f_s_comp[Bsdf::kGlossyReflectionIndex] + f_s_comp[Bsdf::kGlossyTransmissionIndex] : cugar::Vector3f(0.0f),
1075  w.xyz(),
1076  f_L * G * mis_w,
1077  out_w_d,
1078  out_w_g,
1079  out_vertex_info );
1080 
1081  DEVICE_TIME( per_warp_atomic_add( context.device_timers + NEE_WEIGHTS_TIME, timer.take() ) );
1082 
1083  #if 0
1084  // calculate the cumulative sample weight, equal to f_L * f_s * G / p
1085  const cugar::Vector3f out_w = w.xyz() * f_L * f_s * G * mis_w;
1086  #else
1087  // calculate the cumulative sample weight
1088  const cugar::Vector3f out_w = out_w_d + out_w_g;
1089  #endif
1090 
1091  if (cugar::max_comp(out_w) > 0.0f && cugar::is_finite(out_w))
1092  {
1093  DEVICE_TIME( timer.restart() );
1094 
1095  // enqueue the output ray
1096  MaskedRay out_ray;
1097  out_ray.origin = ev.geom.position - ray.dir * 1.0e-4f; // shift back in space along the viewing direction
1098  out_ray.dir = (light_vertex_geom.position - out_ray.origin); //out;
1099  out_ray.mask = 0x2u;
1100  out_ray.tmax = 0.9999f; //d * 0.9999f;
1101 
1102  context.trace_shadow_ray( vertex_processor, renderer, pixel_info, out_ray, out_w, out_w_d, out_w_g, vertex_info, nee_vertex_id, nee_sample_id );
1103 
1104  DEVICE_TIME( per_warp_atomic_add( context.device_timers + TRACE_SHADOW_TIME, timer.take() ) );
1105  }
1106  }
1107 
1108  // accumulate the emissive component along the incoming direction
1109  if (context.do_accumulate_emissive)
1110  {
1111  DEVICE_TIME( timer.restart() );
1112 
1113  VertexGeometry light_vertex_geom = ev.geom;
1114  float light_pdf;
1115  Edf light_edf;
1116 
1117  context.dl.map( prev_nee_vertex_id, hit.triId, cugar::Vector2f(hit.u, hit.v), light_vertex_geom, &light_pdf, &light_edf );
1118 
1119  DEVICE_TIME( per_warp_atomic_add( context.device_timers + LIGHTS_MAPPING_TIME, timer.take() ) );
1120 
1121  // evaluate the edf's output along the incoming direction
1122  const cugar::Vector3f f_L = light_edf.f(light_vertex_geom, light_vertex_geom.position, ev.in);
1123 
1124  DEVICE_TIME( per_warp_atomic_add( context.device_timers + LIGHTS_EVAL_TIME, timer.take() ) );
1125 
1126  const float d2 = fmaxf(1.0e-10f, hit.t * hit.t);
1127 
1128  // compute the MIS weight with next event estimation at the previous vertex
1129  const float G_partial = fabsf(cugar::dot(ev.in, light_vertex_geom.normal_s)) / d2; // NOTE: G_partial doesn't include the dot product between 'in and the normal at the previous vertex
1130 
1131  const float p1 = G_partial * p_prev; // NOTE: p_prev is the solid angle probability of sampling the BSDF at the previous vertex, i.e. p_proj * dot(in,normal)
1132  const float p2 = light_pdf;
1133  const float mis_w =
1134  (bounce == 1 && context.options.direct_lighting_nee) ||
1135  (bounce > 1 && context.options.indirect_lighting_nee) ? mis_heuristic<MIS_HEURISTIC>(p1, p2) : 1.0f;
1136 
1137  // and accumulate the weighted contribution
1138  const cugar::Vector3f out_w = w.xyz() * f_L * mis_w;
1139 
1140  // and accumulate the weighted contribution
1141  if (cugar::max_comp(out_w) > 0.0f && cugar::is_finite(out_w))
1142  {
1143  vertex_processor.accumulate_emissive(
1144  context,
1145  renderer,
1146  pixel_info,
1147  prev_vertex_info,
1148  vertex_info,
1149  ev,
1150  out_w );
1151  }
1152 
1153  DEVICE_TIME( per_warp_atomic_add( context.device_timers + FBUFFER_WRITES_TIME, timer.take() ) );
1154  }
1155 
1156  // trace a bounce ray
1157  if (context.do_scatter)
1158  {
1159  DEVICE_TIME( timer.restart() );
1160 
1161  // fetch the sampling dimensions
1162  const float z[3] = { samples[3], samples[4], samples[5] }; // use dimensions 3,4,5
1163  //const float z[3] = {
1164  // vertex_sample(pixel, bounce, 3u),
1165  // vertex_sample(pixel, bounce, 4u),
1166  // vertex_sample(pixel, bounce, 5u)
1167  //}; // use dimensions 3,4,5
1168 
1169  // sample a scattering event
1170  cugar::Vector3f out(0.0f);
1171  cugar::Vector3f g(0.0f);
1172  float p(0.0f);
1173  float p_proj(0.0f);
1174  Bsdf::ComponentType out_comp(Bsdf::kAbsorption);
1175 
1176  // check which components we have to sample
1177  uint32 component_mask = uint32(Bsdf::kAllComponents);
1178  {
1179  // disable diffuse scattering if not allowed
1180  if (context.options.diffuse_scattering == false)
1181  component_mask &= ~uint32(Bsdf::kDiffuseMask);
1182 
1183  // disable glossy scattering if:
1184  // 1. indirect glossy scattering is disabled, OR
1185  // 2. we have sampled a diffuse reflection and indirect_glossy == false (TODO)
1186  if (context.options.glossy_scattering == false)
1187  component_mask &= ~uint32(Bsdf::kGlossyMask);
1188  }
1189 
1190  scatter(ev, z, out_comp, out, p, p_proj, g, true, false, false, Bsdf::ComponentType(component_mask));
1191 
1192  DEVICE_TIME( per_warp_atomic_add( context.device_timers + BRDF_SAMPLE_TIME, timer.take() ) );
1193 
1194  // compute the output weight
1195  cugar::Vector3f out_w;
1196  uint32 out_vertex_info;
1197 
1198  vertex_processor.compute_scattering_weights(
1199  context,
1200  renderer,
1201  pixel_info,
1202  prev_vertex_info,
1203  vertex_info,
1204  ev,
1205  out_comp,
1206  g,
1207  w.xyz(),
1208  out_w,
1209  out_vertex_info );
1210 
1211  DEVICE_TIME( per_warp_atomic_add( context.device_timers + SCATTERING_WEIGHTS_TIME, timer.take() ) );
1212 
1213  if (p != 0.0f && cugar::max_comp(out_w) > 0.0f && cugar::is_finite(out_w))
1214  {
1215  // enqueue the output ray
1216  MaskedRay out_ray;
1217  out_ray.origin = ev.geom.position;
1218  out_ray.dir = out;
1219  out_ray.mask = __float_as_uint(1.0e-3f);
1220  out_ray.tmax = 1.0e8f;
1221 
1222  const float out_p = p;
1223 
1224  // in order to select the footprint, we use the formulation proposed by Bekaert:
1225  // R(x_k) = h/sqrtf(p(x_k|x_[k-1])) + R(x_[k-1])
1226  const float min_p = 32.0f; // 32 corresponds to a maximum angle of ~10 degrees
1227  const cugar::Vector2f out_cone(cone_radius, cugar::max(out_p, min_p));
1228  // tan(alpha) = 1/sqrt(out_p) => out_p = 1/tan(alpha)^2
1229  // out_p > min_p => 1/tan(alpha)^2 > min_p => tan(alpha)^2 < 1/min_p => tan(alpha) < 1/min_p^2
1230 
1231  // mark if the path ever went through a diffuse bounce
1232  bool is_secondary_diffuse = pixel_info.diffuse || (out_comp & Bsdf::kDiffuseMask);
1233 
1234  context.trace_ray(
1235  vertex_processor,
1236  renderer,
1237  PixelInfo(pixel_index, out_comp, is_secondary_diffuse),
1238  out_ray,
1239  cugar::Vector4f(out_w, out_p),
1240  out_cone,
1241  out_vertex_info,
1242  nee_vertex_id );
1243 
1244  DEVICE_TIME( per_warp_atomic_add( context.device_timers + TRACE_SHADED_TIME, timer.take() ) );
1245  return true; // continue the path
1246  }
1247  }
1248  }
1249  else
1250  {
1251  // hit the environment - perform sky lighting
1252  }
1253  return false; // stop the path
1254 }
1255 //------------------------------------------------------------------------------
1256 
CUGAR_HOST_DEVICE uint32 quantize(const float x, const uint32 n)
Definition: numbers.h:600
FERMAT_DEVICE float vertex_sample(const uint2 pixel, TPTContext &context, const uint32 i)
Definition: pathtracer_core.h:746
FERMAT_HOST_DEVICE void compute_per_bounce_options(TPTContext &context, const RenderingContextView &renderer)
Definition: pathtracer_core.h:596
Definition: vertex.h:105
Definition: pathtracer_core.h:503
ComponentType
Definition: bsdf.h:139
Definition: vertex.h:92
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE cugar::Vector3f sample_direction(const cugar::Vector2f ndc) const
Definition: camera.h:278
Definition: tiled_sequence.h:53
Definition: bpt_utils.h:583
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE Vector3f f(const DifferentialGeometry &geometry, const Vector3f in, const Vector3f out) const
Definition: lambert_edf.h:60
FERMAT_DEVICE MaskedRay generate_primary_ray(TPTContext &context, RenderingContextView &renderer, const uint2 pixel, cugar::Vector3f U, cugar::Vector3f V, cugar::Vector3f W)
Definition: pathtracer_core.h:635
Definition: pathtracer_core.h:570
Definition: ray.h:68
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE bool scatter(const VertexType &v, const float z[3], Bsdf::ComponentType &out_component, cugar::Vector3f &out, float &out_p, float &out_p_proj, cugar::Vector3f &out_w, bool RR=true, bool output_alpha=true, bool evaluate_full_bsdf=false, Bsdf::ComponentType components=Bsdf::kAllComponents)
Definition: bpt_utils.h:1070
FERMAT_HOST_DEVICE bool sample(const float *Z, uint32_t *prim_id, cugar::Vector2f *uv, VertexGeometry *geom, float *pdf, Edf *edf) const
Definition: lights.h:521
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint8 comp(const uchar2 a, const char c)
Definition: numbers.h:218
Definition: edf.h:49
Definition: ray.h:55
Definition: pathtracer_core.h:527
FERMAT_DEVICE void solve_occlusion(TPTContext &context, TPTVertexProcessor &vertex_processor, RenderingContextView &renderer, const bool shadow_hit, const PixelInfo pixel_info, const cugar::Vector3f w, const cugar::Vector3f w_d, const cugar::Vector3f w_g, const uint32 vertex_info=uint32(-1), const uint32 nee_vertex_id=uint32(-1), const uint32 nee_sample_id=uint32(-1))
Definition: pathtracer_core.h:707
Definition: renderer_view.h:80
FERMAT_DEVICE bool shade_vertex(TPTContext &context, TPTVertexProcessor &vertex_processor, RenderingContextView &renderer, const uint32 bounce, const PixelInfo pixel_info, const uint2 pixel, const MaskedRay &ray, const Hit hit, const cugar::Vector4f w, const uint32 prev_vertex_info=uint32(-1), const uint32 prev_nee_vertex_id=uint32(-1), const cugar::Vector2f cone=cugar::Vector2f(0))
Definition: pathtracer_core.h:773
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE float randfloat(unsigned i, unsigned p)
Definition: numbers.h:753