31 #include <pathtracer.h> 32 #include <tiled_sequence.h> 35 #include <mis_utils.h> 36 #include <bpt_utils.h> 38 #include <direct_lighting_mesh.h> 39 #include <direct_lighting_rl.h> 470 #define MIS_HEURISTIC POWER_HEURISTIC 472 #define VTL_RL_HASH_SIZE (512u * 1024u) 474 #if !defined(DEVICE_TIMING) || (DEVICE_TIMING == 0) 475 #define DEVICE_TIME(x) 477 #define DEVICE_TIME(x) x 484 DIRLIGHT_SAMPLE_TIME = 2,
485 DIRLIGHT_EVAL_TIME = 3,
486 LIGHTS_PREPROCESS_TIME = 4,
487 LIGHTS_SAMPLE_TIME = 5,
488 LIGHTS_EVAL_TIME = 6,
489 LIGHTS_MAPPING_TIME = 7,
490 LIGHTS_UPDATE_TIME = 8,
491 TRACE_SHADOW_TIME = 9,
492 TRACE_SHADED_TIME = 10,
493 BRDF_SAMPLE_TIME = 11,
494 FBUFFER_WRITES_TIME = 12,
495 PREPROCESS_VERTEX_TIME = 13,
496 NEE_WEIGHTS_TIME = 14,
497 SCATTERING_WEIGHTS_TIME = 15,
505 FERMAT_DEVICE
void start() { last = clock64(); }
506 FERMAT_DEVICE
void restart() { last = clock64(); }
507 FERMAT_DEVICE uint64 take() { int64 first = last; last = clock64();
return uint64( last - first ); }
530 FERMAT_HOST_DEVICE
PixelInfo(
const uint32 _packed) : packed(_packed) {}
531 FERMAT_HOST_DEVICE PixelInfo(
const uint32 _pixel,
const uint32 _comp,
const uint32 _diffuse = 0) : pixel(_pixel),
comp(_comp), diffuse(_diffuse) {}
533 FERMAT_HOST_DEVICE
operator uint32()
const {
return packed; }
544 FERMAT_DEVICE FERMAT_FORCEINLINE
545 void per_warp_atomic_add(uint64* ptr, uint64 val)
547 #if __CUDA_ARCH__ > 700 548 const unsigned int lane_id = threadIdx.x & 31;
551 int mask = __match_all_sync(__activemask(), val, &pred);
552 int leader = __ffs(mask) - 1;
554 if (lane_id == leader)
557 const unsigned int lane_id = threadIdx.x & 31;
559 int mask = __ballot_sync(__activemask(),
true);
560 int leader = __ffs(mask) - 1;
562 if (lane_id == leader)
569 template <
typename TPTOptions>
576 uint32 in_bounce : 27;
578 uint32 do_accumulate_emissive : 1;
579 uint32 do_scatter : 1;
583 uint64* device_timers;
594 template <
typename TPTContext>
602 renderer.mesh_vpls.n_vpls &&
603 ((context.in_bounce + 2 <= context.options.max_path_length) &&
604 ((context.in_bounce == 0 && context.options.direct_lighting_nee && context.options.direct_lighting) ||
605 (context.in_bounce > 0 && context.options.indirect_lighting_nee)));
608 context.do_accumulate_emissive =
609 ((context.in_bounce == 0 && context.options.visible_lights) ||
610 (context.in_bounce == 1 && context.options.direct_lighting_bsdf && context.options.direct_lighting) ||
611 (context.in_bounce > 1 && context.options.indirect_lighting_bsdf));
614 const uint32 max_path_vertices = context.options.max_path_length +
615 ((context.options.max_path_length == 2 && context.options.direct_lighting_bsdf) ||
616 (context.options.max_path_length > 2 && context.options.indirect_lighting_bsdf) ? 1 : 0);
619 context.do_scatter = (context.in_bounce + 2 < max_path_vertices);
633 template <
typename TPTContext>
645 context.sequence.sample_2d(pixel.x, pixel.y, 0),
646 context.sequence.sample_2d(pixel.x, pixel.y, 1));
648 const float2 d = make_float2(
649 (pixel.x + uv.x) /
float(renderer.res_x),
650 (pixel.y + uv.y) /
float(renderer.res_y)) * 2.f - 1.f;
652 float3 ray_origin = renderer.camera.eye;
653 float3 ray_direction = d.x*U + d.y*V + W;
655 return make_ray( ray_origin, ray_direction, 0u, 1e34f );
666 template <
typename TPTContext>
675 context.sequence.sample_2d(pixel.x, pixel.y, 0),
676 context.sequence.sample_2d(pixel.x, pixel.y, 1));
678 const float2 d = make_float2(
679 (pixel.x + uv.x) /
float(renderer.res_x),
680 (pixel.y + uv.y) /
float(renderer.res_y));
682 float3 ray_origin = renderer.camera.eye;
685 return make_ray( ray_origin, ray_direction, 0u, 1e34f );
705 template <
typename TPTContext,
typename TPTVertexProcessor>
709 TPTVertexProcessor& vertex_processor,
711 const bool shadow_hit,
716 const uint32 vertex_info = uint32(-1),
717 const uint32 nee_vertex_id = uint32(-1),
718 const uint32 nee_sample_id = uint32(-1))
721 DEVICE_TIME( timer.start() );
724 context.dl.update( nee_vertex_id, nee_sample_id, w, shadow_hit ==
true );
726 DEVICE_TIME( per_warp_atomic_add( context.device_timers + LIGHTS_UPDATE_TIME, timer.take() ) );
728 vertex_processor.accumulate_nee(
737 DEVICE_TIME( per_warp_atomic_add( context.device_timers + FBUFFER_WRITES_TIME, timer.take() ) );
744 template <
typename TPTContext>
748 return context.sequence.sample_2d(pixel.x, pixel.y, (context.in_bounce + 1) * 6 + i);
771 template <
typename TPTContext,
typename TPTVertexProcessor>
775 TPTVertexProcessor& vertex_processor,
783 const uint32 prev_vertex_info = uint32(-1),
784 const uint32 prev_nee_vertex_id = uint32(-1),
787 const float p_prev = w.w;
789 const uint32 pixel_index = pixel_info.pixel;
791 if (hit.t > 0.0f && hit.triId >= 0)
794 DEVICE_TIME( timer.start() );
799 DEVICE_TIME( per_warp_atomic_add( context.device_timers + SETUP_TIME, timer.take() ) );
804 renderer.fb.gbuffer.geo(pixel_index) = GBufferView::pack_geometry(ev.geom.position, ev.geom.normal_s);
805 renderer.fb.gbuffer.uv(pixel_index) = make_float4(hit.u, hit.v, ev.geom.texture_coords.x, ev.geom.texture_coords.y);
806 renderer.fb.gbuffer.tri(pixel_index) = hit.triId;
807 renderer.fb.gbuffer.depth(pixel_index) = hit.t;
810 renderer.fb(FBufferDesc::DIFFUSE_A, pixel_index) +=
cugar::Vector4f(ev.material.diffuse) * context.frame_weight;
814 DEVICE_TIME( per_warp_atomic_add( context.device_timers + FBUFFER_WRITES_TIME, timer.take() ) );
818 const float area_prob = cugar::rsqrtf(cone.y * ev.prev_G_prime);
819 const float cone_radius = cone.x + area_prob;
822 uint32 nee_vertex_id = uint32(-1);
825 bool is_secondary_diffuse = pixel_info.diffuse;
827 nee_vertex_id = context.dl.preprocess_vertex(
832 is_secondary_diffuse,
843 add_in<false>(renderer.fb(FBufferDesc::COMPOSITED_C), pixel_info.pixel, c, context.frame_weight);
849 DEVICE_TIME( per_warp_atomic_add( context.device_timers + LIGHTS_PREPROCESS_TIME, timer.take() ) );
851 const uint32 vertex_info = vertex_processor.preprocess_vertex(
862 DEVICE_TIME( per_warp_atomic_add( context.device_timers + PREPROCESS_VERTEX_TIME, timer.take() ) );
866 for (uint32 i = 0; i < 6; ++i)
870 if ((context.in_bounce + 2 <= context.options.max_path_length) &&
871 (context.in_bounce > 0 || context.options.direct_lighting) &&
872 renderer.dir_lights_count)
874 DEVICE_TIME( timer.restart() );
877 const float z[3] = { samples[0], samples[1], samples[2] };
885 const uint32 light_idx =
cugar::quantize( z[2], renderer.dir_lights_count );
888 renderer.dir_lights[ light_idx ].
sample(ev.geom.position, z, &light_vertex.prim_id, &light_vertex.uv, &light_vertex_geom, &light_pdf, &light_edf);
891 light_pdf /= renderer.dir_lights_count;
893 DEVICE_TIME( per_warp_atomic_add( context.device_timers + DIRLIGHT_SAMPLE_TIME, timer.take() ) );
898 const float d2 = fmaxf(1.0e-8f, cugar::square_length(out));
904 float p_s_comp[Bsdf::kNumComponents];
906 ev.bsdf.f_and_p(ev.geom, ev.in, out, f_s_comp, p_s_comp, cugar::kProjectedSolidAngle);
909 const bool eval_diffuse = context.options.diffuse_scattering;
910 const bool eval_glossy = context.options.glossy_scattering;
918 f_s += f_s_comp[Bsdf::kDiffuseReflectionIndex] + f_s_comp[Bsdf::kDiffuseTransmissionIndex];
919 p_s += p_s_comp[Bsdf::kDiffuseReflectionIndex] + p_s_comp[Bsdf::kDiffuseTransmissionIndex];
923 f_s += f_s_comp[Bsdf::kGlossyReflectionIndex] + f_s_comp[Bsdf::kGlossyTransmissionIndex];
924 p_s += p_s_comp[Bsdf::kGlossyReflectionIndex] + p_s_comp[Bsdf::kGlossyTransmissionIndex];
927 DEVICE_TIME( per_warp_atomic_add( context.device_timers + BRDF_EVAL_TIME, timer.take() ) );
930 const cugar::Vector3f f_L = light_edf.
f(light_vertex_geom, light_vertex_geom.position, -out) / light_pdf;
932 DEVICE_TIME( per_warp_atomic_add( context.device_timers + DIRLIGHT_EVAL_TIME, timer.take() ) );
935 const float G = fabsf(cugar::dot(out, ev.geom.normal_s) * cugar::dot(out, light_vertex_geom.normal_s)) / d2;
938 const float mis_w = 1.0f;
943 uint32 out_vertex_info;
945 vertex_processor.compute_nee_weights(
952 eval_diffuse ? f_s_comp[Bsdf::kDiffuseReflectionIndex] + f_s_comp[Bsdf::kDiffuseTransmissionIndex] :
cugar::Vector3f(0.0f),
953 eval_glossy ? f_s_comp[Bsdf::kGlossyReflectionIndex] + f_s_comp[Bsdf::kGlossyTransmissionIndex] :
cugar::Vector3f(0.0f),
960 DEVICE_TIME( per_warp_atomic_add( context.device_timers + NEE_WEIGHTS_TIME, timer.take() ) );
970 if (cugar::max_comp(out_w) > 0.0f && cugar::is_finite(out_w))
972 DEVICE_TIME( timer.restart() );
975 const cugar::Vector3f N = dot(ev.geom.normal_s,ray.dir) > 0.0f ? -ev.geom.normal_s : ev.geom.normal_s;
979 out_ray.origin = ev.geom.position - ray.dir * 1.0e-3f;
980 out_ray.dir = (light_vertex_geom.position - out_ray.origin);
982 out_ray.tmax = 0.9999f;
984 context.trace_shadow_ray( vertex_processor, renderer, pixel_info, out_ray, out_w, out_w_d, out_w_g, vertex_info );
986 DEVICE_TIME( per_warp_atomic_add( context.device_timers + TRACE_SHADOW_TIME, timer.take() ) );
993 DEVICE_TIME( timer.restart() );
996 const float z[3] = { samples[0], samples[1], samples[2] };
1009 const uint32 nee_sample_id = context.dl.sample( nee_vertex_id, z, &light_vertex, &light_vertex_geom, &light_pdf, &light_edf );
1011 DEVICE_TIME( per_warp_atomic_add( context.device_timers + LIGHTS_SAMPLE_TIME, timer.take() ) );
1014 cugar::Vector3f out = (light_vertex_geom.position - ev.geom.position);
1016 const float d2 = fmaxf(1.0e-8f, cugar::square_length(out));
1022 float p_s_comp[Bsdf::kNumComponents];
1024 ev.bsdf.f_and_p(ev.geom, ev.in, out, f_s_comp, p_s_comp, cugar::kProjectedSolidAngle);
1027 const bool eval_diffuse = context.options.diffuse_scattering;
1028 const bool eval_glossy = context.options.glossy_scattering;
1035 f_s += f_s_comp[Bsdf::kDiffuseReflectionIndex] + f_s_comp[Bsdf::kDiffuseTransmissionIndex];
1036 p_s += p_s_comp[Bsdf::kDiffuseReflectionIndex] + p_s_comp[Bsdf::kDiffuseTransmissionIndex];
1040 f_s += f_s_comp[Bsdf::kGlossyReflectionIndex] + f_s_comp[Bsdf::kGlossyTransmissionIndex];
1041 p_s += p_s_comp[Bsdf::kGlossyReflectionIndex] + p_s_comp[Bsdf::kGlossyTransmissionIndex];
1044 DEVICE_TIME( per_warp_atomic_add( context.device_timers + BRDF_EVAL_TIME, timer.take() ) );
1047 const cugar::Vector3f f_L = light_edf.
f(light_vertex_geom, light_vertex_geom.position, -out) / light_pdf;
1049 DEVICE_TIME( per_warp_atomic_add( context.device_timers + LIGHTS_EVAL_TIME, timer.take() ) );
1052 const float G = fabsf(cugar::dot(out, ev.geom.normal_s) * cugar::dot(out, light_vertex_geom.normal_s)) / d2;
1055 const float p1 = light_pdf;
1056 const float p2 = p_s * G;
1058 (bounce == 0 && context.options.direct_lighting_bsdf) ||
1059 (bounce > 0 && context.options.indirect_lighting_bsdf) ? mis_heuristic<MIS_HEURISTIC>(p1, p2) : 1.0f;
1064 uint32 out_vertex_info;
1066 vertex_processor.compute_nee_weights(
1073 eval_diffuse ? f_s_comp[Bsdf::kDiffuseReflectionIndex] + f_s_comp[Bsdf::kDiffuseTransmissionIndex] :
cugar::Vector3f(0.0f),
1074 eval_glossy ? f_s_comp[Bsdf::kGlossyReflectionIndex] + f_s_comp[Bsdf::kGlossyTransmissionIndex] :
cugar::Vector3f(0.0f),
1081 DEVICE_TIME( per_warp_atomic_add( context.device_timers + NEE_WEIGHTS_TIME, timer.take() ) );
1091 if (cugar::max_comp(out_w) > 0.0f && cugar::is_finite(out_w))
1093 DEVICE_TIME( timer.restart() );
1097 out_ray.origin = ev.geom.position - ray.dir * 1.0e-4f;
1098 out_ray.dir = (light_vertex_geom.position - out_ray.origin);
1099 out_ray.mask = 0x2u;
1100 out_ray.tmax = 0.9999f;
1102 context.trace_shadow_ray( vertex_processor, renderer, pixel_info, out_ray, out_w, out_w_d, out_w_g, vertex_info, nee_vertex_id, nee_sample_id );
1104 DEVICE_TIME( per_warp_atomic_add( context.device_timers + TRACE_SHADOW_TIME, timer.take() ) );
1109 if (context.do_accumulate_emissive)
1111 DEVICE_TIME( timer.restart() );
1117 context.dl.map( prev_nee_vertex_id, hit.triId,
cugar::Vector2f(hit.u, hit.v), light_vertex_geom, &light_pdf, &light_edf );
1119 DEVICE_TIME( per_warp_atomic_add( context.device_timers + LIGHTS_MAPPING_TIME, timer.take() ) );
1122 const cugar::Vector3f f_L = light_edf.
f(light_vertex_geom, light_vertex_geom.position, ev.in);
1124 DEVICE_TIME( per_warp_atomic_add( context.device_timers + LIGHTS_EVAL_TIME, timer.take() ) );
1126 const float d2 = fmaxf(1.0e-10f, hit.t * hit.t);
1129 const float G_partial = fabsf(cugar::dot(ev.in, light_vertex_geom.normal_s)) / d2;
1131 const float p1 = G_partial * p_prev;
1132 const float p2 = light_pdf;
1134 (bounce == 1 && context.options.direct_lighting_nee) ||
1135 (bounce > 1 && context.options.indirect_lighting_nee) ? mis_heuristic<MIS_HEURISTIC>(p1, p2) : 1.0f;
1141 if (cugar::max_comp(out_w) > 0.0f && cugar::is_finite(out_w))
1143 vertex_processor.accumulate_emissive(
1153 DEVICE_TIME( per_warp_atomic_add( context.device_timers + FBUFFER_WRITES_TIME, timer.take() ) );
1157 if (context.do_scatter)
1159 DEVICE_TIME( timer.restart() );
1162 const float z[3] = { samples[3], samples[4], samples[5] };
1177 uint32 component_mask = uint32(Bsdf::kAllComponents);
1180 if (context.options.diffuse_scattering ==
false)
1181 component_mask &= ~uint32(Bsdf::kDiffuseMask);
1186 if (context.options.glossy_scattering ==
false)
1187 component_mask &= ~uint32(Bsdf::kGlossyMask);
1190 scatter(ev, z, out_comp, out, p, p_proj, g,
true,
false,
false,
Bsdf::ComponentType(component_mask));
1192 DEVICE_TIME( per_warp_atomic_add( context.device_timers + BRDF_SAMPLE_TIME, timer.take() ) );
1196 uint32 out_vertex_info;
1198 vertex_processor.compute_scattering_weights(
1211 DEVICE_TIME( per_warp_atomic_add( context.device_timers + SCATTERING_WEIGHTS_TIME, timer.take() ) );
1213 if (p != 0.0f && cugar::max_comp(out_w) > 0.0f && cugar::is_finite(out_w))
1217 out_ray.origin = ev.geom.position;
1219 out_ray.mask = __float_as_uint(1.0e-3f);
1220 out_ray.tmax = 1.0e8f;
1222 const float out_p = p;
1226 const float min_p = 32.0f;
1227 const cugar::Vector2f out_cone(cone_radius, cugar::max(out_p, min_p));
1232 bool is_secondary_diffuse = pixel_info.diffuse || (out_comp & Bsdf::kDiffuseMask);
1237 PixelInfo(pixel_index, out_comp, is_secondary_diffuse),
1244 DEVICE_TIME( per_warp_atomic_add( context.device_timers + TRACE_SHADED_TIME, timer.take() ) );
CUGAR_HOST_DEVICE uint32 quantize(const float x, const uint32 n)
Definition: numbers.h:600
FERMAT_DEVICE float vertex_sample(const uint2 pixel, TPTContext &context, const uint32 i)
Definition: pathtracer_core.h:746
FERMAT_HOST_DEVICE void compute_per_bounce_options(TPTContext &context, const RenderingContextView &renderer)
Definition: pathtracer_core.h:596
Definition: pathtracer_core.h:503
ComponentType
Definition: bsdf.h:139
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE cugar::Vector3f sample_direction(const cugar::Vector2f ndc) const
Definition: camera.h:278
Definition: tiled_sequence.h:53
Definition: bpt_utils.h:583
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE Vector3f f(const DifferentialGeometry &geometry, const Vector3f in, const Vector3f out) const
Definition: lambert_edf.h:60
FERMAT_DEVICE MaskedRay generate_primary_ray(TPTContext &context, RenderingContextView &renderer, const uint2 pixel, cugar::Vector3f U, cugar::Vector3f V, cugar::Vector3f W)
Definition: pathtracer_core.h:635
Definition: pathtracer_core.h:570
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE bool scatter(const VertexType &v, const float z[3], Bsdf::ComponentType &out_component, cugar::Vector3f &out, float &out_p, float &out_p_proj, cugar::Vector3f &out_w, bool RR=true, bool output_alpha=true, bool evaluate_full_bsdf=false, Bsdf::ComponentType components=Bsdf::kAllComponents)
Definition: bpt_utils.h:1070
FERMAT_HOST_DEVICE bool sample(const float *Z, uint32_t *prim_id, cugar::Vector2f *uv, VertexGeometry *geom, float *pdf, Edf *edf) const
Definition: lights.h:521
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint8 comp(const uchar2 a, const char c)
Definition: numbers.h:218
Definition: pathtracer_core.h:527
FERMAT_DEVICE void solve_occlusion(TPTContext &context, TPTVertexProcessor &vertex_processor, RenderingContextView &renderer, const bool shadow_hit, const PixelInfo pixel_info, const cugar::Vector3f w, const cugar::Vector3f w_d, const cugar::Vector3f w_g, const uint32 vertex_info=uint32(-1), const uint32 nee_vertex_id=uint32(-1), const uint32 nee_sample_id=uint32(-1))
Definition: pathtracer_core.h:707
Definition: renderer_view.h:80
FERMAT_DEVICE bool shade_vertex(TPTContext &context, TPTVertexProcessor &vertex_processor, RenderingContextView &renderer, const uint32 bounce, const PixelInfo pixel_info, const uint2 pixel, const MaskedRay &ray, const Hit hit, const cugar::Vector4f w, const uint32 prev_vertex_info=uint32(-1), const uint32 prev_nee_vertex_id=uint32(-1), const cugar::Vector2f cone=cugar::Vector2f(0))
Definition: pathtracer_core.h:773
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE float randfloat(unsigned i, unsigned p)
Definition: numbers.h:753