Fermat
bpt_kernels.h
1 /*
2  * Fermat
3  *
4  * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #pragma once
30 
31 #include <bpt_context.h>
32 #include <bpt_utils.h>
33 #include <bpt_options.h>
35 
38 
41 
42 #define SECONDARY_EYE_VERTICES_BLOCKSIZE 128
43 #define SECONDARY_EYE_VERTICES_CTA_BLOCKS 6
44 
45 #define SECONDARY_LIGHT_VERTICES_BLOCKSIZE 128
46 #define SECONDARY_LIGHT_VERTICES_CTA_BLOCKS 6
47 
48 #define BPT_FULL_BSDF_EVALUATION 1
49 
57 {
58  FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
60  const BPTOptionsBase& _options,
61  VertexSampling _light_sampling = VertexSampling::kAll,
62  VertexOrdering _light_ordering = VertexOrdering::kRandomOrdering,
63  VertexSampling _eye_sampling = VertexSampling::kAll,
64  bool _use_rr = true) :
65  max_path_length(_options.max_path_length),
66  light_sampling(uint32(_light_sampling)),
67  light_ordering(uint32(_light_ordering)),
68  eye_sampling(uint32(_eye_sampling)),
69  use_vpls(_options.use_vpls),
70  use_rr(_use_rr),
71  light_tracing(_options.light_tracing),
72  direct_lighting_nee(_options.direct_lighting_nee),
73  direct_lighting_bsdf(_options.direct_lighting_bsdf),
74  indirect_lighting_nee(_options.indirect_lighting_nee),
75  indirect_lighting_bsdf(_options.indirect_lighting_bsdf),
76  visible_lights(_options.visible_lights) {}
77 
78  FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
79  BPTConfigBase(
80  uint32 _max_path_length = 6,
81  VertexSampling _light_sampling = VertexSampling::kAll,
82  VertexOrdering _light_ordering = VertexOrdering::kRandomOrdering,
83  VertexSampling _eye_sampling = VertexSampling::kAll,
84  bool _use_vpls = true,
85  bool _use_rr = true,
86  float _light_tracing = 0.0f,
87  bool _direct_lighting_nee = true,
88  bool _direct_lighting_bsdf = true,
89  bool _indirect_lighting_nee = true,
90  bool _indirect_lighting_bsdf = true,
91  bool _visible_lights = true) :
92  max_path_length(_max_path_length),
93  light_sampling(uint32(_light_sampling)),
94  light_ordering(uint32(_light_ordering)),
95  eye_sampling(uint32(_eye_sampling)),
96  use_vpls(_use_vpls),
97  use_rr(_use_rr),
98  light_tracing(_light_tracing),
99  direct_lighting_nee(_direct_lighting_nee),
100  direct_lighting_bsdf(_direct_lighting_bsdf),
101  indirect_lighting_nee(_indirect_lighting_nee),
102  indirect_lighting_bsdf(_indirect_lighting_bsdf),
103  visible_lights(_visible_lights) {}
104 
105  FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
106  BPTConfigBase(const BPTConfigBase& other) :
107  max_path_length(other.max_path_length),
108  light_sampling(other.light_sampling),
109  light_ordering(other.light_ordering),
110  eye_sampling(other.eye_sampling),
111  use_vpls(other.use_vpls),
112  use_rr(other.use_rr),
113  light_tracing(other.light_tracing),
114  direct_lighting_nee(other.direct_lighting_nee),
115  direct_lighting_bsdf(other.direct_lighting_bsdf),
116  indirect_lighting_nee(other.indirect_lighting_nee),
117  indirect_lighting_bsdf(other.indirect_lighting_bsdf),
118  visible_lights(other.visible_lights) {}
119 
120  uint32 max_path_length : 10;
121  uint32 light_sampling : 1;
122  uint32 light_ordering : 1;
123  uint32 eye_sampling : 1;
124  uint32 use_vpls : 1;
125  uint32 use_rr : 1;
126  uint32 direct_lighting_nee : 1;
127  uint32 direct_lighting_bsdf : 1;
128  uint32 indirect_lighting_nee : 1;
129  uint32 indirect_lighting_bsdf : 1;
130  uint32 visible_lights : 1;
131  float light_tracing;
132 
137  FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
138  bool terminate_light_subpath(const uint32 path_id, const uint32 s) const { return s >= max_path_length + 1; }
139 
144  FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
145  bool terminate_eye_subpath(const uint32 path_id, const uint32 t) const { return t >= max_path_length + 1; }
146 
151  FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
152  bool store_light_vertex(const uint32 path_id, const uint32 s, const bool absorbed) const
153  {
154  return (VertexSampling(light_sampling) == VertexSampling::kAll) || absorbed;
155  }
156 
162  FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
163  bool perform_connection(const uint32 eye_path_id, const uint32 t, const bool absorbed) const
164  {
165  return
166  (t == 1 && direct_lighting_nee) ||
167  (t > 1 && indirect_lighting_nee);
168  }
169 
175  FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
176  bool accumulate_emissive(const uint32 eye_path_id, const uint32 t, const bool absorbed) const
177  {
178  return
179  (t == 2 && visible_lights) ||
180  (t == 3 && direct_lighting_bsdf) ||
181  (t > 3 && indirect_lighting_bsdf);
182  }
183 
186  template <typename TBPTContext>
187  FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
189  const uint32 light_path_id,
190  const uint32 depth,
191  const VertexGeometryId v_id,
192  TBPTContext& context,
193  RenderingContextView& renderer) const
194  {}
195 
198  template <typename TBPTContext>
199  FERMAT_HOST_DEVICE FERMAT_FORCEINLINE
201  const uint32 eye_path_id,
202  const uint32 depth,
203  const VertexGeometryId v_id,
204  const EyeVertex& v,
205  TBPTContext& context,
206  RenderingContextView& renderer) const
207  {}
208 };
209 
217 {
221  template <typename TBPTContext>
222  FERMAT_HOST_DEVICE
223  void sink(
224  const uint32 channel,
225  const cugar::Vector4f value,
226  const uint32 light_path_id,
227  const uint32 eye_path_id,
228  const uint32 s,
229  const uint32 t,
230  TBPTContext& context,
231  RenderingContextView& renderer)
232  {}
233 
237  template <typename TBPTContext>
238  FERMAT_HOST_DEVICE
240  const Bsdf::ComponentType component,
241  const cugar::Vector4f value,
242  const uint32 eye_path_id,
243  const uint32 t,
244  TBPTContext& context,
245  RenderingContextView& renderer)
246  {}
247 };
249 
252 
253 namespace bpt {
254 
257 
260 
264 
275 template <typename TPrimaryCoordinates, typename TBPTContext, typename TBPTConfig>
276 FERMAT_HOST_DEVICE
278  const uint32 light_path_id,
279  const uint32 n_light_paths,
280  const TPrimaryCoordinates& primary_coords,
281  TBPTContext& context,
282  RenderingContextView& renderer,
283  TBPTConfig& config)
284 {
285  //if (light_path_id == 0)
286  //{
287  // if (config.light_sampling == VertexSampling::kAll &&
288  // config.light_ordering == VertexOrdering::kRandomOrdering)
289  // *context.light_vertices.vertex_counter = n_light_paths;
290  //}
291 
292  if (VertexOrdering(config.light_ordering) == VertexOrdering::kPathOrdering)
293  {
294  // initialize the number of vertices for this path
295  context.light_vertices.vertex_counts[light_path_id] = 0;
296 
297  // temporarily store an invalid light vertex - used in case the light subpath gets terminated early due to RR
298  context.light_vertices.vertex_path_id[light_path_id] = uint32(-1);
299  }
300 
301  // check whether we have anything to do
302  if (config.terminate_light_subpath(light_path_id, 0) == true)
303  return;
304 
305  VPL light_vertex;
306  VertexGeometry geom;
307  float pdf;
308  Edf edf;
309 
310  if (config.use_vpls)
311  {
312  light_vertex = context.light_vertices.vertex[light_path_id];
313 
314  renderer.mesh_vpls.map(light_vertex.prim_id, light_vertex.uv, &geom, &pdf, &edf);
315  }
316  else
317  {
318  float samples[3];
319  for (uint32 i = 0; i < 3; ++i)
320  samples[i] = primary_coords.sample(light_path_id, 0, i);
321 
322  renderer.mesh_light.sample(samples, &light_vertex.prim_id, &light_vertex.uv, &geom, &pdf, &edf);
323  }
324 
325  // store the compact vertex information
326  config.visit_light_vertex(
327  light_path_id,
328  0,
329  light_vertex,
330  context,
331  renderer);
332 
333  const bool terminate = config.terminate_light_subpath(light_path_id, 1);
334 
335  if (terminate || (VertexSampling(config.light_sampling) == VertexSampling::kAll))
336  {
337  const uint32 slot = (VertexOrdering(config.light_ordering) == VertexOrdering::kRandomOrdering) ?
338  #if defined(FERMAT_DEVICE_COMPILATION)
339  cugar::cuda::warp_increment(context.light_vertices.vertex_counter) :
340  #else
341  cugar::atomic_add(context.light_vertices.vertex_counter, 1u) :
342  #endif
343  light_path_id;
344 
345  const uint32 packed_normal = pack_direction(geom.normal_s);
346 
347  // store the light vertex
348  context.light_vertices.vertex_gbuffer[slot] = pack_edf(edf);
349  context.light_vertices.vertex_pos[slot] = cugar::Vector4f(geom.position, cugar::binary_cast<float>(packed_normal));
350  context.light_vertices.vertex_input[slot] = make_uint2(0, cugar::to_rgbe(cugar::Vector3f(1.0f) / pdf)); // the material emission factor
351  context.light_vertices.vertex_weights[slot] = PathWeights(
352  0.0f, // p(-2)g(-2)p(-1)
353  1.0f * pdf); // p(-1)g(-1) = pdf : we want p(-1)g(-1)p(0) = p(0)*pdf - which will happen because we are setting p(0) = p(0) and g(-1) = pdf
354 
355  // (over-)write the path id
356  context.light_vertices.vertex_path_id[slot] = light_path_id;
357 
358  if (VertexOrdering(config.light_ordering) == VertexOrdering::kPathOrdering)
359  {
360  // set the number of path vertices to one
361  context.light_vertices.vertex_counts[light_path_id] = 1;
362  }
363  }
364 
365  if (terminate == false)
366  {
367  float samples[3];
368  for (uint32 i = 0; i < 3; ++i)
369  samples[i] = primary_coords.sample(light_path_id, 1, i);
370 
371  cugar::Vector3f out;
372  cugar::Vector3f f;
373  cugar::Vector3f g;
374  float p;
375  float p_proj;
376 
377  // sample an outgoing direction
378  edf.sample(cugar::Vector2f(samples[0], samples[1]), geom, geom.position, out, g, p, p_proj);
379 
380  f = g * p_proj;
381  g /= pdf;
382 
383  Ray out_ray;
384  out_ray.origin = geom.position;
385  out_ray.dir = out;
386  out_ray.tmin = 1.0e-4f;
387  out_ray.tmax = 1.0e8f;
388 
389  // fetch the output slot
390  const uint32 slot = context.scatter_queue.warp_append_slot();
391 
392  // write the output ray/info
393  context.scatter_queue.rays[slot] = out_ray;
394  context.scatter_queue.weights[slot] = cugar::Vector4f(g, 0.0f);
395  context.scatter_queue.probs[slot] = p; // we need to track the solid angle probability of the last vertex
396  context.scatter_queue.pixels[slot] = PixelInfo(light_path_id, FBufferDesc::DIFFUSE_C).packed;
397  context.scatter_queue.path_weights[slot] = TempPathWeights::light_vertex_1( pdf, p_proj, fabsf(dot(geom.normal_s, out)) );
398  }
399 }
400 
419 template <typename TPrimaryCoordinates, typename TBPTContext, typename TBPTConfig>
420 FERMAT_HOST_DEVICE
422  const uint32 queue_idx,
423  const uint32 n_light_paths,
424  const TPrimaryCoordinates& primary_coords,
425  TBPTContext& context,
426  RenderingContextView& renderer,
427  TBPTConfig& config)
428 {
429  const PixelInfo pixel_info = context.in_queue.pixels[queue_idx];
430  const Ray ray = context.in_queue.rays[queue_idx];
431  const Hit hit = context.in_queue.hits[queue_idx];
432  const cugar::Vector4f w = context.in_queue.weights[queue_idx];
433  const TempPathWeights path_weights = context.in_queue.path_weights[queue_idx];
434 
435  const uint32 light_path_id = pixel_info.pixel;
436 
437  if (hit.t > 0.0f && hit.triId >= 0)
438  {
439  // store the compact vertex information
440  config.visit_light_vertex(
441  light_path_id,
442  context.in_bounce + 1,
443  VertexGeometryId(hit.triId, hit.u, hit.v),
444  context,
445  renderer);
446 
447  // setup the light vertex
448  LightVertex lv;
449  lv.setup(ray, hit, w.xyz(), path_weights, context.in_bounce + 1, renderer);
450 
451  bool absorbed = true;
452 
453  // trace a bounce ray
454  if (config.terminate_light_subpath(light_path_id, context.in_bounce + 2) == false)
455  {
456  // initialize our sampling sequence
457  float z[3];
458  for (uint32 i = 0; i < 3; ++i)
459  z[i] = primary_coords.sample(light_path_id, context.in_bounce + 2, i);
460 
461  // sample a scattering event
462  cugar::Vector3f out(0.0f);
463  cugar::Vector3f out_w(0.0f);
464  cugar::Vector3f g(0.0f);
465  float p(0.0f);
466  float p_proj(0.0f);
467  Bsdf::ComponentType out_comp(Bsdf::kAbsorption);
468 
469  scatter(lv, z, out_comp, out, p, p_proj, out_w, config.use_rr, true, BPT_FULL_BSDF_EVALUATION);
470 
471  if (cugar::max_comp(out_w) > 0.0f)
472  {
473  // enqueue the output ray
474  Ray out_ray;
475  out_ray.origin = lv.geom.position;
476  out_ray.dir = out;
477  out_ray.tmin = 1.0e-4f;
478  out_ray.tmax = 1.0e8f;
479 
480  const PixelInfo out_pixel = pixel_info;
481 
482  context.scatter_queue.warp_append(
483  out_pixel,
484  out_ray,
485  cugar::Vector4f(out_w, w.w),
486  0.0f,
487  TempPathWeights( lv, out, p_proj ));
488 
489  absorbed = false;
490  }
491  }
492 
493  // store the light vertex
494  if (config.store_light_vertex(light_path_id, context.in_bounce + 2, absorbed))
495  {
496  const uint32 slot = (VertexOrdering(config.light_ordering) == VertexOrdering::kRandomOrdering) ?
497  #if defined(FERMAT_DEVICE_COMPILATION)
498  cugar::cuda::warp_increment(context.light_vertices.vertex_counter) :
499  #else
500  cugar::atomic_add(context.light_vertices.vertex_counter, 1u) :
501  #endif
502  light_path_id + context.light_vertices.vertex_counts[light_path_id] * n_light_paths; // store all vertices: use the global vertex index
503 
504  const uint32 packed_normal = pack_direction(lv.geom.normal_s);
505  const uint32 packed_direction = pack_direction(lv.in);
506 
507  context.light_vertices.vertex[slot] = VPL( hit.triId, cugar::Vector2f(hit.u, hit.v), 0.0f );
508  context.light_vertices.vertex_gbuffer[slot] = pack_bsdf(lv.material);
509  context.light_vertices.vertex_pos[slot] = cugar::Vector4f(lv.geom.position, cugar::binary_cast<float>(packed_normal));
510  context.light_vertices.vertex_input[slot] = make_uint2(packed_direction, cugar::to_rgbe(w.xyz()));
511  context.light_vertices.vertex_weights[slot] = PathWeights(
512  lv.pGp_sum, // f(i-2)g(i-2)f(i-1)
513  lv.prev_pG); // f(i-1)g(i-1)
514 
515  context.light_vertices.vertex_path_id[slot] = light_path_id | ((context.in_bounce + 1) << 24);
516 
517  if (VertexOrdering(config.light_ordering) == VertexOrdering::kPathOrdering)
518  {
519  // keep track of how many vertices this path has
520  context.light_vertices.vertex_counts[light_path_id]++;
521  }
522  }
523  }
524  else
525  {
526  // hit the environment - nothing to do
527  }
528 }
529 
540 template <typename TPrimaryCoordinates, typename TBPTContext, typename TBPTConfig>
541 FERMAT_HOST_DEVICE
543  const uint32 idx,
544  const uint32 n_eye_paths,
545  const uint32 n_light_paths,
546  const TPrimaryCoordinates& primary_coords,
547  TBPTContext& context,
548  RenderingContextView& renderer,
549  TBPTConfig& config)
550 {
551  const cugar::Vector2f uv(
552  primary_coords.sample(idx, 1, 0),
553  primary_coords.sample(idx, 1, 1));
554 
555  const cugar::Vector2f d = uv * 2.f - cugar::Vector2f(1.f);
556 
557  EyeVertex ev;
558 
559  config.visit_eye_vertex(
560  idx,
561  0u,
562  VertexGeometryId(0, uv), // store uv's in vertex 0 (even if one day these coordinates should be dedicated to lens uv's...)
563  ev,
564  context,
565  renderer );
566 
567  // write the pixel index
568  context.in_queue.pixels[idx] = idx;
569 
570  cugar::Vector3f ray_origin = renderer.camera.eye;
571  cugar::Vector3f ray_direction = d.x*context.camera_U + d.y*context.camera_V + context.camera_W;
572 
573  ((float4*)context.in_queue.rays)[2 * idx + 0] = make_float4(ray_origin.x, ray_origin.y, ray_origin.z, 0.0f); // origin, tmin
574  ((float4*)context.in_queue.rays)[2 * idx + 1] = make_float4(ray_direction.x, ray_direction.y, ray_direction.z, 1e34f); // dir, tmax
575 
576  // write the filter weight
577  context.in_queue.weights[idx] = cugar::Vector4f(1.0f, 1.0f, 1.0f, 1.0f);
578 
579  const float p_e = camera_direction_pdf(context.camera_U, context.camera_V, context.camera_W, context.camera_W_len, context.camera_square_focal_length, cugar::normalize(ray_direction), true);
580  const float cos_theta = dot(cugar::normalize(ray_direction), context.camera_W) / context.camera_W_len;
581 
582  // write the path weights
583  context.in_queue.path_weights[idx] = TempPathWeights::eye_vertex_1( p_e, cos_theta, config.light_tracing );
584 
585  if (idx == 0)
586  *context.in_queue.size = n_eye_paths;
587 }
588 
614 template <typename TSampleSink, typename TPrimaryCoordinates, typename TBPTContext, typename TBPTConfig>
615 FERMAT_HOST_DEVICE
617  const uint32 queue_idx,
618  const uint32 n_eye_paths,
619  const uint32 n_light_paths,
620  TSampleSink& sample_sink,
621  const TPrimaryCoordinates& primary_coords,
622  TBPTContext& context,
623  RenderingContextView& renderer,
624  TBPTConfig& config)
625 {
626  const PixelInfo pixel_info = context.in_queue.pixels[queue_idx];
627  const Ray ray = context.in_queue.rays[queue_idx];
628  const Hit hit = context.in_queue.hits[queue_idx];
629  const cugar::Vector4f w = context.in_queue.weights[queue_idx];
630  const TempPathWeights path_weights = context.in_queue.path_weights[queue_idx];
631 
632  const uint32 eye_path_id = pixel_info.pixel;
633 
634  //bool sinked_path = false;
635 
636  if (hit.t > 0.0f && hit.triId >= 0)
637  {
638  // setup the eye vertex
639  EyeVertex ev;
640  ev.setup(ray, hit, w.xyz(), path_weights, context.in_bounce, renderer);
641 
642  // store the compact vertex information
643  config.visit_eye_vertex(
644  eye_path_id,
645  context.in_bounce + 1,
646  VertexGeometryId(hit.triId, cugar::Vector2f(hit.u, hit.v)),
647  ev,
648  context,
649  renderer);
650 
651  bool absorbed = true;
652 
653  // trace a bounce ray
654  if (config.terminate_eye_subpath(eye_path_id, context.in_bounce + 2) == false)
655  {
656  // fetch the sampling dimensions
657  float z[3];
658  for (uint32 i = 0; i < 3; ++i)
659  z[i] = primary_coords.sample(pixel_info.pixel, context.in_bounce + 2, i);
660 
661  // sample a scattering event
662  cugar::Vector3f out(0.0f);
663  cugar::Vector3f out_w(0.0f);
664  float p(0.0f);
665  float p_proj(0.0f);
666  Bsdf::ComponentType out_comp(Bsdf::kAbsorption);
667 
668  scatter(ev, z, out_comp, out, p, p_proj, out_w, config.use_rr, true, BPT_FULL_BSDF_EVALUATION);
669 
670  if (cugar::max_comp(out_w) > 0.0f)
671  {
672  // record an eye scattering event
673  sample_sink.sink_eye_scattering_event(
674  out_comp,
675  cugar::Vector4f(out_w, w.w),
676  pixel_info.pixel,
677  context.in_bounce + 2,
678  context,
679  renderer);
680 
681  // enqueue the output ray
682  Ray out_ray;
683  out_ray.origin = ev.geom.position;
684  out_ray.dir = out;
685  out_ray.tmin = 1.0e-4f;
686  out_ray.tmax = 1.0e8f;
687 
688  const float out_p = p;
689 
690  const PixelInfo out_pixel = context.in_bounce ?
691  pixel_info : // if this sample is a secondary bounce, use the previously selected channel
692  PixelInfo(pixel_info.pixel, channel_selector(out_comp)); // otherwise (i.e. this is the first bounce) choose the output channel for the rest of the path
693 
694  context.scatter_queue.warp_append(
695  out_pixel, out_ray,
696  cugar::Vector4f(out_w, w.w),
697  out_p,
698  TempPathWeights( ev, out, p_proj ));
699 
700  //sinked_path = true;
701  absorbed = false;
702  }
703  }
704 
705  // compute the maximum depth a light vertex might have
706  const int32 max_path_verts = config.max_path_length + 1;
707  const int32 max_s = max_path_verts - ev.depth - 2;
708  const int32 max_light_depth = max_s - 1;
709 
710  // perform a bidirectional connection
711  if (max_light_depth >= 0 &&
712  config.perform_connection(eye_path_id, context.in_bounce + 2, absorbed))
713  {
714  const bool single_connection =
715  VertexOrdering(config.light_ordering) == VertexOrdering::kRandomOrdering ||
716  VertexSampling(config.light_sampling) == VertexSampling::kEnd;
717 
718  if (single_connection)
719  {
720  uint32 light_idx;
721  float light_weight;
722 
723  if (VertexOrdering(config.light_ordering) == VertexOrdering::kRandomOrdering)
724  {
725  // fetch the sampling dimensions
726  float z[3];
727  for (uint32 i = 0; i < 3; ++i)
728  z[i] = primary_coords.sample(pixel_info.pixel, context.in_bounce + 2, 3 + i);
729 
730  const uint32 n_light_vertex_paths = context.light_vertices.vertex_counts[0];
731  const uint32 n_light_vertices = context.light_vertices.vertex_counter[0];
732 
733  //
734  // The theory: we want to accumulate all VPLs at each depth, weighted by 1/#(light_vertex_paths);
735  // an alternative estimator is to pick one at each depth, and weight its contribution by w_depth = #photon(depth)/#light_vertex_paths.
736  // The practice: we pick 1 VPL out of all of them, with a probability of picking it at a given depth of p_depth = #photon(depth)/#photon(total);
737  // hence, we need to reweight this by w_depth / p_depth =
738  // #photon(depth) / #light_vertex_paths / (#photon(depth)/#photon(total)) =
739  // #photon(depth) / #light_vertex_paths * #photon(total) / #photon(depth) =
740  // #photon(total) / #light_vertex_paths;
741 
742  // select a VPL with z[2]
743  light_idx = cugar::quantize(z[2], n_light_vertices);
744  light_weight = float(n_light_vertices) / float(n_light_vertex_paths);
745  }
746  else
747  {
748  light_idx = eye_path_id;
749  light_weight = 1.0f;
750  }
751 
752  // setup a light vertex
753  cugar::Vector4f light_pos = context.light_vertices.vertex_pos[light_idx];
754  const uint2 light_in = context.light_vertices.vertex_input[light_idx];
755  uint4 light_gbuffer = context.light_vertices.vertex_gbuffer[light_idx];
756  PathWeights light_weights = context.light_vertices.vertex_weights[light_idx];
757  const uint32 light_vertex_id = context.light_vertices.vertex_path_id[light_idx];
758  const uint32 light_path_id = light_vertex_id & 0xFFFFFF;
759  const uint32 light_depth = light_vertex_id >> 24;
760 
761  // make sure the light vertex is valid
762  if (light_vertex_id != uint32(-1))
763  {
764  // setup the light vertex
765  LightVertex lv;
766  lv.setup(light_pos, light_in, light_gbuffer, light_weights, light_depth, renderer);
767 
768  // evaluate the connection
769  cugar::Vector3f out;
770  cugar::Vector3f out_w;
771  float d;
772 
773  eval_connection(ev, lv, out, out_w, d, config.use_rr, config.direct_lighting_nee, config.direct_lighting_bsdf);
774 
775  // multiply by the light vertex weight
776  out_w *= light_weight;
777 
778  if (cugar::max_comp(out_w) > 0.0f && cugar::is_finite(out_w))
779  {
780  // recompute d for intersection calculations
781  if (SHADOW_BIAS) d = cugar::length(lv.geom.position - (ev.geom.position + ev.in * SHADOW_BIAS));
782 
783  // enqueue the output ray
784  Ray out_ray;
785  //out_ray.origin = ev.geom.position + ev.in * SHADOW_BIAS; // move the origin slightly towards the viewer
786  //out_ray.dir = out;
787  //out_ray.tmin = SHADOW_TMIN;
788  //out_ray.tmax = d * 0.9999f;
789  out_ray.origin = ev.geom.position + ev.in * SHADOW_BIAS; // shift back in space along the viewing direction
790  out_ray.dir = (lv.geom.position - out_ray.origin); //out;
791  out_ray.tmin = SHADOW_TMIN;
792  out_ray.tmax = 0.9999f;
793 
794  const PixelInfo out_pixel = context.in_bounce ?
795  pixel_info : // if this sample is a secondary bounce, use the previously selected channel
796  PixelInfo(pixel_info.pixel, FBufferDesc::DIRECT_C); // otherwise (i.e. this is the first bounce) choose the direct-lighting output channel
797 
798  const uint32 slot = context.shadow_queue.warp_append_slot();
799 
800  context.shadow_queue.pixels[slot] = out_pixel.packed;
801  context.shadow_queue.rays[slot] = out_ray;
802  context.shadow_queue.weights[slot] = cugar::Vector4f(out_w, w.w);
803  context.shadow_queue.light_path_id[slot] = light_path_id | ((light_depth + 1) << 24) | ((ev.depth + 2) << 28); // NOTE: light_depth + 1 represents the technique 's, i.e. the number of light subpath vertices
804 
805  //sinked_path = true;
806  }
807  }
808  }
809  else
810  {
811  const uint32 eye_to_light_paths = n_eye_paths / n_light_paths;
812 
813  const uint32 light_path_id =
814  (n_light_paths == n_eye_paths) ? pixel_info.pixel : // use the same light subpath index as this eye subpath
815  pixel_info.pixel / eye_to_light_paths; // pick one at random
816 
817  // compute the maximum depth a light vertex might have
818  const int32 n_light_vertices = context.light_vertices.vertex_counts[light_path_id];
819 
820  // perform a bidirectional connection for each light vertex
821  for (uint32 light_depth = config.direct_lighting_nee ? 0 : 1;
822  light_depth < cugar::min(n_light_vertices, max_light_depth + 1);
823  light_depth++)
824  {
825  const float light_weight = 1.0f;
826 
827  const uint32 light_idx = light_path_id + light_depth * n_light_paths;
828 
829  // setup a light vertex
830  cugar::Vector4f light_pos = context.light_vertices.vertex_pos[light_idx];
831  const uint2 light_in = context.light_vertices.vertex_input[light_idx];
832  uint4 light_gbuffer = context.light_vertices.vertex_gbuffer[light_idx];
833  PathWeights light_weights = context.light_vertices.vertex_weights[light_idx];
834 
835  // setup the light vertex
836  LightVertex lv;
837  lv.setup(light_pos, light_in, light_gbuffer, light_weights, light_depth, renderer);
838 
839  // evaluate the connection
840  cugar::Vector3f out;
841  cugar::Vector3f out_w;
842  float d;
843 
844  eval_connection(ev, lv, out, out_w, d, config.use_rr, config.direct_lighting_nee, config.direct_lighting_bsdf);
845 
846  // multiply by the light vertex weight
847  out_w *= light_weight;
848 
849  if (cugar::max_comp(out_w) > 0.0f && cugar::is_finite(out_w))
850  {
851  // recompute d for intersection calculations
852  if (SHADOW_BIAS) d = cugar::length(lv.geom.position - (ev.geom.position + ev.in * SHADOW_BIAS));
853 
854  // enqueue the output ray
855  Ray out_ray;
856  //out_ray.origin = ev.geom.position + ev.in * SHADOW_BIAS; // move the origin slightly towards the viewer
857  //out_ray.dir = out;
858  //out_ray.tmin = SHADOW_TMIN;
859  //out_ray.tmax = d * 0.9999f;
860  out_ray.origin = ev.geom.position + ev.in * SHADOW_BIAS; // shift back in space along the viewing direction
861  out_ray.dir = (lv.geom.position - out_ray.origin); //out;
862  out_ray.tmin = SHADOW_TMIN;
863  out_ray.tmax = 0.9999f;
864 
865  const PixelInfo out_pixel = context.in_bounce ?
866  pixel_info : // if this sample is a secondary bounce, use the previously selected channel
867  PixelInfo(pixel_info.pixel, FBufferDesc::DIRECT_C); // otherwise (i.e. this is the first bounce) choose the direct-lighting output channel
868 
869  const uint32 slot = context.shadow_queue.warp_append_slot();
870 
871  context.shadow_queue.pixels[slot] = out_pixel.packed;
872  context.shadow_queue.rays[slot] = out_ray;
873  context.shadow_queue.weights[slot] = cugar::Vector4f(out_w, w.w);
874  context.shadow_queue.light_path_id[slot] = light_path_id | ((light_depth + 1) << 24) | ((ev.depth + 2) << 28); // NOTE: light_depth + 1 represents the technique 's, i.e. the number of light subpath vertices
875 
876  //sinked_path = true;
877  }
878  }
879  }
880  }
881 
882  // accumulate the emissive component along the incoming direction
883  if (config.accumulate_emissive(eye_path_id, context.in_bounce + 2, absorbed))
884  {
885  cugar::Vector3f out_w = eval_incoming_emission(ev, renderer, config.direct_lighting_nee, config.indirect_lighting_nee, config.use_vpls);
886 
887  if (cugar::max_comp(out_w) > 0.0f && cugar::is_finite(out_w))
888  {
889  sample_sink.sink(
890  pixel_info.channel,
891  cugar::Vector4f(out_w, w.w),
892  0,
893  pixel_info.pixel,
894  0,
895  context.in_bounce + 2,
896  context,
897  renderer);
898 
899  //sinked_path = true;
900  }
901  }
902  }
903  else
904  {
905  // hit the environment - perform sky lighting
906  }
907 
908  //if (sinked_path == false)
909  // sample_sink.null_path(eye_path_id, context, renderer);
910 }
911 
922 template <typename TBPTContext, typename TBPTConfig>
923 FERMAT_HOST_DEVICE
924 void connect_to_camera(const uint32 light_idx, const uint32 n_light_paths, TBPTContext& context, RenderingContextView& renderer, const TBPTConfig& config)
925 {
926  // pure light tracing: connect with a light vertex chosen at random
927  {
928  const uint32 light_depth = context.light_vertices.vertex_path_id[ light_idx ] >> 24;
929  const float light_weight = 1.0f / float(n_light_paths);
930 
931  const uint2 light_in = context.light_vertices.vertex_input[light_idx];
932 
933  //VertexGeometryId light_vertex = context.light_vertices.vertex[light_idx];
934  VertexGeometry light_vertex_geom;
935  cugar::Vector4f light_pos = context.light_vertices.vertex_pos[light_idx];
936  uint4 light_gbuffer = context.light_vertices.vertex_gbuffer[light_idx];
937  cugar::Vector3f light_in_dir = unpack_direction(light_in.x);
938  cugar::Vector3f light_in_alpha = cugar::from_rgbe(light_in.y);
939  PathWeights light_weights = context.light_vertices.vertex_weights[light_idx];
940  //float light_pdf = cugar::binary_cast<float>(light_gbuffer.w);
941 
942  #if 0
943  // evaluate the differential geometry at the light vertex (TODO: replace using pre-encoded position and normal (and later on tangents?))
944  VertexGeometryId light_vertex = context.light_vertices.vertex[light_idx];
945  setup_differential_geometry(renderer.mesh, light_vertex.prim_id, light_vertex.uv.x, light_vertex.uv.y, &light_vertex_geom);
946  #else
947  light_vertex_geom.position = light_pos.xyz();
948  light_vertex_geom.normal_s = unpack_direction(cugar::binary_cast<uint32>(light_pos.w));
949  light_vertex_geom.normal_g = light_vertex_geom.normal_s;
950  light_vertex_geom.tangent = cugar::orthogonal(light_vertex_geom.normal_s);
951  light_vertex_geom.binormal = cugar::cross(light_vertex_geom.normal_s, light_vertex_geom.tangent);
952  #endif
953 
954  // start evaluating the geometric term
955  const float d2 = fmaxf(1.0e-8f, cugar::square_length(light_vertex_geom.position - cugar::Vector3f(renderer.camera.eye)));
956  const float d = sqrtf(d2);
957 
958  // join the light sample with the current vertex
959  const cugar::Vector3f out = (light_vertex_geom.position - cugar::Vector3f(renderer.camera.eye)) / d;
960 
961  // evaluate the geometric term
962  const float cos_theta = cugar::dot(out, context.camera_W) / context.camera_W_len;
963  const float G = fabsf(cos_theta * cugar::dot(out, light_vertex_geom.normal_s)) / d2;
964 
965  // evaluate the camera BSDF
966  float out_x;
967  float out_y;
968 
969  const float p_s = camera_direction_pdf(context.camera_U, context.camera_V, context.camera_W, context.camera_W_len, context.camera_square_focal_length, out, &out_x, &out_y);
970  const float f_s = p_s * float(renderer.res_x * renderer.res_y);
971 
972  if (f_s)
973  {
974  cugar::Vector4f out_w;
975 
976  if (light_depth == 0) // this is a primary VPL / light vertex
977  {
978  if (0) // visible lights (a very silly strategy)
979  {
980  // build the local BSDF (EDF)
981  //Edf light_bsdf(light_material);
982  Edf light_bsdf(cugar::from_rgbe(light_gbuffer.x));
983 
984  // evaluate the light's EDF and the surface BSDF
985  const cugar::Vector3f f_L = light_bsdf.f(light_vertex_geom, light_vertex_geom.position, -out);
986  const float p_L = light_bsdf.p(light_vertex_geom, light_vertex_geom.position, -out, cugar::kProjectedSolidAngle);
987 
988  const float pGp = pdf_product( p_s, G, p_L );
989  const float next_pGp = pdf_product( p_L, light_weights.pG );
990  const float mis_w =
991  (config.visible_lights == 0) ? 1.0f :
992  bpt_mis(pGp / (/*n_light_paths * */config.light_tracing), next_pGp, light_weights.pGp_sum);
993 
994  // calculate the cumulative sample weight, equal to f_L * f_s * G / p
995  out_w = cugar::Vector4f(light_in_alpha * f_L * f_s * G * mis_w, 1.0f) * light_weight;
996  }
997  else
998  out_w = cugar::Vector4f(0.0f);
999  }
1000  else
1001  {
1002  // build the local BSDF
1003  Bsdf light_bsdf = unpack_bsdf(renderer, light_gbuffer );
1004 
1005  // evaluate the light's EDF and the surface BSDF
1006  const cugar::Vector3f f_L = light_bsdf.f(light_vertex_geom, light_in_dir, -out);
1007  const float p_L = light_bsdf.p(light_vertex_geom, light_in_dir, -out, cugar::kProjectedSolidAngle);
1008 
1009  const float pGp = pdf_product( p_s, G, p_L );
1010  const float next_pGp = pdf_product( cugar::max_comp(f_L), light_weights.pG );
1011  const float mis_w =
1012  (light_depth == 1 &&
1013  config.direct_lighting_nee == 0 &&
1014  config.direct_lighting_bsdf == 0) ? 1.0f :
1015  (light_depth > 1 &&
1016  config.indirect_lighting_nee == 0 &&
1017  config.indirect_lighting_bsdf == 0) ? 1.0f :
1018  bpt_mis(pGp / (/*n_light_paths * */config.light_tracing), next_pGp, light_weights.pGp_sum);
1019 
1020  // calculate the cumulative sample weight, equal to f_L * f_s * G / p
1021  out_w = cugar::Vector4f(light_in_alpha * f_L * f_s * G * mis_w, 1.0f) * light_weight;
1022  }
1023 
1024  if (cugar::max_comp(out_w.xyz()) > 0.0f && cugar::is_finite(out_w.xyz()))
1025  {
1026  // enqueue the output ray
1027  Ray out_ray;
1028  #if 0
1029  out_ray.origin = renderer.camera.eye;
1030  out_ray.dir = out;
1031  out_ray.tmin = SHADOW_TMIN;
1032  out_ray.tmax = d * 0.9999f;
1033  #else
1034  out_ray.origin = light_vertex_geom.position + light_in_dir * SHADOW_BIAS;
1035  out_ray.dir = renderer.camera.eye - out_ray.origin;
1036  out_ray.tmin = SHADOW_TMIN;
1037  out_ray.tmax = 0.9999f;
1038  #endif
1039 
1040  // compute the pixel index
1041  const PixelInfo out_pixel = PixelInfo(
1042  cugar::quantize(out_x*0.5f + 0.5f, renderer.res_x) +
1043  cugar::quantize(out_y*0.5f + 0.5f, renderer.res_y) * renderer.res_x,
1044  FBufferDesc::DIRECT_C);
1045 
1046  context.shadow_queue.warp_append(out_pixel, out_ray, out_w, 1.0f);
1047  }
1048  }
1049  }
1050 }
1051 
1062 template <typename TSampleSink, typename TBPTContext>
1063 FERMAT_HOST_DEVICE
1064 void solve_occlusion(const uint32 queue_idx, TSampleSink& sample_sink, TBPTContext& context, RenderingContextView& renderer)
1065 {
1066  const PixelInfo pixel_info = context.shadow_queue.pixels[queue_idx];
1067  const Hit hit = context.shadow_queue.hits[queue_idx];
1068  const cugar::Vector4f w = context.shadow_queue.weights[queue_idx];
1069  const uint32 light_path_id = context.shadow_queue.light_path_id[queue_idx];
1070 
1071  // TODO: break this up in separate diffuse and specular components
1072  const float vis = (hit.t < 0.0f) ? 1.0f : 0.0f;
1073 
1074  const uint32 s = (light_path_id >> 24) & 0xF;
1075  const uint32 t = (light_path_id >> 28) & 0xF;
1076 
1077  sample_sink.sink(pixel_info.channel, w * vis, light_path_id & 0xFFFFFF, pixel_info.pixel, s, t, context, renderer);
1078 }
1079 
1081 
1082 template <typename TBPTContext, typename TBPTConfig>
1083 __global__
1084 void light_tracing_kernel(const uint32 n_light_paths, TBPTContext context, RenderingContextView renderer, TBPTConfig config)
1085 {
1086  const uint32 thread_id = threadIdx.x + blockIdx.x * blockDim.x;
1087 
1088  if (VertexOrdering(config.light_ordering) == VertexOrdering::kRandomOrdering)
1089  {
1090  // compute the maximum depth a VPL/photon light might have
1091  const int32 max_light_depth = config.max_path_length - 1;
1092 
1093  const uint32 n_light_vertices = context.light_vertices.vertex_counts[max_light_depth];
1094 
1095  const uint32 light_idx = thread_id;;
1096 
1097  if (light_idx < n_light_vertices)
1098  connect_to_camera(light_idx, n_light_paths, context, renderer, config);
1099  }
1100  else
1101  {
1102  const uint32 light_path_id = thread_id;
1103 
1104  if (light_path_id < n_light_paths)
1105  {
1106  const uint32 vertex_count = context.light_vertices.vertex_counts[light_path_id];
1107  for (uint32 i = 0; i < vertex_count; ++i)
1108  connect_to_camera(light_path_id + i * n_light_paths, n_light_paths, context, renderer, config);
1109  }
1110  }
1111 }
1112 
1113 template <typename TBPTContext, typename TBPTConfig>
1114 void light_tracing(const uint32 n_light_paths, TBPTContext& context, RenderingContextView& renderer, TBPTConfig& config)
1115 {
1116  uint32 n_threads;
1117 
1118  if (VertexOrdering(config.light_ordering) == VertexOrdering::kRandomOrdering)
1119  {
1120  // compute the maximum depth a VPL/photon light might have
1121  const int32 max_light_depth = config.max_path_length - 1;
1122 
1123  cudaMemcpy(&n_threads, &context.light_vertices.vertex_counts[max_light_depth], sizeof(uint32), cudaMemcpyDeviceToHost);
1124  }
1125  else
1126  n_threads = n_light_paths;
1127 
1128  // do not execute if nothing to do
1129  if (n_threads)
1130  {
1131  const uint32 blockSize(128);
1132  const dim3 gridSize(cugar::divide_ri(n_threads, blockSize));
1133 
1134  light_tracing_kernel << < gridSize, blockSize >> > (n_light_paths, context, renderer, config);
1135  }
1136 }
1137 
1138 template <typename TSampleSink, typename TBPTContext>
1139 __global__
1140 void solve_occlusions_kernel(const uint32 in_queue_size, TSampleSink sample_sink, TBPTContext context, RenderingContextView renderer)
1141 {
1142  const uint32 thread_id = threadIdx.x + blockIdx.x * blockDim.x;
1143 
1144  if (thread_id < in_queue_size) // *context.shadow_queue.size
1145  solve_occlusion(thread_id, sample_sink, context, renderer);
1146 }
1147 
1148 template <typename TSampleSink, typename TBPTContext>
1149 void solve_occlusions(const uint32 in_queue_size, TSampleSink sample_sink, TBPTContext context, RenderingContextView renderer)
1150 {
1151  // bail-out if nothing to do
1152  if (in_queue_size == 0)
1153  return;
1154 
1155  const uint32 blockSize(128);
1156  const dim3 gridSize(cugar::divide_ri(in_queue_size, blockSize));
1157  solve_occlusions_kernel << < gridSize, blockSize >> > (in_queue_size, sample_sink, context, renderer);
1158 }
1159 
1160 template <typename TPrimaryCoordinates, typename TBPTContext, typename TBPTConfig>
1161 __global__
1162 void generate_primary_light_vertices_kernel(const uint32 n_light_paths, TPrimaryCoordinates primary_coords, TBPTContext context, RenderingContextView renderer, const TBPTConfig config)
1163 {
1164  const uint32 light_path_id = threadIdx.x + blockIdx.x * blockDim.x;
1165 
1166  if (light_path_id < n_light_paths)
1167  generate_primary_light_vertex(light_path_id, n_light_paths, primary_coords, context, renderer, config);
1168 }
1169 
1170 template <typename TPrimaryCoordinates, typename TBPTContext, typename TBPTConfig>
1171 void generate_primary_light_vertices(const uint32 n_light_paths, TPrimaryCoordinates primary_coords, TBPTContext context, RenderingContextView renderer, const TBPTConfig config)
1172 {
1173  // do not execute the kernel if nothing to do
1174  if (n_light_paths)
1175  {
1176  const uint32 blockSize(128);
1177  const dim3 gridSize(cugar::divide_ri(n_light_paths, blockSize));
1178  generate_primary_light_vertices_kernel << < gridSize, blockSize >> > (n_light_paths, primary_coords, context, renderer, config);
1179  }
1180 
1181  // update the per-level cumulative vertex counts
1182  if (VertexOrdering(config.light_ordering) == VertexOrdering::kRandomOrdering)
1183  cudaMemcpy(context.light_vertices.vertex_counts, context.light_vertices.vertex_counter, sizeof(uint32), cudaMemcpyDeviceToDevice);
1184 }
1185 
1186 
1187 template <typename TPrimaryCoordinates, typename TBPTContext, typename TBPTConfig>
1188 __global__
1189 __launch_bounds__(SECONDARY_LIGHT_VERTICES_BLOCKSIZE, SECONDARY_LIGHT_VERTICES_CTA_BLOCKS)
1190 void process_secondary_light_vertices_kernel(const uint32 in_queue_size, const uint32 n_light_paths, TPrimaryCoordinates primary_coords, TBPTContext context, RenderingContextView renderer, const TBPTConfig config)
1191 {
1192  const uint32 thread_id = threadIdx.x + blockIdx.x * blockDim.x;
1193 
1194  if (thread_id < in_queue_size) // *context.in_queue.size
1195  process_secondary_light_vertex(thread_id, n_light_paths, primary_coords, context, renderer, config);
1196 }
1197 
1198 template <typename TPrimaryCoordinates, typename TBPTContext, typename TBPTConfig>
1199 void process_secondary_light_vertices(const uint32 in_queue_size, const uint32 n_light_paths, TPrimaryCoordinates primary_coords, TBPTContext context, RenderingContextView renderer, const TBPTConfig config)
1200 {
1201  // do not execute the kernel if nothing to do
1202  if (in_queue_size)
1203  {
1204  const uint32 blockSize(SECONDARY_LIGHT_VERTICES_BLOCKSIZE);
1205  const dim3 gridSize(cugar::divide_ri(in_queue_size, blockSize));
1206  process_secondary_light_vertices_kernel << < gridSize, blockSize >> > (in_queue_size, n_light_paths, primary_coords, context, renderer, config);
1207  }
1208 
1209  // update the per-level cumulative vertex counts
1210  if (VertexOrdering(config.light_ordering) == VertexOrdering::kRandomOrdering)
1211  cudaMemcpy(context.light_vertices.vertex_counts + context.in_bounce + 1, context.light_vertices.vertex_counter, sizeof(uint32), cudaMemcpyDeviceToDevice);
1212 }
1213 
1214 
1215 template <typename TPrimaryCoordinates, typename TBPTContext, typename TBPTConfig>
1216 __global__
1217 void generate_primary_eye_vertices_kernel(const uint32 n_eye_paths, const uint32 n_light_paths, TPrimaryCoordinates primary_coords, TBPTContext context, RenderingContextView renderer, const TBPTConfig config)
1218 {
1219  const uint32 eye_path_id = threadIdx.x + blockIdx.x * blockDim.x;
1220 
1221  if (eye_path_id < n_eye_paths)
1222  generate_primary_eye_vertex(eye_path_id, n_eye_paths, n_light_paths, primary_coords, context, renderer, config);
1223 }
1224 
1225 template <typename TPrimaryCoordinates, typename TBPTConfig, typename TBPTContext>
1226 void generate_primary_eye_vertices(const uint32 n_eye_paths, const uint32 n_light_paths, TPrimaryCoordinates primary_coords, TBPTContext context, RenderingContextView renderer, const TBPTConfig config)
1227 {
1228  const uint32 blockSize(128);
1229  const dim3 gridSize(cugar::divide_ri(n_eye_paths, blockSize));
1230  generate_primary_eye_vertices_kernel << < gridSize, blockSize >> > (n_eye_paths, n_light_paths, primary_coords, context, renderer, config);
1231 }
1232 
1233 template <typename TSampleSink, typename TPrimaryCoordinates, typename TBPTContext, typename TBPTConfig>
1234 __global__
1235 __launch_bounds__(SECONDARY_EYE_VERTICES_BLOCKSIZE, SECONDARY_EYE_VERTICES_CTA_BLOCKS)
1236 void process_secondary_eye_vertices_kernel(const uint32 in_queue_size, const uint32 n_eye_paths, const uint32 n_light_paths, TSampleSink sink, TPrimaryCoordinates primary_coords, TBPTContext context, RenderingContextView renderer, const TBPTConfig config)
1237 {
1238  const uint32 thread_id = threadIdx.x + blockIdx.x * blockDim.x;
1239 
1240  if (thread_id < in_queue_size) // *context.in_queue.size
1241  process_secondary_eye_vertex(thread_id, n_eye_paths, n_light_paths, sink, primary_coords, context, renderer, config);
1242 }
1243 
1244 template <typename TSampleSink, typename TPrimaryCoordinates, typename TBPTContext, typename TBPTConfig>
1245 void process_secondary_eye_vertices(const uint32 in_queue_size, const uint32 n_eye_paths, const uint32 n_light_paths, TSampleSink sink, TPrimaryCoordinates primary_coords, TBPTContext context, RenderingContextView renderer, const TBPTConfig config)
1246 {
1247  const uint32 blockSize(SECONDARY_EYE_VERTICES_BLOCKSIZE);
1248  const dim3 gridSize(cugar::divide_ri(in_queue_size, blockSize));
1249  process_secondary_eye_vertices_kernel << < gridSize, blockSize >> > (in_queue_size, n_eye_paths, n_light_paths, sink, primary_coords, context, renderer, config);
1250 }
1251 
1254 
1255 } // namespace bpt
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE void visit_eye_vertex(const uint32 eye_path_id, const uint32 depth, const VertexGeometryId v_id, const EyeVertex &v, TBPTContext &context, RenderingContextView &renderer) const
Definition: bpt_kernels.h:200
CUGAR_HOST_DEVICE uint32 quantize(const float x, const uint32 n)
Definition: numbers.h:600
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE bool accumulate_emissive(const uint32 eye_path_id, const uint32 t, const bool absorbed) const
Definition: bpt_kernels.h:176
__device__ __forceinline__ unsigned int warp_increment(unsigned int *ptr)
Definition: warp_atomics.h:56
FERMAT_HOST_DEVICE void generate_primary_eye_vertex(const uint32 idx, const uint32 n_eye_paths, const uint32 n_light_paths, const TPrimaryCoordinates &primary_coords, TBPTContext &context, RenderingContextView &renderer, TBPTConfig &config)
Definition: bpt_kernels.h:542
Definition: bpt_utils.h:110
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE uint4 pack_bsdf(const MeshMaterial &material)
Definition: bpt_utils.h:215
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE bool terminate_light_subpath(const uint32 path_id, const uint32 s) const
Definition: bpt_kernels.h:138
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE void visit_light_vertex(const uint32 light_path_id, const uint32 depth, const VertexGeometryId v_id, TBPTContext &context, RenderingContextView &renderer) const
Definition: bpt_kernels.h:188
Definition: vertex.h:105
FERMAT_HOST_DEVICE cugar::Vector3f eval_incoming_emission(const EyeVertex &ev, const RenderingContextView &renderer, bool direct_lighting_nee, bool indirect_lighting_nee, bool use_vpls)
Definition: bpt_utils.h:1034
FERMAT_HOST_DEVICE void process_secondary_light_vertex(const uint32 queue_idx, const uint32 n_light_paths, const TPrimaryCoordinates &primary_coords, TBPTContext &context, RenderingContextView &renderer, TBPTConfig &config)
Definition: bpt_kernels.h:421
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE bool store_light_vertex(const uint32 path_id, const uint32 s, const bool absorbed) const
Definition: bpt_kernels.h:152
FERMAT_HOST_DEVICE void sink_eye_scattering_event(const Bsdf::ComponentType component, const cugar::Vector4f value, const uint32 eye_path_id, const uint32 t, TBPTContext &context, RenderingContextView &renderer)
Definition: bpt_kernels.h:239
__global__ __launch_bounds__(SHADE_HITS_BLOCKSIZE, SHADE_HITS_CTA_BLOCKS) void shade_hits_kernel(const uint32 in_queue_size
[SampleSinkBaseBlock]
Definition: bpt_control.h:287
Definition: lights.h:59
FERMAT_HOST_DEVICE void map(const uint32_t prim_id, const cugar::Vector2f &uv, VertexGeometry *geom, float *pdf, Edf *edf) const
Definition: lights.h:584
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE float atomic_add(float *value, const float op)
Definition: atomics.h:100
FERMAT_HOST_DEVICE void process_secondary_eye_vertex(const uint32 queue_idx, const uint32 n_eye_paths, const uint32 n_light_paths, TSampleSink &sample_sink, const TPrimaryCoordinates &primary_coords, TBPTContext &context, RenderingContextView &renderer, TBPTConfig &config)
Definition: bpt_kernels.h:616
ComponentType
Definition: bsdf.h:139
FERMAT_HOST_DEVICE void sink(const uint32 channel, const cugar::Vector4f value, const uint32 light_path_id, const uint32 eye_path_id, const uint32 s, const uint32 t, TBPTContext &context, RenderingContextView &renderer)
Definition: bpt_kernels.h:223
Definition: vertex.h:92
CUGAR_HOST_DEVICE L divide_ri(const L x, const R y)
Definition: numbers.h:180
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE Bsdf unpack_bsdf(const RenderingContextView &renderer, const uint4 packed_info, const TransportType transport=kParticleTransport)
Definition: bpt_utils.h:240
FERMAT_HOST_DEVICE void connect_to_camera(const uint32 light_idx, const uint32 n_light_paths, TBPTContext &context, RenderingContextView &renderer, const TBPTConfig &config)
Definition: bpt_kernels.h:924
FERMAT_HOST_DEVICE void setup_differential_geometry(const MeshView &mesh, const uint32 tri_id, const float u, const float v, VertexGeometry *geom, float *pdf=0)
Definition: mesh_utils.h:185
Definition: bpt_utils.h:131
FERMAT_HOST_DEVICE void solve_occlusion(const uint32 queue_idx, TSampleSink &sample_sink, TBPTContext &context, RenderingContextView &renderer)
Definition: bpt_kernels.h:1064
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE uint32 pack_direction(const cugar::Vector3f &dir)
Definition: vertex.h:123
Definition: bpt_utils.h:583
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE Vector3f f(const DifferentialGeometry &geometry, const Vector3f in, const Vector3f out) const
Definition: lambert_edf.h:60
Definition: ray.h:42
Definition: ray.h:68
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE float p(const DifferentialGeometry &geometry, const Vector3f in, const Vector3f out, const SphericalMeasure measure=kProjectedSolidAngle) const
Definition: lambert_edf.h:80
void light_tracing(const uint32 n_light_paths, TSampleSink sample_sink, TBPTContext &context, const TBPTConfig &config, RenderingContext &renderer, RenderingContextView &renderer_view)
Definition: bpt_control.h:576
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE bool scatter(const VertexType &v, const float z[3], Bsdf::ComponentType &out_component, cugar::Vector3f &out, float &out_p, float &out_p_proj, cugar::Vector3f &out_w, bool RR=true, bool output_alpha=true, bool evaluate_full_bsdf=false, Bsdf::ComponentType components=Bsdf::kAllComponents)
Definition: bpt_utils.h:1070
Definition: bpt_kernels.h:216
FERMAT_HOST_DEVICE bool sample(const float *Z, uint32_t *prim_id, cugar::Vector2f *uv, VertexGeometry *geom, float *pdf, Edf *edf) const
Definition: lights.h:521
Definition: bpt_kernels.h:56
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE cugar::Vector3f unpack_direction(const uint32 packed_dir)
Definition: vertex.h:133
FERMAT_HOST_DEVICE float camera_direction_pdf(const cugar::Vector3f &U, const cugar::Vector3f &V, const cugar::Vector3f &W, const float W_len, const float square_focal_length, const cugar::Vector3f out, float *out_x=0, float *out_y=0)
Definition: camera.h:206
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE void sample(const Vector2f u, const DifferentialGeometry &geometry, const Vector3f in, Vector3f &out, Vector3f &g, float &p, float &p_proj) const
Definition: lambert_edf.h:90
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE bool perform_connection(const uint32 eye_path_id, const uint32 t, const bool absorbed) const
Definition: bpt_kernels.h:163
Definition: edf.h:49
Definition: pathtracer_core.h:527
Definition: bsdf.h:123
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE bool terminate_eye_subpath(const uint32 path_id, const uint32 t) const
Definition: bpt_kernels.h:145
FERMAT_FORCEINLINE FERMAT_HOST_DEVICE cugar::Vector3f f(const cugar::DifferentialGeometry &geometry, const cugar::Vector3f w_i, const cugar::Vector3f w_o, const ComponentType components=kAllComponents) const
Definition: bsdf.h:312
Definition: renderer_view.h:80
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 length(const vector_view< Iterator > &vec)
Definition: vector_view.h:228
Definition: bpt_utils.h:311
Define CUDA based warp adders.
FERMAT_HOST_DEVICE void generate_primary_light_vertex(const uint32 light_path_id, const uint32 n_light_paths, const TPrimaryCoordinates &primary_coords, TBPTContext &context, RenderingContextView &renderer, TBPTConfig &config)
Definition: bpt_kernels.h:277
FERMAT_HOST_DEVICE FERMAT_FORCEINLINE uint4 pack_edf(const Edf &edf)
Definition: bpt_utils.h:189
FERMAT_FORCEINLINE FERMAT_HOST_DEVICE float p(const cugar::DifferentialGeometry &geometry, const cugar::Vector3f w_i, const cugar::Vector3f w_o, const cugar::SphericalMeasure measure=cugar::kProjectedSolidAngle, const bool RR=true, const ComponentType components=kAllComponents) const
Definition: bsdf.h:474
Definition: bpt_options.h:42