Fermat
pathtracer_kernels.h
1 /*
2  * Fermat
3  *
4  * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #pragma once
30 
31 #include <pathtracer_core.h>
32 #include <pathtracer_queues.h>
33 #include <renderer.h>
34 #include <rt.h>
35 #include <cugar/basic/memory_arena.h>
36 
39 
42 
43 #define SHADE_HITS_BLOCKSIZE 64
44 #define SHADE_HITS_CTA_BLOCKS 8 // Maxwell / Volta : 16 - Turing : 8
45 
50 {
51  PTRayQueue in_queue;
52  PTRayQueue shadow_queue;
53  PTRayQueue scatter_queue;
54 
55  template <typename TPTVertexProcessor>
56  FERMAT_DEVICE
57  void trace_ray(
58  TPTVertexProcessor& _vertex_processor,
59  RenderingContextView& _renderer,
60  const PixelInfo _pixel,
61  const MaskedRay _ray,
62  const cugar::Vector4f _weight,
63  const cugar::Vector2f _cone = cugar::Vector2f(0),
64  const uint32 _vertex_info = uint32(-1),
65  const uint32 _nee_slot = uint32(-1))
66  {
67  scatter_queue.warp_append( _pixel, _ray, _weight, _cone, _vertex_info, _nee_slot );
68  }
69 
70  template <typename TPTVertexProcessor>
71  FERMAT_DEVICE
72  void trace_shadow_ray(
73  TPTVertexProcessor& _vertex_processor,
74  RenderingContextView& _renderer,
75  const PixelInfo _pixel,
76  const MaskedRay _ray,
77  const cugar::Vector3f _weight,
78  const cugar::Vector3f _weight_d,
79  const cugar::Vector3f _weight_g,
80  const uint32 _vertex_info = uint32(-1),
81  const uint32 _nee_slot = uint32(-1),
82  const uint32 _nee_sample = uint32(-1))
83  {
84  shadow_queue.warp_append( _pixel, _ray, cugar::Vector4f(_weight, 0.0f), cugar::Vector4f(_weight_d, 0.0f), cugar::Vector4f(_weight_g, 0.0f), _vertex_info, _nee_slot, _nee_sample );
85  }
86 };
87 
90 inline
92  PTOptions options,
93  const uint32 n_pixels,
94  PTRayQueue& input_queue,
95  PTRayQueue& scatter_queue,
96  PTRayQueue& shadow_queue,
97  cugar::memory_arena& arena)
98 {
99  input_queue.rays = arena.alloc<MaskedRay>(n_pixels);
100  input_queue.hits = arena.alloc<Hit>(n_pixels);
101  input_queue.weights = arena.alloc<float4>(n_pixels);
102  input_queue.weights_d = NULL;
103  input_queue.weights_g = NULL;
104  input_queue.pixels = arena.alloc<uint4>(n_pixels);
105  input_queue.cones = arena.alloc<float2>(n_pixels);
106  input_queue.size = arena.alloc<uint32>(1);
107 
108  scatter_queue.rays = arena.alloc<MaskedRay>(n_pixels);
109  scatter_queue.hits = arena.alloc<Hit>(n_pixels);
110  scatter_queue.weights = arena.alloc<float4>(n_pixels);
111  scatter_queue.weights_d = NULL;
112  scatter_queue.weights_g = NULL;
113  scatter_queue.pixels = arena.alloc<uint4>(n_pixels);
114  scatter_queue.cones = arena.alloc<float2>(n_pixels);
115  scatter_queue.size = arena.alloc<uint32>(1);
116 
117  const uint32 n_shadow_rays = 2u;
118 
119  shadow_queue.rays = arena.alloc<MaskedRay>(n_pixels*n_shadow_rays);
120  shadow_queue.hits = arena.alloc<Hit>(n_pixels*n_shadow_rays);
121  shadow_queue.weights = arena.alloc<float4>(n_pixels*n_shadow_rays);
122  shadow_queue.weights_d = arena.alloc<float4>(n_pixels*n_shadow_rays);
123  shadow_queue.weights_g = arena.alloc<float4>(n_pixels*n_shadow_rays);
124  shadow_queue.pixels = arena.alloc<uint4>(n_pixels*n_shadow_rays);
125  shadow_queue.size = arena.alloc<uint32>(1);
126 }
127 
128 //------------------------------------------------------------------------------
133 template <typename TPTContext>
134 __global__ void generate_primary_rays_kernel(TPTContext context, RenderingContextView renderer, cugar::Vector3f U, cugar::Vector3f V, cugar::Vector3f W, const float W_len, const float square_pixel_focal_length)
135 {
136  const uint2 pixel = make_uint2(
137  threadIdx.x + blockIdx.x*blockDim.x,
138  threadIdx.y + blockIdx.y*blockDim.y );
139 
140  if (pixel.x >= renderer.res_x || pixel.y >= renderer.res_y)
141  return;
142 
143  const int idx = pixel.x + pixel.y*renderer.res_x;
144 
145  const MaskedRay ray = generate_primary_ray( context, renderer, pixel, U, V, W );
146 
147  reinterpret_cast<float4*>(context.in_queue.rays)[2 * idx + 0] = make_float4(ray.origin.x, ray.origin.y, ray.origin.z, __uint_as_float(ray.mask)); // origin, tmin
148  reinterpret_cast<float4*>(context.in_queue.rays)[2 * idx + 1] = make_float4(ray.dir.x, ray.dir.y, ray.dir.z, ray.tmax); // dir, tmax
149 
150  // write the filter weight
151  context.in_queue.weights[idx] = cugar::Vector4f(1.0f, 1.0f, 1.0f, 1.0f);
152 
153  const float out_p = camera_direction_pdf(U, V, W, W_len, square_pixel_focal_length, ray.dir, false);
154 
155  // write the pixel index
156  context.in_queue.pixels[idx] = make_uint4( idx, uint32(-1), uint32(-1), uint32(-1) );
157 
158  // write the ray cone
159  context.in_queue.cones[idx] = make_float2( 0, out_p );
160 
161  if (idx == 0)
162  *context.in_queue.size = renderer.res_x * renderer.res_y;
163 }
164 
165 //------------------------------------------------------------------------------
170 template <typename TPTContext>
171 void generate_primary_rays(TPTContext context, const RenderingContextView renderer)
172 {
173  cugar::Vector3f U, V, W;
174  camera_frame(renderer.camera, renderer.aspect, U, V, W);
175 
176  const float square_pixel_focal_length = renderer.camera.square_pixel_focal_length(renderer.res_x, renderer.res_y);
177 
178  dim3 blockSize(32, 16);
179  dim3 gridSize(cugar::divide_ri(renderer.res_x, blockSize.x), cugar::divide_ri(renderer.res_y, blockSize.y));
180  generate_primary_rays_kernel << < gridSize, blockSize >> > (context, renderer, U, V, W, length(W), square_pixel_focal_length);
181 }
182 //------------------------------------------------------------------------------
183 
189 template <uint32 NUM_WARPS, typename TPTContext, typename TPTVertexProcessor>
190 __global__
191 __launch_bounds__(SHADE_HITS_BLOCKSIZE, SHADE_HITS_CTA_BLOCKS)
192 void shade_hits_kernel(const uint32 in_queue_size, TPTContext context, TPTVertexProcessor vertex_processor, RenderingContextView renderer)
193 {
194  const uint32 thread_id = threadIdx.x + blockIdx.x * blockDim.x;
195 
196  if (thread_id < in_queue_size) // *context.in_queue.size
197  {
198  const uint4 packed_pixel = cugar::cuda::load<cugar::cuda::LOAD_CG>( &context.in_queue.pixels[thread_id] ); // make sure we use a vectorized load
199 
200  const PixelInfo pixel_info = packed_pixel.x;
201  const uint32 prev_vertex_info = packed_pixel.y;
202  const uint32 prev_nee_slot = packed_pixel.z;
203  const float2 cone = context.in_queue.cones[thread_id];
204  const MaskedRay ray = context.in_queue.rays[thread_id];
205  const Hit hit = context.in_queue.hits[thread_id];
206  const cugar::Vector4f w = context.in_queue.weights[thread_id];
207 
208  const uint2 pixel = make_uint2(
209  pixel_info.pixel % renderer.res_x,
210  pixel_info.pixel / renderer.res_x
211  );
212 
213  shade_vertex(
214  context,
215  vertex_processor,
216  renderer,
217  context.in_bounce,
218  pixel_info,
219  pixel,
220  ray,
221  hit,
222  w,
223  prev_vertex_info,
224  prev_nee_slot,
225  cone );
226  }
227 }
228 
234 template <typename TPTContext, typename TPTVertexProcessor>
235 void shade_hits(const uint32 in_queue_size, TPTContext context, TPTVertexProcessor vertex_processor, RenderingContextView renderer)
236 {
237  const uint32 blockSize(SHADE_HITS_BLOCKSIZE);
238  const dim3 gridSize(cugar::divide_ri(in_queue_size, blockSize));
239 
240  shade_hits_kernel<blockSize / 32><<< gridSize, blockSize >>>( in_queue_size, context, vertex_processor, renderer );
241 }
242 
248 template <typename TPTContext, typename TPTVertexProcessor>
249 __global__
250 void solve_occlusion_kernel(const uint32 in_queue_size, TPTContext context, TPTVertexProcessor vertex_processor, RenderingContextView renderer)
251 {
252  const uint32 thread_id = threadIdx.x + blockIdx.x * blockDim.x;
253 
254  if (thread_id < in_queue_size) // *context.shadow_queue.size
255  {
256  const PixelInfo pixel_info = context.shadow_queue.pixels[thread_id].x;
257  const uint32 vertex_info = context.shadow_queue.pixels[thread_id].y;
258  const uint32 nee_slot = context.shadow_queue.pixels[thread_id].z;
259  const uint32 nee_sample = context.shadow_queue.pixels[thread_id].w;
260  const Hit hit = context.shadow_queue.hits[thread_id];
261  const cugar::Vector4f w = context.shadow_queue.weights[thread_id];
262  const cugar::Vector4f w_d = context.shadow_queue.weights_d[thread_id];
263  const cugar::Vector4f w_g = context.shadow_queue.weights_g[thread_id];
264 
265  solve_occlusion( context, vertex_processor, renderer, hit.t > 0.0f, pixel_info, w.xyz(), w_d.xyz(), w_g.xyz(), vertex_info, nee_slot, nee_sample );
266  }
267 }
268 
274 template <typename TPTContext, typename TPTVertexProcessor>
275 void solve_occlusion(const uint32 in_queue_size, TPTContext context, TPTVertexProcessor vertex_processor, RenderingContextView renderer)
276 {
277  const uint32 blockSize(128);
278  const dim3 gridSize(cugar::divide_ri(in_queue_size, blockSize));
279  solve_occlusion_kernel<<< gridSize, blockSize >>>( in_queue_size, context, vertex_processor, renderer );
280 }
281 
285 {
289  {
290  primary_rt_time = 0.0f;
291  path_rt_time = 0.0f;
292  shadow_rt_time = 0.0f;
293  path_shade_time = 0.0f;
294  shadow_shade_time = 0.0f;
295 
296  shade_events = 0;
297  }
298 
300  float path_rt_time;
304  uint64 shade_events;
305 };
306 
309 template <typename TPTContext, typename TPTVertexProcessor>
311  TPTContext& context,
312  TPTVertexProcessor& vertex_processor,
313  RenderingContext& renderer,
314  RenderingContextView& renderer_view,
315  PTLoopStats& stats)
316 {
317  generate_primary_rays(context, renderer_view);
318  CUDA_CHECK(cugar::cuda::sync_and_check_error("generate primary rays"));
319 
320  cudaMemset(context.device_timers, 0x00, sizeof(uint64) * 16);
321 
322  for (context.in_bounce = 0;
323  context.in_bounce < context.options.max_path_length;
324  context.in_bounce++)
325  {
326  uint32 in_queue_size;
327 
328  // fetch the amount of tasks in the queue
329  cudaMemcpy(&in_queue_size, context.in_queue.size, sizeof(uint32), cudaMemcpyDeviceToHost);
330 
331  // check whether there's still any work left
332  if (in_queue_size == 0)
333  break;
334 
335  // update per bounce options
336  compute_per_bounce_options( context, renderer_view );
337 
338  // trace the rays generated at the previous bounce
339  //
340  {
341  FERMAT_CUDA_TIME(cugar::cuda::ScopedTimer<float> trace_timer(context.in_bounce == 0 ? &stats.primary_rt_time : &stats.path_rt_time));
342 
343  renderer.get_rt_context()->trace(in_queue_size, (Ray*)context.in_queue.rays, context.in_queue.hits);
344  CUDA_CHECK(cugar::cuda::sync_and_check_error("trace shaded"));
345  }
346 
347  // reset the output queue counters
348  cudaMemset(context.shadow_queue.size, 0x00, sizeof(uint32));
349  cudaMemset(context.scatter_queue.size, 0x00, sizeof(uint32));
350  CUDA_CHECK(cugar::cuda::check_error("memset"));
351 
352  // perform lighting at this bounce
353  //
354  {
355  FERMAT_CUDA_TIME(cugar::cuda::ScopedTimer<float> shade_timer(&stats.path_shade_time));
356 
357  shade_hits(in_queue_size, context, vertex_processor, renderer_view);
358  CUDA_CHECK(cugar::cuda::sync_and_check_error("shade hits"));
359 
360  stats.shade_events += in_queue_size;
361  }
362 
363  // trace & accumulate occlusion queries
364  {
365  uint32 shadow_queue_size;
366  cudaMemcpy(&shadow_queue_size, context.shadow_queue.size, sizeof(uint32), cudaMemcpyDeviceToHost);
367 
368  // trace the rays
369  //
370  if (shadow_queue_size)
371  {
372  FERMAT_CUDA_TIME(cugar::cuda::ScopedTimer<float> trace_timer(&stats.shadow_rt_time));
373 
374  renderer.get_rt_context()->trace_shadow(shadow_queue_size, (MaskedRay*)context.shadow_queue.rays, context.shadow_queue.hits);
375  CUDA_CHECK(cugar::cuda::sync_and_check_error("trace occlusion"));
376  }
377 
378  // shade the results
379  //
380  if (shadow_queue_size)
381  {
382  FERMAT_CUDA_TIME(cugar::cuda::ScopedTimer<float> shade_timer(&stats.shadow_shade_time));
383 
384  solve_occlusion(shadow_queue_size, context, vertex_processor, renderer_view);
385  CUDA_CHECK(cugar::cuda::sync_and_check_error("solve occlusion"));
386  }
387  }
388 
389  std::swap(context.in_queue, context.scatter_queue);
390  }
391 }
392 
393 inline void print_timer_stats(const uint64* device_timers, const PTLoopStats& stats)
394 {
395  uint64 h_device_timers[16];
396  cudaMemcpy(&h_device_timers, device_timers, sizeof(uint64) * 16, cudaMemcpyDeviceToHost);
397 
398  const uint64 shade_events = stats.shade_events;
399 
400  //const uint32 warp_size = 32;
401  //const uint64 warp_shade_events = (shade_events + warp_size-1) / warp_size;
402 
403  const float setup_time = float(h_device_timers[SETUP_TIME]) / float(shade_events);
404  const float brdf_eval_time = float(h_device_timers[BRDF_EVAL_TIME]) / float(shade_events);
405  const float dirlight_sample_time = float(h_device_timers[DIRLIGHT_SAMPLE_TIME]) / float(shade_events);
406  const float dirlight_eval_time = float(h_device_timers[DIRLIGHT_EVAL_TIME]) / float(shade_events);
407  const float lights_preprocess_time = float(h_device_timers[LIGHTS_PREPROCESS_TIME]) / float(shade_events);
408  const float lights_sample_time = float(h_device_timers[LIGHTS_SAMPLE_TIME]) / float(shade_events);
409  const float lights_eval_time = float(h_device_timers[LIGHTS_EVAL_TIME]) / float(shade_events);
410  const float lights_mapping_time = float(h_device_timers[LIGHTS_MAPPING_TIME]) / float(shade_events);
411  const float lights_update_time = float(h_device_timers[LIGHTS_UPDATE_TIME]) / float(shade_events);
412  const float brdf_sample_time = float(h_device_timers[BRDF_SAMPLE_TIME]) / float(shade_events);
413  const float trace_shadow_time = float(h_device_timers[TRACE_SHADOW_TIME]) / float(shade_events);
414  const float trace_shaded_time = float(h_device_timers[TRACE_SHADED_TIME]) / float(shade_events);
415  const float vertex_preprocess_time = float(h_device_timers[PREPROCESS_VERTEX_TIME]) / float(shade_events);
416  const float nee_weights_time = float(h_device_timers[NEE_WEIGHTS_TIME]) / float(shade_events);
417  const float scattering_weights_time = float(h_device_timers[SCATTERING_WEIGHTS_TIME]) / float(shade_events);
418  const float fbuffer_writes_time = float(h_device_timers[FBUFFER_WRITES_TIME]) / float(shade_events);
419 
420  const float total_time =
421  setup_time
422  + brdf_eval_time
423  + dirlight_sample_time
424  + dirlight_eval_time
425  + lights_sample_time
426  + lights_eval_time
427  + lights_mapping_time
428  + lights_update_time
429  + brdf_sample_time
430  + trace_shadow_time
431  + trace_shaded_time
432  + vertex_preprocess_time
433  + nee_weights_time
434  + scattering_weights_time
435  + fbuffer_writes_time;
436 
437  fprintf(stderr, "\n device timing: %f clks\n", total_time);
438  fprintf(stderr, " setup : %4.1f %%, %f clks\n", 100.0 * setup_time / total_time, setup_time);
439  fprintf(stderr, " dirlight sample : %4.1f %%, %f clks\n", 100.0 * dirlight_sample_time / total_time, dirlight_sample_time);
440  fprintf(stderr, " dirlight eval : %4.1f %%, %f clks\n", 100.0 * dirlight_eval_time / total_time, dirlight_eval_time);
441  fprintf(stderr, " lights preproc : %4.1f %%, %f clks\n", 100.0 * lights_preprocess_time / total_time, lights_preprocess_time);
442  fprintf(stderr, " lights sample : %4.1f %%, %f clks\n", 100.0 * lights_sample_time / total_time, lights_sample_time);
443  fprintf(stderr, " lights eval : %4.1f %%, %f clks\n", 100.0 * lights_eval_time / total_time, lights_eval_time);
444  fprintf(stderr, " lights map : %4.1f %%, %f clks\n", 100.0 * lights_mapping_time / total_time, lights_mapping_time);
445  fprintf(stderr, " lights update : %4.1f %%, %f clks\n", 100.0 * lights_update_time / total_time, lights_update_time);
446  fprintf(stderr, " brdf eval : %4.1f %%, %f clks\n", 100.0 * brdf_eval_time / total_time, brdf_eval_time);
447  fprintf(stderr, " brdf sample : %4.1f %%, %f clks\n", 100.0 * brdf_sample_time / total_time, brdf_sample_time);
448  fprintf(stderr, " trace shadow : %4.1f %%, %f clks\n", 100.0 * trace_shadow_time / total_time, trace_shadow_time);
449  fprintf(stderr, " trace shaded : %4.1f %%, %f clks\n", 100.0 * trace_shaded_time / total_time, trace_shaded_time);
450  fprintf(stderr, " preprocess : %4.1f %%, %f clks\n", 100.0 * vertex_preprocess_time / total_time, vertex_preprocess_time);
451  fprintf(stderr, " nee weights : %4.1f %%, %f clks\n", 100.0 * nee_weights_time / total_time, nee_weights_time);
452  fprintf(stderr, " scatter weights : %4.1f %%, %f clks\n", 100.0 * scattering_weights_time / total_time, scattering_weights_time);
453  fprintf(stderr, " fbuffer writes : %4.1f %%, %f clks\n", 100.0 * fbuffer_writes_time / total_time, fbuffer_writes_time);
454 }
455 
__global__ void solve_occlusion_kernel(const uint32 in_queue_size, TPTContext context, TPTVertexProcessor vertex_processor, RenderingContextView renderer)
Definition: pathtracer_kernels.h:250
void solve_occlusion(const uint32 in_queue_size, TPTContext context, TPTVertexProcessor vertex_processor, RenderingContextView renderer)
Definition: pathtracer_kernels.h:275
Definition: pathtracer_kernels.h:49
float shadow_shade_time
time spent for shading shadow samples (i.e. in solve_occlusion)
Definition: pathtracer_kernels.h:303
__global__ void generate_primary_rays_kernel(TPTContext context, RenderingContextView renderer, cugar::Vector3f U, cugar::Vector3f V, cugar::Vector3f W, const float W_len, const float square_pixel_focal_length)
Definition: pathtracer_kernels.h:134
void generate_primary_rays(TPTContext context, const RenderingContextView renderer)
Definition: pathtracer_kernels.h:171
FERMAT_HOST_DEVICE void compute_per_bounce_options(TPTContext &context, const RenderingContextView &renderer)
Definition: pathtracer_core.h:596
Definition: timer.h:76
__global__ __launch_bounds__(SHADE_HITS_BLOCKSIZE, SHADE_HITS_CTA_BLOCKS) void shade_hits_kernel(const uint32 in_queue_size
CUGAR_HOST_DEVICE T * alloc(const uint64 sz, const uint64 alignment=sizeof(T))
Definition: memory_arena.h:69
Definition: pathtracer.h:169
CUGAR_HOST_DEVICE L divide_ri(const L x, const R y)
Definition: numbers.h:180
Definition: pathtracer_kernels.h:284
float path_rt_time
time spent for tracing scattering rays
Definition: pathtracer_kernels.h:300
FERMAT_HOST_DEVICE void camera_frame(cugar::Vector3f eye, cugar::Vector3f lookat, cugar::Vector3f up, float hfov, float aspect_ratio, cugar::Vector3f &U, cugar::Vector3f &V, cugar::Vector3f &W)
Definition: camera.h:142
float shadow_rt_time
time spent for tracing shadow rays
Definition: pathtracer_kernels.h:301
float primary_rt_time
time spent for tracing primary rays
Definition: pathtracer_kernels.h:299
FERMAT_DEVICE MaskedRay generate_primary_ray(TPTContext &context, RenderingContextView &renderer, const uint2 pixel, cugar::Vector3f U, cugar::Vector3f V, cugar::Vector3f W)
Definition: pathtracer_core.h:635
RTContext * get_rt_context() const
Definition: ray.h:42
Definition: ray.h:68
void path_trace_loop(TPTContext &context, TPTVertexProcessor &vertex_processor, RenderingContext &renderer, RenderingContextView &renderer_view, PTLoopStats &stats)
Definition: pathtracer_kernels.h:310
Definition: renderer.h:52
void alloc_queues(PTOptions options, const uint32 n_pixels, PTRayQueue &input_queue, PTRayQueue &scatter_queue, PTRayQueue &shadow_queue, cugar::memory_arena &arena)
Definition: pathtracer_kernels.h:91
FERMAT_HOST_DEVICE float camera_direction_pdf(const cugar::Vector3f &U, const cugar::Vector3f &V, const cugar::Vector3f &W, const float W_len, const float square_focal_length, const cugar::Vector3f out, float *out_x=0, float *out_y=0)
Definition: camera.h:206
Definition: ray.h:55
uint64 shade_events
number of path vertex shade events
Definition: pathtracer_kernels.h:304
Definition: pathtracer_core.h:527
Definition: memory_arena.h:44
Definition: renderer_view.h:80
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE uint32 length(const vector_view< Iterator > &vec)
Definition: vector_view.h:228
Definition: pathtracer_queues.h:44
PTLoopStats()
Definition: pathtracer_kernels.h:288
float path_shade_time
time spent for shading path vertices
Definition: pathtracer_kernels.h:302
void shade_hits(const uint32 in_queue_size, TPTContext context, TPTVertexProcessor vertex_processor, RenderingContextView renderer)
Definition: pathtracer_kernels.h:235
FERMAT_DEVICE bool shade_vertex(TPTContext &context, TPTVertexProcessor &vertex_processor, RenderingContextView &renderer, const uint32 bounce, const PixelInfo pixel_info, const uint2 pixel, const MaskedRay &ray, const Hit hit, const cugar::Vector4f w, const uint32 prev_vertex_info=uint32(-1), const uint32 prev_nee_vertex_id=uint32(-1), const cugar::Vector2f cone=cugar::Vector2f(0))
Definition: pathtracer_core.h:773