34 #include <mesh/MeshStorage.h> 35 #include <cugar/basic/timer.h> 36 #include <cugar/basic/primitives.h> 37 #include <cugar/basic/memory_arena.h> 38 #include <pathtracer_core.h> 39 #include <pathtracer_queues.h> 40 #include <pathtracer_kernels.h> 41 #include <psfpt_vertex_processor.h> 44 #define SHIFT_RES 256u 46 #define HASH_SIZE (64u * 1024u * 1024u) 67 weights_d[slot] = weight_d;
68 weights_g[slot] = weight_g;
70 pixels[slot] = make_uint2(pixel.packed, cache_slot.packed);
76 template <
typename TDirectLightingSampler>
79 PSFRefQueue ref_queue;
84 TDirectLightingSampler dl;
92 mesh_vtls->get_bvh_clusters_count(),
93 mesh_vtls->get_bvh_cluster_offsets());
100 mesh_vtls->get_bvh_nodes(),
101 mesh_vtls->get_bvh_parents(),
102 mesh_vtls->get_bvh_ranges(),
103 mesh_vtls->get_bvh_clusters_count(),
104 mesh_vtls->get_bvh_clusters(),
105 mesh_vtls->get_bvh_cluster_offsets());
// Blending kernel: each thread handles one entry of context.ref_queue and adds
// the entry's cached value, scaled by the entry's weights, into the renderer's
// framebuffer channels.
//
// in_queue_size : number of reference-queue entries to process
// context       : path tracer context; only ref_queue and options are read in
//                 the statements visible here
// renderer      : view providing the framebuffer channels via fb()
// frame_weight  : weight forwarded unchanged to every add_in() call
//
// NOTE: this listing is an excerpt. Brace lines and the statements that load
// w_d, w_g and cache_value are not visible, so the comments below describe only
// what the visible statements show.
110 template <
typename TDirectLightingSampler>
112 void psf_blending_kernel(
const uint32 in_queue_size, PSFPTContext<TDirectLightingSampler> context,
RenderingContextView renderer,
const float frame_weight)
// Flat global thread index; threads past the end of the queue do nothing.
114 const uint32 thread_id = threadIdx.x + blockIdx.x * blockDim.x;
116 if (thread_id < in_queue_size)
// Each queue entry is a pair: .x is read as a PixelInfo, .y as a CacheInfo.
121 const PixelInfo pixel_info = context.ref_queue.pixels[thread_id].x;
122 const CacheInfo cache_info = context.ref_queue.pixels[thread_id].y;
// Entries whose cache reference is not valid are skipped.
127 if (cache_info.is_valid())
130 const uint32 cache_slot = cache_info.pixel;
// Normalize the cached value by its own w component.
// NOTE(review): no guard against cache_value.w == 0 is visible in this
// excerpt — confirm a valid cache slot always carries a non-zero w.
133 cache_value /= cache_value.w;
// Sum of the diffuse and glossy weights, each included only when the
// corresponding component bit is set in pixel_info.comp (presumably assigned
// to `w`, which is used below; the left-hand side is on an elided line).
137 ((pixel_info.comp & Bsdf::kDiffuseMask) ? w_d.xyz() :
cugar::Vector3f(0.0f)) +
138 ((pixel_info.comp & Bsdf::kGlossyMask) ? w_g.xyz() :
cugar::Vector3f(0.0f));
// Composited channel: the weighted value is bounded with cugar::min against
// options.firefly_filter before being added.
141 add_in<false>(renderer.fb(FBufferDesc::COMPOSITED_C), pixel_info.pixel, cugar::min( cache_value.xyz() * w, context.options.firefly_filter ), frame_weight);
// Per-component channels: added without the firefly bound, and only for the
// components flagged in pixel_info.comp.
144 if (pixel_info.comp & Bsdf::kDiffuseMask)
145 add_in<true>(renderer.fb(FBufferDesc::DIFFUSE_C), pixel_info.pixel, cache_value.xyz() * w_d.xyz(), frame_weight);
148 if (pixel_info.comp & Bsdf::kGlossyMask)
149 add_in<true>(renderer.fb(FBufferDesc::SPECULAR_C), pixel_info.pixel, cache_value.xyz() * w_g.xyz(), frame_weight);
// Host-side launcher for psf_blending_kernel.
//
// in_queue_size : number of reference-queue entries to blend
// context       : path tracer context, passed by value to the kernel
// renderer      : renderer view; renderer.instance sets the frame weight
156 template <
typename TDirectLightingSampler>
157 void psf_blending(
const uint32 in_queue_size, PSFPTContext<TDirectLightingSampler> context,
RenderingContextView renderer)
// 128 threads per block. gridSize is defined on a line missing from this
// excerpt.
// NOTE(review): if gridSize is derived from in_queue_size, an empty queue
// would request a zero-block launch — confirm the elided code guards this.
162 const uint32 blockSize(128);
// The kernel's frame_weight is 1 / (instance + 1).
164 psf_blending_kernel << < gridSize, blockSize >> > (in_queue_size, context, renderer, 1.0f / float(renderer.instance + 1));
// Queue allocation helper (excerpt; the declaration line and some parameters
// are not visible — presumably an overload of alloc_queues, since it forwards
// to the global ::alloc_queues below). It adds the reference queue's arrays to
// the arena on top of the standard ray queues.
171 const uint32 n_pixels,
175 PSFRefQueue& ref_queue,
// Allocate the input / scatter / shadow queues first.
178 ::alloc_queues( options, n_pixels, input_queue, scatter_queue, shadow_queue, arena );
// The three reference-queue arrays each hold
// n_pixels * (max_path_length + 1) elements.
// NOTE(review): n_pixels is uint32; if options.max_path_length is also 32-bit
// the element count is computed in 32 bits before reaching alloc() — confirm it
// cannot wrap for large resolutions or path lengths.
180 ref_queue.weights_d = arena.
alloc<float4>(n_pixels * (options.max_path_length + 1));
181 ref_queue.weights_g = arena.alloc<float4>(n_pixels * (options.max_path_length + 1));
182 ref_queue.pixels = arena.alloc<uint2>(n_pixels * (options.max_path_length + 1));
// A single uint32 holding the queue's size.
183 ref_queue.size = arena.alloc<uint32>(1);
// Constructor fragment (presumably PSFPT::PSFPT; the declaration line is not
// visible in this excerpt).
// Member initializers: an LFSR generator matrix built with 32 and
// GOOD_PROJECTIONS, and m_random constructed from that generator with the
// constants 1u and 1351u.
189 m_generator(32,
cugar::LFSRGeneratorMatrix::GOOD_PROJECTIONS),
190 m_random(&m_generator, 1u, 1351u)
// NOTE(review): raw owning `new`; no matching delete is visible in this
// excerpt — confirm where m_vtls_rl is released.
193 m_vtls_rl =
new VTLRLStorage;
// init() body (excerpt; the signature and several statements are not visible).
// Visible steps: parse and echo the options, size the PSF hash storage, size
// and allocate the queue memory pool, set up the sampler sequence, and
// initialize the light data used for next-event estimation.
198 const uint2 res = renderer.
res();
// One entry per framebuffer pixel.
199 const uint32 n_pixels = res.x * res.y;
202 m_options.parse(argc, argv);
// Display names for the NEE algorithm, indexed by m_options.nee_type below.
204 const char* nee_alg[] = {
"mesh",
"vpl",
"rl" };
// Echo the settings to stderr.
206 fprintf(stderr,
" PSFPT settings:\n");
207 fprintf(stderr,
" path-length : %u\n", m_options.max_path_length);
208 fprintf(stderr,
" direct-nee : %u\n", m_options.direct_lighting_nee ? 1 : 0);
209 fprintf(stderr,
" direct-bsdf : %u\n", m_options.direct_lighting_bsdf ? 1 : 0);
210 fprintf(stderr,
" indirect-nee : %u\n", m_options.indirect_lighting_nee ? 1 : 0);
211 fprintf(stderr,
" indirect-bsdf : %u\n", m_options.indirect_lighting_bsdf ? 1 : 0);
212 fprintf(stderr,
" visible-lights : %u\n", m_options.visible_lights ? 1 : 0);
213 fprintf(stderr,
" direct lighting : %u\n", m_options.direct_lighting ? 1 : 0);
214 fprintf(stderr,
" diffuse : %u\n", m_options.diffuse_scattering ? 1 : 0);
215 fprintf(stderr,
" glossy : %u\n", m_options.glossy_scattering ? 1 : 0);
216 fprintf(stderr,
" indirect glossy : %u\n", m_options.indirect_glossy ? 1 : 0);
217 fprintf(stderr,
" RR : %u\n", m_options.rr ? 1 : 0);
// NOTE(review): nee_alg has three entries and the index is not range-checked
// here — confirm option parsing restricts nee_type to 0..2.
218 fprintf(stderr,
" nee algorithm : %s\n", nee_alg[ m_options.nee_type ]);
219 fprintf(stderr,
" filter width : %f\n", m_options.psf_width);
220 fprintf(stderr,
" filter depth : %u\n", m_options.psf_depth);
221 fprintf(stderr,
" filter min-dist : %f\n", m_options.psf_min_dist);
222 fprintf(stderr,
" firefly filter : %f\n", m_options.firefly_filter);
// Size the PSF hash map and its value storage with HASH_SIZE
// (64u * 1024u * 1024u).
225 m_psf_hash.resize(HASH_SIZE);
226 m_psf_values.alloc(HASH_SIZE);
236 PSFRefQueue ref_queue;
// Adds 16 int64 values to the arena (presumably the counterpart of the
// device-timer allocation in render_pass — confirm).
248 arena.
alloc<int64>( 16 );
// arena.size is reported and then used as the byte count for the memory pool.
250 fprintf(stderr,
" allocating queue storage: %.1f MB\n",
float(arena.size) / (1024*1024));
251 m_memory_pool.alloc(arena.size);
// The sampler is set up with six dimensions per path vertex, for
// max_path_length + 1 vertices.
255 const uint32 n_dimensions = 6 * (m_options.max_path_length + 1);
256 fprintf(stderr,
" initializing sampler: %u dimensions\n", n_dimensions);
257 m_sequence.setup(n_dimensions, SHIFT_RES);
// One light path per pixel.
259 const uint32 n_light_paths = n_pixels;
// The mesh-light creation calls sit between these two messages, on lines that
// are not part of this excerpt.
261 fprintf(stderr,
" creating mesh lights... started\n");
266 fprintf(stderr,
" creating mesh lights... done\n");
// Overrides the NEE algorithm to MESH; the condition guarding this assignment
// is on a line not visible here.
273 m_options.nee_type = NEE_ALGORITHM_MESH;
// The VTL structures are built only when the RL algorithm is selected.
275 if (m_options.nee_type == NEE_ALGORITHM_RL)
277 fprintf(stderr,
" creating mesh VTLs... started\n");
278 m_mesh_vtls->init(n_light_paths, renderer, 0u );
279 fprintf(stderr,
" creating mesh VTLs... done (%u VTLs, %u clusters)\n", m_mesh_vtls->get_vtl_count(), m_mesh_vtls->get_bvh_clusters_count());
281 fprintf(stderr,
" initializing VTLs RL... started\n");
282 ::init( m_vtls_rl, m_mesh_vtls );
// Report the RL storage requirement in MB.
283 fprintf(stderr,
" initializing VTLs RL... done (%.1f MB)\n", m_vtls_rl->needed_bytes(VTL_RL_HASH_SIZE, m_mesh_vtls->get_bvh_clusters_count()) /
float(1024*1024));
// render() body (excerpt): runs one render pass for this instance with the
// pass type PSFPT::kFinalPass.
293 render_pass( instance, renderer, PSFPT::kFinalPass );
// render_pass() body (excerpt; the signature, braces and several statements
// are not visible). Visible flow: set up queues and timers, build a context
// specialized on the direct-lighting sampler (RL, or mesh / VPL), run the
// path-tracing loop, blend the reference queue on the final pass, and print
// timing statistics.
// NOTE(review): the RL and mesh branches below repeat the same context setup,
// queue clear and blending steps — candidates for a shared templated helper.
305 const uint2 res = renderer.
res();
306 const uint32 n_pixels = res.x * res.y;
315 PSFRefQueue ref_queue;
// Storage for 16 uint64 timer values, taken from the arena.
336 uint64* device_timers = arena.alloc<uint64>( 16 );
// RL only: work performed every 32nd instance (the statements themselves are
// elided), followed by a synchronizing error check.
343 if (m_options.nee_type == NEE_ALGORITHM_RL)
345 if ((instance % 32) == 0)
354 CUDA_CHECK(cugar::cuda::sync_and_check_error(
"vtl-rl update"));
359 m_sequence.set_instance(instance);
// ---- RL direct-lighting branch ----
362 if (m_options.nee_type == NEE_ALGORITHM_RL)
364 PSFPTContext<DirectLightingRL> context;
365 context.options = m_options;
366 context.in_bounce = 0;
367 context.in_queue = input_queue;
368 context.scatter_queue = scatter_queue;
369 context.shadow_queue = shadow_queue;
370 context.sequence = m_sequence.view();
// Weight of this frame: 1 / (instance + 1).
371 context.frame_weight = 1.0f / float(renderer_view.instance + 1);
372 context.device_timers = device_timers;
373 context.bbox = m_bbox;
// Tail of a statement whose beginning is not visible in this excerpt.
376 m_mesh_vtls->view() );
377 context.ref_queue = ref_queue;
// Hash-map view built from pointers into m_psf_hash's storage.
378 context.psf_hashmap = HashMap(
380 m_psf_hash.m_keys.ptr(),
381 m_psf_hash.m_unique.ptr(),
382 m_psf_hash.m_slots.ptr(),
383 m_psf_hash.m_size.ptr()
385 context.psf_values = m_psf_values.ptr();
// Work done every psf_temporal_reuse-th instance (statements elided).
// NOTE(review): modulo by zero if psf_temporal_reuse == 0 — confirm option
// parsing enforces a value >= 1.
388 if ((instance % m_options.psf_temporal_reuse) == 0)
// Zero the reference-queue size counter.
// NOTE(review): the cudaMemset return value is ignored; errors surface only
// through the sync_and_check_error call that follows.
392 cudaMemset(context.ref_queue.size, 0x00,
sizeof(uint32));
393 CUDA_CHECK(cugar::cuda::sync_and_check_error(
"clear reference queue"));
396 path_trace_loop( context, vertex_processor, renderer, renderer_view, stats );
// Final pass only: copy the queue size to the host and blend that many
// entries into the framebuffer.
399 if (pass_type == PSFPT::kFinalPass)
401 uint32 ref_queue_size;
// NOTE(review): the cudaMemcpy result is not checked; on failure
// ref_queue_size would be read uninitialized.
402 cudaMemcpy(&ref_queue_size, context.ref_queue.size,
sizeof(uint32), cudaMemcpyDeviceToHost);
404 psf_blending(ref_queue_size, context, renderer_view);
405 CUDA_CHECK(cugar::cuda::sync_and_check_error(
"psf blending"));
// ---- mesh / VPL direct-lighting branch ----
// Selects the VPL light set when the VPL algorithm is chosen, otherwise the
// mesh light (its use is on lines not visible here).
411 MeshLight mesh_light = m_options.nee_type == NEE_ALGORITHM_VPL ? renderer_view.mesh_vpls : renderer_view.mesh_light;
414 PSFPTContext<DirectLightingMesh> context;
415 context.options = m_options;
416 context.in_bounce = 0;
417 context.in_queue = input_queue;
418 context.scatter_queue = scatter_queue;
419 context.shadow_queue = shadow_queue;
420 context.sequence = m_sequence.view();
// Weight of this frame: 1 / (instance + 1).
421 context.frame_weight = 1.0f / float(renderer_view.instance + 1);
422 context.device_timers = device_timers;
423 context.bbox = m_bbox;
425 context.ref_queue = ref_queue;
// Hash-map view built from pointers into m_psf_hash's storage.
426 context.psf_hashmap = HashMap(
428 m_psf_hash.m_keys.ptr(),
429 m_psf_hash.m_unique.ptr(),
430 m_psf_hash.m_slots.ptr(),
431 m_psf_hash.m_size.ptr()
433 context.psf_values = m_psf_values.ptr();
// Same periodic step and modulo-by-zero concern as in the RL branch.
436 if ((instance % m_options.psf_temporal_reuse) == 0)
// Zero the reference-queue size counter (return value unchecked, as above).
440 cudaMemset(context.ref_queue.size, 0x00,
sizeof(uint32));
441 CUDA_CHECK(cugar::cuda::sync_and_check_error(
"clear reference queue"));
444 path_trace_loop( context, vertex_processor, renderer, renderer_view, stats );
// Final pass only: read back the queue size and blend (cudaMemcpy result
// unchecked, as above).
447 if (pass_type == PSFPT::kFinalPass)
449 uint32 ref_queue_size;
450 cudaMemcpy(&ref_queue_size, context.ref_queue.size,
sizeof(uint32), cudaMemcpyDeviceToHost);
452 psf_blending(ref_queue_size, context, renderer_view);
453 CUDA_CHECK(cugar::cuda::sync_and_check_error(
"psf blending"));
// Elapsed time in seconds, read from `timer`.
459 const float time = timer.seconds();
// Progress line on stderr; most arguments are on elided lines. The last one is
// the hash size divided by 1000, for the "%uK cells" field.
466 fprintf(stderr,
"\r %.1fs (%.1fms = rt[%.1fms + %.1fms + %.1fms] + shade[%.1fms + %.1fms] - %uK cells) ",
474 m_psf_hash.size() / 1000);
// When DEVICE_TIMING is enabled, print the device timer statistics every 64th
// instance.
476 #if defined(DEVICE_TIMING) && DEVICE_TIMING 477 if (instance % 64 == 0)
478 print_timer_stats( device_timers, stats );
Definition: direct_lighting_rl.h:45
void init(int argc, char **argv, RenderingContext &renderer)
Definition: psfpt_impl.h:196
Definition: pathtracer_kernels.h:49
float shadow_shade_time
time spent for shading shadow samples (i.e. in solve_occlusion)
Definition: pathtracer_kernels.h:303
__device__ __forceinline__ unsigned int warp_increment(unsigned int *ptr)
Definition: warp_atomics.h:56
void update_variances(const uint32 instance)
void start()
start timing
Definition: timer.cpp:116
Definition: mesh_lights.h:59
Definition: direct_lighting_mesh.h:41
Definition: psfpt_vertex_processor.h:49
Definition: clustered_rl.h:161
CUGAR_HOST_DEVICE T * alloc(const uint64 sz, const uint64 alignment=sizeof(T))
Definition: memory_arena.h:69
CUGAR_HOST_DEVICE L divide_ri(const L x, const R y)
Definition: numbers.h:180
Definition: pathtracer_kernels.h:284
void render(const uint32 instance, RenderingContext &renderer)
Definition: psfpt_impl.h:287
float path_rt_time
time spent for tracing scattering rays
Definition: pathtracer_kernels.h:300
MeshLightsStorage & get_mesh_lights()
float shadow_rt_time
time spent for tracing shadow rays
Definition: pathtracer_kernels.h:301
void clamp_frame(const float max_value)
float primary_rt_time
time spent for tracing primary rays
Definition: pathtracer_kernels.h:299
void render_pass(const uint32 instance, RenderingContext &renderer, const PassType pass_type)
Definition: psfpt_impl.h:301
Definition: pathtracer_core.h:570
RenderingContextView view(const uint32 instance)
Define a vector_view POD type and plain_view() for std::vector.
Definition: diff.h:38
Definition: clustered_rl.h:87
void path_trace_loop(TPTContext &context, TPTVertexProcessor &vertex_processor, RenderingContext &renderer, RenderingContextView &renderer_view, PTLoopStats &stats)
Definition: pathtracer_kernels.h:310
Definition: renderer.h:52
void alloc_queues(PTOptions options, const uint32 n_pixels, PTRayQueue &input_queue, PTRayQueue &scatter_queue, PTRayQueue &shadow_queue, cugar::memory_arena &arena)
Definition: pathtracer_kernels.h:91
void rescale_frame(const uint32 instance)
Definition: pathtracer_core.h:527
Definition: memory_arena.h:44
Definition: renderer_view.h:80
Definition: pathtracer_queues.h:44
[PSFPTVertexProcessor::CacheInfo]
Definition: psfpt_vertex_processor.h:56
cugar::Bbox3f compute_bbox()
float path_shade_time
time spent for shading path vertices
Definition: pathtracer_kernels.h:302