Fermat
wavefront_queues.h
1 //
2 //Copyright (c) 2016 NVIDIA Corporation. All rights reserved.
3 //
4 //NVIDIA Corporation and its licensors retain all intellectual property and
5 //proprietary rights in and to this software, related documentation and any
6 //modifications thereto. Any use, reproduction, disclosure or distribution of
7 //this software and related documentation without an express license agreement
8 //from NVIDIA Corporation is strictly prohibited.
9 //
10 //TO THE MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, THIS SOFTWARE IS PROVIDED
11 //*AS IS* AND NVIDIA AND ITS SUPPLIERS DISCLAIM ALL WARRANTIES, EITHER EXPRESS
12 //OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, IMPLIED WARRANTIES OF
13 //MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL
14 //NVIDIA OR ITS SUPPLIERS BE LIABLE FOR ANY SPECIAL, INCIDENTAL, INDIRECT, OR
15 //CONSEQUENTIAL DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, DAMAGES FOR
16 //LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS
17 //INFORMATION, OR ANY OTHER PECUNIARY LOSS) ARISING OUT OF THE USE OF OR
18 //INABILITY TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS BEEN ADVISED OF THE
19 //POSSIBILITY OF SUCH DAMAGES
20 //
21 
22 #pragma once
23 
24 // ------------------------------------------------------------------------- //
25 //
26 // Declaration of classes used to store intersections.
27 //
28 // ------------------------------------------------------------------------- //
29 
30 #include <types.h>
31 #include <cugar/basic/memory_arena.h>
33 
36 
39 
41 {
42  enum Type
43  {
44  NONE = 0,
45  UINT = 1,
46  UINT2 = 2,
47  UINT4 = 3,
48  FLOAT = 4,
49  FLOAT2 = 5,
50  FLOAT4 = 6,
51  };
52 
53  FERMAT_HOST_DEVICE
55  {
56  for (uint32 i = 0; i < 16; ++i)
57  desc[i] = NONE;
58  }
59 
60  FERMAT_HOST_DEVICE
61  uint32 size(const uint32 i) const
62  {
63  switch (desc[i])
64  {
65  case UINT: return 4u;
66  case UINT2: return 8u;
67  case UINT4: return 16u;
68  case FLOAT: return 4u;
69  case FLOAT2: return 8u;
70  case FLOAT4: return 16u;
71  default : return 0u;
72  };
73  }
74 
75  template <typename TUserData>
76  void setup(QueueDescriptor& user)
77  {
78  // invoke the serialization method on the user object
79  serialize( *this, 0u, user );
80  }
81 
82  Type desc[16];
83 };
84 
85 template <uint32 m> void serialize_member(QueueDescriptor& queue, uint32 i, uint v) { queue.desc[m] = QueueDescriptor::UINT; }
86 template <uint32 m> void serialize_member(QueueDescriptor& queue, uint32 i, uint2 v) { queue.desc[m] = QueueDescriptor::UINT2; }
87 template <uint32 m> void serialize_member(QueueDescriptor& queue, uint32 i, uint4 v) { queue.desc[m] = QueueDescriptor::UINT4; }
88 template <uint32 m> void serialize_member(QueueDescriptor& queue, uint32 i, float v) { queue.desc[m] = QueueDescriptor::FLOAT; }
89 template <uint32 m> void serialize_member(QueueDescriptor& queue, uint32 i, float2 v) { queue.desc[m] = QueueDescriptor::FLOAT2; }
90 template <uint32 m> void serialize_member(QueueDescriptor& queue, uint32 i, float4 v) { queue.desc[m] = QueueDescriptor::FLOAT4; }
91 
93 {
94  typedef uint32 Entry;
95 
98  FERMAT_HOST_DEVICE
99  WavefrontQueue() : ptr(NULL), size(NULL), capacity(0) {}
100 
103  FERMAT_HOST_DEVICE
104  void setup(const QueueDescriptor& _desc, const uint32 _capacity)
105  {
106  capacity = _capacity;
107 
108  uint32 offset = 0;
109  for (uint32 i = 0; i < 16; ++i)
110  {
111  const uint32 el_size = _desc.size(i);
112 
113  // take care of the element alignment
114  offset = cugar::round_i( offset, el_size );
115 
116  // record the offset
117  offsets[i] = offset;
118 
119  // increase the offset
120  offset += el_size * capacity;
121  }
122  }
123 
126  FERMAT_HOST_DEVICE
127  uint32 byte_size() const { return offsets[ 15 ]; }
128 
131  FERMAT_HOST_DEVICE
132  void alloc(uint8* _ptr, uint32* _size) { ptr = _ptr; size = _size; }
133 
136  template <uint32 m, typename T>
137  FERMAT_HOST_DEVICE
138  T* member_base() const { return reinterpret_cast<T*>(ptr + offsets[m]); }
139 
142  template <uint32 m, typename T>
143  FERMAT_HOST_DEVICE
144  const T& member(const uint32 slot) const { return member_base<m>()[slot]; }
145 
148  template <uint32 m, typename T>
149  FERMAT_HOST_DEVICE
150  T& member(const uint32 slot) { return member_base<m>()[slot]; }
151 
154  FERMAT_DEVICE
155  uint32 append()
156  {
157  return cugar::cuda::warp_increment(size);
158  }
159 
162  FERMAT_HOST_DEVICE
163  void set_size(const uint32 _size) { *size = _size; }
164 
165  uint8* ptr;
166  uint32* size;
167  uint32 capacity;
168  uint32 offsets[16];
169 };
170 
171 template <uint32 m> void serialize_member(WavefrontQueue& queue, const uint32 slot, uint v) { queue.member<m,uint>(slot) = v; }
172 template <uint32 m> void serialize_member(WavefrontQueue& queue, const uint32 slot, uint2 v) { queue.member<m,uint2>(slot) = v; }
173 template <uint32 m> void serialize_member(WavefrontQueue& queue, const uint32 slot, uint4 v) { queue.member<m,uint4>(slot) = v; }
174 template <uint32 m> void serialize_member(WavefrontQueue& queue, const uint32 slot, float v) { queue.member<m,float>(slot) = v; }
175 template <uint32 m> void serialize_member(WavefrontQueue& queue, const uint32 slot, float2 v) { queue.member<m,float2>(slot) = v; }
176 template <uint32 m> void serialize_member(WavefrontQueue& queue, const uint32 slot, float4 v) { queue.member<m,float4>(slot) = v; }
177 
178 template <uint32 m> void deserialize_member(const WavefrontQueue& queue, const uint32 slot, uint& v) { v = queue.member<m,uint>(slot); }
179 template <uint32 m> void deserialize_member(const WavefrontQueue& queue, const uint32 slot, uint2& v) { v = queue.member<m,uint2>(slot); }
180 template <uint32 m> void deserialize_member(const WavefrontQueue& queue, const uint32 slot, uint4& v) { v = queue.member<m,uint4>(slot); }
181 template <uint32 m> void deserialize_member(const WavefrontQueue& queue, const uint32 slot, float& v) { v = queue.member<m,float>(slot); }
182 template <uint32 m> void deserialize_member(const WavefrontQueue& queue, const uint32 slot, float2& v) { v = queue.member<m,float2>(slot); }
183 template <uint32 m> void deserialize_member(const WavefrontQueue& queue, const uint32 slot, float4& v) { v = queue.member<m,float4>(slot); }
184 
188 {
191  FERMAT_HOST_DEVICE
192  void alloc(uint8* _ptr, uint32* _size) { ptr = _ptr + sizeof(float4)*3*capacity; size = _size; ray_ptr = _ptr; }
193 
194  FERMAT_HOST_DEVICE
195  Ray* rays() const { return (Ray*)ray_ptr; }
196 
197  FERMAT_HOST_DEVICE
198  Hit* hits() const { return (Hit*)(ray_ptr + sizeof(float4)*2*capacity); }
199 
200  uint8* ray_ptr;
201 };
202 
203 FERMAT_HOST_DEVICE
204 void serialize(RayWavefrontQueue& queue, const uint32 i, const Ray& ray)
205 {
206  reinterpret_cast<float4*>(queue.ray_ptr)[2*i + 0] = make_float4( ray.origin.x, ray.origin.y, ray.origin.x, ray.tmin );
207  reinterpret_cast<float4*>(queue.ray_ptr)[2*i + 1] = make_float4( ray.dir.x, ray.dir.y, ray.dir.x, ray.tmax );
208 }
209 FERMAT_HOST_DEVICE
210 void deserialize(const RayWavefrontQueue& queue, const uint32 i, Ray& ray, Hit& hit)
211 {
212  {
213  const float4 val = reinterpret_cast<const float4*>(queue.ray_ptr)[2*i + 0];
214  ray.origin.x = val.x;
215  ray.origin.y = val.y;
216  ray.origin.z = val.z;
217  ray.tmin = val.w;
218  }
219  {
220  const float4 val = reinterpret_cast<const float4*>(queue.ray_ptr)[2*i + 1];
221  ray.dir.x = val.x;
222  ray.dir.y = val.y;
223  ray.dir.z = val.z;
224  ray.tmax = val.w;
225  }
226  {
227  const float4 val = reinterpret_cast<const float4*>(queue.ray_ptr + sizeof(float4)*2*queue.capacity)[i];
228  hit.t = val.x;
229  hit.triId = val.y;
230  hit.u = val.z;
231  hit.v = val.w;
232  }
233 }
234 
235 #if 0
236  struct ScatteringPayload
237  {
238  PixelInfo pixel_info;
239  CacheInfo cache_info;
240  CacheInfo prev_cache_info;
241  float4 weight;
242  float2 cone;
243  float roughness;
244  };
245  template <typename TQueue, typename TQueueEntry>
246  FERMAT_HOST_DEVICE
247  void serialize(TQueue& queue, const TQueueEntry& i, const ScatteringPayload& payload)
248  {
249  serialize_member<0u>( queue, i, make_uint4(
250  payload.pixel_info.packed,
251  payload.cache_info.packed,
252  payload.prev_cache_info.packed,
253  cugar::binary_cast<uint32>(payload.roughness)) );
254 
255  serialize_member<1u>( queue, i, payload.cone );
256  serialize_member<2u>( queue, i, payload.weight );
257  }
258  template <typename TQueue, typename TQueueEntry>
259  FERMAT_HOST_DEVICE
260  void deserialize(const TQueue& queue, const TQueueEntry& i, ScatteringPayload& payload)
261  {
262  uint4 u;
263  deserialize_member<0u>( queue, i, u );
264  payload.pixel_info = PixelInfo(u.x);
265  payload.cache_info = CacheInfo(u.y);
266  payload.prev_cache_info = CacheInfo(u.z);
267  payload.roughness = cugar::binary_cast<float>(u.w);
268 
269  deserialize_member<1u>( queue, i, payload.cone );
270  deserialize_member<2u>( queue, i, payload.weight );
271  }
272 #endif
273 
FERMAT_HOST_DEVICE void setup(const QueueDescriptor &_desc, const uint32 _capacity)
Definition: wavefront_queues.h:104
__device__ __forceinline__ unsigned int warp_increment(unsigned int *ptr)
Definition: warp_atomics.h:56
Definition: wavefront_queues.h:92
CUGAR_HOST_DEVICE L round_i(const L x, const R y)
Definition: numbers.h:198
FERMAT_HOST_DEVICE const T & member(const uint32 slot) const
Definition: wavefront_queues.h:144
FERMAT_HOST_DEVICE T * member_base() const
Definition: wavefront_queues.h:138
FERMAT_HOST_DEVICE void alloc(uint8 *_ptr, uint32 *_size)
Definition: wavefront_queues.h:192
FERMAT_HOST_DEVICE void set_size(const uint32 _size)
Definition: wavefront_queues.h:163
Definition: wavefront_queues.h:187
FERMAT_HOST_DEVICE void alloc(uint8 *_ptr, uint32 *_size)
Definition: wavefront_queues.h:132
Definition: wavefront_queues.h:40
Definition: ray.h:42
Definition: ray.h:68
CUGAR_FORCEINLINE CUGAR_HOST_DEVICE Out binary_cast(const In in)
Definition: types.h:288
Definition: pathtracer_core.h:527
Define CUDA based warp adders.
FERMAT_HOST_DEVICE WavefrontQueue()
Definition: wavefront_queues.h:99
FERMAT_HOST_DEVICE T & member(const uint32 slot)
Definition: wavefront_queues.h:150