33 #include <cuda_runtime.h> 40 #define CHK_CUDA( code ) \ 42 cudaError_t err__ = code; \ 43 if( err__ != cudaSuccess ) \ 45 std::cerr << "Error on line " << __LINE__ << ":" \ 46 << cudaGetErrorString( err__ ) << std::endl; \ 78 : m_ptr(0), m_count(0)
84 : m_ptr(0), m_count(0)
93 memcpy(m_ptr, src.ptr(), sizeInBytes());
// Allocation entry point taking an element count.
// The visible statement asks cudaMallocManaged for sizeof(T)*count bytes and
// stores the resulting pointer in m_ptr.
// NOTE(review): the cudaError_t returned by cudaMallocManaged is not inspected
// on this line — confirm whether an unchecked allocation failure is acceptable.
// NOTE(review): the source lines between the signature and this call, and any
// after it, are absent from this listing; m_count bookkeeping is presumably
// among them — verify against the full header.
98 void alloc(
size_t count)
102 cudaMallocManaged(&m_ptr,
sizeof(T)*count);
118 size_t count()
const {
return m_count; }
119 size_t sizeInBytes()
const {
return m_count *
sizeof(T); }
120 const T* ptr()
const {
return m_ptr; }
121 T* ptr() {
return m_ptr; }
136 Buffer(
size_t count = 0, BufferType type = HOST_BUFFER, PageLockedState pageLockedState = UNLOCKED)
141 m_pageLockedState( pageLockedState )
143 alloc( count, type, pageLockedState );
148 this->operator=(src);
155 if (src.type() == HOST_BUFFER)
157 if (type() == HOST_BUFFER)
158 memcpy(m_ptr, src.ptr(), sizeInBytes());
160 cudaMemcpy(m_ptr, src.ptr(), sizeInBytes(), cudaMemcpyHostToDevice);
164 if (type() == HOST_BUFFER)
165 cudaMemcpy(m_ptr, src.ptr(), sizeInBytes(), cudaMemcpyDeviceToHost);
167 cudaMemcpy(m_ptr, src.ptr(), sizeInBytes(), cudaMemcpyDeviceToDevice);
175 if (type() == HOST_BUFFER)
176 memcpy(m_ptr, src.ptr(), sizeInBytes());
178 cudaMemcpy(m_ptr, src.ptr(), sizeInBytes(), cudaMemcpyHostToDevice);
// Convenience overload: allocates `count` elements by forwarding to the
// three-argument alloc() with this buffer's current m_type and
// m_pageLockedState.
184 void alloc(
size_t count )
186 alloc( count, m_type, m_pageLockedState );
// Allocates storage for the buffer given an element count, a buffer type and
// an optional page-locked state (defaults to UNLOCKED).
//
// Visible behavior:
//  - When m_type == HOST_BUFFER, m_ptr receives a new T[m_count] array; if
//    pageLockedState == LOCKED that array is registered with cudaHostRegister
//    using cudaHostRegisterDefault, and m_pageLockedState is assigned from the
//    argument.
//  - The two CHK_CUDA lines record the current device in m_device and
//    cudaMalloc sizeInBytes() bytes into m_ptr.
// NOTE(review): the lines that set m_type/m_count from the arguments and the
// `else` separating the host path from the cudaMalloc path are not present in
// this listing — presumably the CHK_CUDA lines are the non-host branch; confirm.
// NOTE(review): the status returned by cudaHostRegister is not checked on the
// visible line, unlike the CHK_CUDA-wrapped calls below it.
189 void alloc(
size_t count, BufferType type, PageLockedState pageLockedState = UNLOCKED)
198 if (m_type == HOST_BUFFER)
200 m_ptr =
new T[m_count];
201 if( pageLockedState == LOCKED )
202 cudaHostRegister( m_ptr, sizeInBytes(), cudaHostRegisterDefault );
203 m_pageLockedState = pageLockedState;
207 CHK_CUDA( cudaGetDevice( &m_device ) );
208 CHK_CUDA( cudaMalloc( &m_ptr, sizeInBytes() ) );
// Changes the buffer's capacity to `count` elements.
// Visible steps: construct a temporary Buffer<T> of the requested size with
// the same m_type and m_pageLockedState, then copy the smaller of `count` and
// the current m_count elements from m_ptr into it through copy_from.
// NOTE(review): the statements that follow (presumably adopting the
// temporary's storage) are absent from this listing — verify against the
// full header.
213 void resize(
const size_t count)
215 Buffer<T> buffer( count, m_type, m_pageLockedState );
216 buffer.copy_from( count < m_count ? count : m_count, m_type, m_ptr );
// Copies `count` elements of type T from `src` into this buffer, starting at
// element index `dst_offset` (default 0).
//
//   count      - number of elements (not bytes) to copy
//   src_type   - buffer type of the memory `src` points at
//   src        - source pointer
//   dst_offset - element offset into m_ptr where the copy lands
//
// An assert requires dst_offset + count <= m_count.
// The copy primitive depends on the (m_type, src_type) pair: plain memcpy when
// both compare equal to HOST_BUFFER, otherwise cudaMemcpy with a
// DeviceToHost, HostToDevice or DeviceToDevice direction. Every cudaMemcpy
// here is wrapped in CHK_CUDA.
// NOTE(review): the `else` keywords and braces separating the four cases are
// not present in this listing; the pairing of directions with branches below
// is inferred from the cudaMemcpyKind arguments — confirm against the full
// header.
221 void copy_from(
const size_t count,
const BufferType src_type,
const T* src,
const uint32 dst_offset = 0)
223 assert(dst_offset + count <= m_count);
// Destination is a host buffer.
227 if (m_type == HOST_BUFFER)
229 if (src_type == HOST_BUFFER)
230 memcpy( m_ptr + dst_offset, src,
sizeof(T)*count );
233 CHK_CUDA( cudaMemcpy( m_ptr + dst_offset, src,
sizeof(T)*count, cudaMemcpyDeviceToHost ) );
// Remaining cases issue copies whose destination direction is "ToDevice".
238 if (src_type == HOST_BUFFER)
240 CHK_CUDA( cudaMemcpy( m_ptr + dst_offset, src,
sizeof(T)*count, cudaMemcpyHostToDevice ) );
244 CHK_CUDA( cudaMemcpy( m_ptr + dst_offset, src,
sizeof(T)*count, cudaMemcpyDeviceToDevice ) );
249 void clear(
const uint8 byte)
251 if (m_type == HOST_BUFFER)
252 memset(m_ptr, byte, sizeInBytes());
254 cudaMemset(m_ptr, byte, sizeInBytes());
261 if (m_type == HOST_BUFFER)
263 if (m_pageLockedState == LOCKED)
264 cudaHostUnregister(m_ptr);
270 CHK_CUDA(cudaGetDevice(&oldDevice));
271 CHK_CUDA(cudaSetDevice(m_device));
272 CHK_CUDA(cudaFree(m_ptr));
273 CHK_CUDA(cudaSetDevice(oldDevice));
283 size_t count()
const {
return m_count; }
284 size_t sizeInBytes()
const {
return m_count *
sizeof(T); }
285 const T* ptr()
const {
return m_ptr; }
286 T* ptr() {
return m_ptr; }
287 BufferType type()
const {
return m_type; }
// Read-only element access: returns element `i` by value.
// Branches on m_type == HOST_BUFFER. The visible statement on the other path
// copies a single T from m_ptr + i into `t` with a DeviceToHost cudaMemcpy.
// NOTE(review): the host-branch statement, the declaration of `t` and the
// return statements are absent from this listing.
// NOTE(review): the cudaMemcpy status is not checked on the visible line.
289 T operator[] (
const size_t i)
const 291 if (m_type == HOST_BUFFER)
296 cudaMemcpy( &t, m_ptr + i,
sizeof(T), cudaMemcpyDeviceToHost);
// Mutable element access: returns a reference to element `i`.
// Branches on m_type == HOST_BUFFER. The visible statement on the other path
// copies a single T from m_ptr + i into `t` with a DeviceToHost cudaMemcpy.
// NOTE(review): the declaration of `t` and the return statements are absent
// from this listing. If `t` is a function-local object, returning it through
// the T& return type would hand back a dangling reference, and writes through
// it would never reach device memory — confirm against the full header.
// NOTE(review): the cudaMemcpy status is not checked on the visible line.
301 T& operator[] (
const size_t i)
303 if (m_type == HOST_BUFFER)
308 cudaMemcpy( &t, m_ptr + i,
sizeof(T), cudaMemcpyDeviceToHost);
// Stores `val` at element index `i`.
// Branches on m_type == HOST_BUFFER. The visible statement on the other path
// copies sizeof(T) bytes from &val to m_ptr + i with a HostToDevice cudaMemcpy.
// NOTE(review): the host-branch statement is absent from this listing.
// NOTE(review): the cudaMemcpy status is not checked on the visible line.
313 void set(
const size_t i,
const T val)
315 if (m_type == HOST_BUFFER)
318 cudaMemcpy(m_ptr + i, &val,
sizeof(T), cudaMemcpyHostToDevice);
323 std::swap(m_type, buf.m_type);
324 std::swap(m_ptr, buf.m_ptr);
325 std::swap(m_device, buf.m_device);
326 std::swap(m_count, buf.m_count);
327 std::swap(m_pageLockedState, buf.m_pageLockedState);
335 PageLockedState m_pageLockedState;
342 template<BufferType TYPE,
typename T>
346 DomainBuffer(
size_t count = 0, PageLockedState pageLockedState = UNLOCKED)
347 :
Buffer(count, TYPE, pageLockedState)
350 template <BufferType UTYPE>
353 this->operator=(src);
374 inline float3 ptr_to_float3(
const float* v) {
return make_float3(v[0], v[1], v[2]); }
Definition: buffers.h:343