#ifndef CUBBYFLOW_CUDA_ALGORITHMS_HPP
#define CUBBYFLOW_CUDA_ALGORITHMS_HPP

#ifdef CUBBYFLOW_USE_CUDA

// Kernel: writes val into each of the first n elements of dst.
//
// Launch layout: 1D grid of 1D blocks. Every thread starts at its flat global
// index and advances by the total number of launched threads, so all n
// elements are written whatever the grid size is. No shared memory is used.
//
// dst - destination array; must be writable from device code.
// n   - number of elements to write.
// val - value stored into each element (passed by value to the kernel).
//
// NOTE(review): the listing this block was recovered from showed only the
// index computation; the bounds-checked store is reconstructed from the
// signature and name - confirm against the original source.
template <typename T>
__global__ void CUDAFillKernel(T* dst, size_t n, T val)
{
    // blockIdx.x, blockDim.x and gridDim.x are 32-bit; widen one operand
    // before multiplying so the products are computed in size_t and cannot
    // wrap for very large launches.
    const size_t first = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    const size_t step = static_cast<size_t>(gridDim.x) * blockDim.x;

    // The i < n test doubles as the tail guard for grids that do not divide n.
    for (size_t i = first; i < n; i += step)
    {
        dst[i] = val;
    }
}
// Sets each of the first n elements of dst to val by launching CUDAFillKernel.
//
// The launches use the two-argument <<<blocks, threads>>> form, i.e. no
// dynamic shared memory and the default stream. Each launch is followed by
// CUBBYFLOW_CUDA_CHECK_LAST_ERROR (defined elsewhere; presumably reports
// launch-configuration errors - confirm).
//
// dst - destination array; presumably device memory, since it is handed
//       straight to the kernel - verify against callers.
// n   - number of elements to fill; n == 0 performs no launch.
// val - fill value, copied into the kernel argument.
template <typename T>
void CUDAFill(T* dst, size_t n, const T& val)
{
    // CUDAComputeGridSize is called with an unsigned int element count, so a
    // size_t n cannot be passed through in one piece without truncation.
    // Work in slices that always fit, with headroom for any round-up
    // arithmetic the helper may do internally.
    constexpr size_t kMaxElementsPerLaunch = static_cast<size_t>(1) << 30;

    size_t done = 0;
    while (done < n)
    {
        const size_t remaining = n - done;
        const size_t count = remaining < kMaxElementsPerLaunch
                                 ? remaining
                                 : kMaxElementsPerLaunch;

        unsigned int numBlocks = 0;
        unsigned int numThreads = 0;
        CUDAComputeGridSize(static_cast<unsigned int>(count), 256, numBlocks,
                            numThreads);
        CUDAFillKernel<<<numBlocks, numThreads>>>(dst + done, count, val);

        CUBBYFLOW_CUDA_CHECK_LAST_ERROR("Failed executing CUDAFillKernel");

        done += count;
    }
}
// Exchanges the values of a and b. Callable from both host and device code.
//
// NOTE(review): only the signature was visible in the listing this block was
// recovered from; the three-step copy swap below is reconstructed from the
// name and signature - confirm against the original source.
template <typename T>
__host__ __device__ inline void CUDASwap(T& a, T& b)
{
    T tmp = a;
    a = b;
    b = tmp;
}
// Copies n elements of type T from src to dst with cudaMemcpy.
//
// src  - source array.
// n    - number of elements (not bytes); the byte count is n * sizeof(T).
// dst  - destination array.
// kind - transfer direction handed to cudaMemcpy; defaults to
//        cudaMemcpyDeviceToDevice. src and dst must live in the memory
//        spaces that the chosen kind implies.
//
// The cudaMemcpy status is passed to CUBBYFLOW_CUDA_CHECK (defined elsewhere;
// presumably reports or throws on failure - confirm).
template <typename T>
void CUDACopy(const T* src, size_t n, T* dst,
              cudaMemcpyKind kind = cudaMemcpyDeviceToDevice)
{
    const size_t numBytes = n * sizeof(T);
    CUBBYFLOW_CUDA_CHECK(cudaMemcpy(dst, src, numBytes, kind));
}
// Copies n elements of type T from src to dst, both in device memory.
// Forwards to CUDACopy with cudaMemcpyDeviceToDevice.
//
// src - source array (device memory, per the transfer kind).
// n   - number of elements (not bytes).
// dst - destination array (device memory, per the transfer kind).
template <typename T>
void CUDACopyDeviceToDevice(const T* src, size_t n, T* dst)
{
    CUDACopy(src, n, dst, cudaMemcpyDeviceToDevice);
}
// Copies n elements of type T from host memory (src) to device memory (dst).
// Forwards to CUDACopy with cudaMemcpyHostToDevice.
//
// src - source array (host memory, per the transfer kind).
// n   - number of elements (not bytes).
// dst - destination array (device memory, per the transfer kind).
template <typename T>
void CUDACopyHostToDevice(const T* src, size_t n, T* dst)
{
    CUDACopy(src, n, dst, cudaMemcpyHostToDevice);
}
// Copies n elements of type T from device memory (src) to host memory (dst).
// Forwards to CUDACopy with cudaMemcpyDeviceToHost.
//
// src - source array (device memory, per the transfer kind).
// n   - number of elements (not bytes).
// dst - destination array (host memory, per the transfer kind).
template <typename T>
void CUDACopyDeviceToHost(const T* src, size_t n, T* dst)
{
    CUDACopy(src, n, dst, cudaMemcpyDeviceToHost);
}
Definition: pybind11Utils.hpp:20