11 #ifndef CUBBYFLOW_CUDA_UTILS_HPP 12 #define CUBBYFLOW_CUDA_UTILS_HPP 14 #ifdef CUBBYFLOW_USE_CUDA 18 #include <cuda_runtime.h> 25 inline CUBBYFLOW_CUDA_HOST_DEVICE
unsigned int CUDADivRoundUp(unsigned int a, unsigned int b)
{
    // Ceiling division: one extra bucket is needed whenever b does not
    // divide a evenly. Splitting quotient/remainder avoids the overflow that
    // the (a + b - 1) / b formulation can hit for large a.
    const unsigned int quotient = a / b;
    const unsigned int remainder = a % b;

    if (remainder == 0)
    {
        return quotient;
    }

    return quotient + 1;
}
31 inline CUBBYFLOW_CUDA_HOST
void CUDAComputeGridSize(
unsigned int n,
32 unsigned int blockSize,
33 unsigned int& numBlocks,
34 unsigned int& numThreads)
36 numThreads = std::min(blockSize, n);
37 numBlocks = CUDADivRoundUp(n, numThreads);
40 inline CUBBYFLOW_CUDA_HOST_DEVICE float2
operator+(float2 a, float2 b)
42 return make_float2(a.x + b.x, a.y + b.y);
45 inline CUBBYFLOW_CUDA_HOST_DEVICE float3
operator+(float3 a, float3 b)
47 return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
50 inline CUBBYFLOW_CUDA_HOST_DEVICE float4
operator+(float4 a, float4 b)
52 return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
55 inline CUBBYFLOW_CUDA_HOST_DEVICE float2
operator-(float2 a, float2 b)
57 return make_float2(a.x - b.x, a.y - b.y);
60 inline CUBBYFLOW_CUDA_HOST_DEVICE float3
operator-(float3 a, float3 b)
62 return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
65 inline CUBBYFLOW_CUDA_HOST_DEVICE float4
operator-(float4 a, float4 b)
67 return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
70 inline CUBBYFLOW_CUDA_HOST_DEVICE float2
operator*(
float a, float2 b)
72 return make_float2(a * b.x, a * b.y);
75 inline CUBBYFLOW_CUDA_HOST_DEVICE float3
operator*(
float a, float3 b)
77 return make_float3(a * b.x, a * b.y, a * b.z);
80 inline CUBBYFLOW_CUDA_HOST_DEVICE float4
operator*(
float a, float4 b)
82 return make_float4(a * b.x, a * b.y, a * b.z, a * b.w);
85 inline CUBBYFLOW_CUDA_HOST_DEVICE float2
operator*(float2 a,
float b)
87 return make_float2(a.x * b, a.y * b);
90 inline CUBBYFLOW_CUDA_HOST_DEVICE float3
operator*(float3 a,
float b)
92 return make_float3(a.x * b, a.y * b, a.z * b);
95 inline CUBBYFLOW_CUDA_HOST_DEVICE float4
operator*(float4 a,
float b)
97 return make_float4(a.x * b, a.y * b, a.z * b, a.w * b);
100 inline CUBBYFLOW_CUDA_HOST_DEVICE float2
operator*(float2 a, float2 b)
102 return make_float2(a.x * b.x, a.y * b.y);
105 inline CUBBYFLOW_CUDA_HOST_DEVICE float3
operator*(float3 a, float3 b)
107 return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
110 inline CUBBYFLOW_CUDA_HOST_DEVICE float4
operator*(float4 a, float4 b)
112 return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
115 inline CUBBYFLOW_CUDA_HOST_DEVICE float2
operator/(
float a, float2 b)
117 return make_float2(a / b.x, a / b.y);
120 inline CUBBYFLOW_CUDA_HOST_DEVICE float3
operator/(
float a, float3 b)
122 return make_float3(a / b.x, a / b.y, a / b.z);
125 inline CUBBYFLOW_CUDA_HOST_DEVICE float4
operator/(
float a, float4 b)
127 return make_float4(a / b.x, a / b.y, a / b.z, a / b.w);
130 inline CUBBYFLOW_CUDA_HOST_DEVICE float2
operator/(float2 a,
float b)
132 return make_float2(a.x / b, a.y / b);
135 inline CUBBYFLOW_CUDA_HOST_DEVICE float3
operator/(float3 a,
float b)
137 return make_float3(a.x / b, a.y / b, a.z / b);
140 inline CUBBYFLOW_CUDA_HOST_DEVICE float4
operator/(float4 a,
float b)
142 return make_float4(a.x / b, a.y / b, a.z / b, a.w / b);
145 inline CUBBYFLOW_CUDA_HOST_DEVICE float2
operator/(float2 a, float2 b)
147 return make_float2(a.x / b.x, a.y / b.y);
150 inline CUBBYFLOW_CUDA_HOST_DEVICE float3
operator/(float3 a, float3 b)
152 return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
155 inline CUBBYFLOW_CUDA_HOST_DEVICE float4
operator/(float4 a, float4 b)
157 return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
160 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator+=(float2& a,
float b)
166 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator+=(float3& a,
float b)
173 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator+=(float4& a,
float b)
181 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator+=(float2& a, float2 b)
187 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator+=(float3& a, float3 b)
194 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator+=(float4& a, float4 b)
202 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator-=(float2& a,
float b)
208 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator-=(float3& a,
float b)
215 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator-=(float4& a,
float b)
223 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator-=(float2& a, float2 b)
229 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator-=(float3& a, float3 b)
236 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator-=(float4& a, float4 b)
244 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator*=(float2& a,
float b)
250 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator*=(float3& a,
float b)
257 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator*=(float4& a,
float b)
265 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator*=(float2& a, float2 b)
271 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator*=(float3& a, float4 b)
278 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator*=(float4& a, float4 b)
286 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator/=(float2& a,
float b)
292 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator/=(float3& a,
float b)
299 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator/=(float4& a,
float b)
307 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator/=(float2& a, float4 b)
313 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator/=(float3& a, float4 b)
320 inline CUBBYFLOW_CUDA_HOST_DEVICE
void operator/=(float4& a, float4 b)
328 inline CUBBYFLOW_CUDA_HOST_DEVICE
bool operator==(float2 a, float2 b)
330 return std::abs(a.x - b.x) < 1e-6f && std::abs(a.y - b.y) < 1e-6f;
333 inline CUBBYFLOW_CUDA_HOST_DEVICE
bool operator==(float3 a, float3 b)
335 return std::abs(a.x - b.x) < 1e-6f && std::abs(a.y - b.y) < 1e-6f &&
336 std::abs(a.z - b.z) < 1e-6f;
339 inline CUBBYFLOW_CUDA_HOST_DEVICE
bool operator==(float4 a, float4 b)
341 return std::abs(a.x - b.x) < 1e-6f && std::abs(a.y - b.y) < 1e-6f &&
342 std::abs(a.z - b.z) < 1e-6f && std::abs(a.w - b.w) < 1e-6f;
345 inline CUBBYFLOW_CUDA_HOST_DEVICE
float Dot(float2 a, float2 b)
347 return a.x * b.x + a.y * b.y;
350 inline CUBBYFLOW_CUDA_HOST_DEVICE
float Dot(float3 a, float3 b)
352 return a.x * b.x + a.y * b.y + a.z * b.z;
355 inline CUBBYFLOW_CUDA_HOST_DEVICE
float Dot(float4 a, float4 b)
357 return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
360 inline CUBBYFLOW_CUDA_HOST_DEVICE
float LengthSquared(float2 v)
365 inline CUBBYFLOW_CUDA_HOST_DEVICE
float LengthSquared(float3 v)
370 inline CUBBYFLOW_CUDA_HOST_DEVICE
float LengthSquared(float4 v)
375 inline CUBBYFLOW_CUDA_HOST_DEVICE
float Length(float2 v)
377 return sqrtf(LengthSquared(v));
380 inline CUBBYFLOW_CUDA_HOST_DEVICE
float Length(float3 v)
382 return sqrtf(LengthSquared(v));
385 inline CUBBYFLOW_CUDA_HOST_DEVICE
float Length(float4 v)
387 return sqrtf(LengthSquared(v));
390 template <
typename VectorType>
391 inline CUBBYFLOW_CUDA_HOST_DEVICE float2 ToFloat2(
const VectorType& vec)
393 return make_float2(vec.x, vec.y);
396 template <
typename VectorType>
397 inline CUBBYFLOW_CUDA_HOST_DEVICE float3 ToFloat3(
const VectorType& vec)
399 return make_float3(vec.x, vec.y, vec.z);
402 template <
typename VectorType>
403 inline CUBBYFLOW_CUDA_HOST_DEVICE float4 ToFloat4(
const VectorType& vec,
406 return make_float4(vec.x, vec.y, vec.z, w);
409 template <
typename VectorType>
410 inline CUBBYFLOW_CUDA_HOST_DEVICE float4 ToFloat4(
const VectorType& vec)
412 return make_float4(vec.x, vec.y, vec.z, vec.w);
415 template <
typename SizeType>
416 inline CUBBYFLOW_CUDA_HOST_DEVICE int2 ToInt2(
const SizeType& size)
418 return make_int2(static_cast<int>(size.x), static_cast<int>(size.y));
421 template <
typename SizeType>
422 inline CUBBYFLOW_CUDA_HOST_DEVICE int3 ToInt3(
const SizeType& size)
424 return make_int3(static_cast<int>(size.x), static_cast<int>(size.y),
425 static_cast<int>(size.z));
428 template <
typename SizeType>
429 inline CUBBYFLOW_CUDA_HOST_DEVICE uint2 ToUInt2(
const SizeType& size)
431 return make_uint2(static_cast<uint32_t>(size.x),
432 static_cast<uint32_t>(size.y));
435 template <
typename SizeType>
436 inline CUBBYFLOW_CUDA_HOST_DEVICE uint3 ToUInt3(
const SizeType& size)
438 return make_uint3(static_cast<uint32_t>(size.x),
439 static_cast<uint32_t>(size.y),
440 static_cast<uint32_t>(size.z));
MatrixCSR< T > operator-(const MatrixCSR< T > &a)
Definition: MatrixCSR-Impl.hpp:1029
constexpr std::enable_if_t< IsMatrixSizeStatic< Rows, Cols >), bool > operator==(const MatrixExpression< T, Rows, Cols, M1 > &a, const MatrixExpression< T, Rows, Cols, M2 > &b)
Definition: Matrix-Impl.hpp:1408
MatrixCSR< T > operator/(const MatrixCSR< T > &a, T b)
Definition: MatrixCSR-Impl.hpp:1090
void operator+=(Matrix< T, R1, C1 > &a, const MatrixExpression< T, R2, C2, M2 > &b)
Definition: Matrix-Impl.hpp:1342
Definition: pybind11Utils.hpp:20
void operator/=(Matrix< T, Rows, Cols > &a, const T &b)
Definition: Matrix-Impl.hpp:1400
MatrixCSR< T > operator+(const MatrixCSR< T > &a, const MatrixCSR< T > &b)
Definition: MatrixCSR-Impl.hpp:1035
void operator*=(Matrix< T, R1, C1 > &a, const MatrixExpression< T, R2, C2, M2 > &b)
Definition: Matrix-Impl.hpp:1366
void operator-=(Matrix< T, R1, C1 > &a, const MatrixExpression< T, R2, C2, M2 > &b)
Definition: Matrix-Impl.hpp:1354
Vector< T, 3 > operator*(const Quaternion< T > &q, const Vector< T, 3 > &v)
Returns quaternion q * vector v.
Definition: Quaternion-Impl.hpp:543