CUDAUtils.hpp
Go to the documentation of this file.
1 // This code is based on Jet framework.
2 // Copyright (c) 2018 Doyub Kim
3 // CubbyFlow is voxel-based fluid simulation engine for computer games.
4 // Copyright (c) 2020 CubbyFlow Team
5 // Core Part: Chris Ohk, Junwoo Hwang, Jihong Sin, Seungwoo Yoo
6 // AI Part: Dongheon Cho, Minseo Kim
7 // We are making our contributions/submissions to this project solely in our
8 // personal capacity and are not conveying any rights to any intellectual
9 // property of any third parties.
10 
11 #ifndef CUBBYFLOW_CUDA_UTILS_HPP
12 #define CUBBYFLOW_CUDA_UTILS_HPP
13 
14 #ifdef CUBBYFLOW_USE_CUDA
15 
16 #include <Core/Utils/Macros.hpp>
17 
18 #include <cuda_runtime.h>
19 
20 #include <algorithm>
21 #include <cstdint>
22 
23 namespace CubbyFlow
24 {
25 inline CUBBYFLOW_CUDA_HOST_DEVICE unsigned int CUDADivRoundUp(unsigned int a,
26  unsigned int b)
27 {
28  return (a % b != 0) ? (a / b + 1) : (a / b);
29 }
30 
31 inline CUBBYFLOW_CUDA_HOST void CUDAComputeGridSize(unsigned int n,
32  unsigned int blockSize,
33  unsigned int& numBlocks,
34  unsigned int& numThreads)
35 {
36  numThreads = std::min(blockSize, n);
37  numBlocks = CUDADivRoundUp(n, numThreads);
38 }
39 
40 inline CUBBYFLOW_CUDA_HOST_DEVICE float2 operator+(float2 a, float2 b)
41 {
42  return make_float2(a.x + b.x, a.y + b.y);
43 }
44 
45 inline CUBBYFLOW_CUDA_HOST_DEVICE float3 operator+(float3 a, float3 b)
46 {
47  return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
48 }
49 
50 inline CUBBYFLOW_CUDA_HOST_DEVICE float4 operator+(float4 a, float4 b)
51 {
52  return make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
53 }
54 
55 inline CUBBYFLOW_CUDA_HOST_DEVICE float2 operator-(float2 a, float2 b)
56 {
57  return make_float2(a.x - b.x, a.y - b.y);
58 }
59 
60 inline CUBBYFLOW_CUDA_HOST_DEVICE float3 operator-(float3 a, float3 b)
61 {
62  return make_float3(a.x - b.x, a.y - b.y, a.z - b.z);
63 }
64 
65 inline CUBBYFLOW_CUDA_HOST_DEVICE float4 operator-(float4 a, float4 b)
66 {
67  return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
68 }
69 
70 inline CUBBYFLOW_CUDA_HOST_DEVICE float2 operator*(float a, float2 b)
71 {
72  return make_float2(a * b.x, a * b.y);
73 }
74 
75 inline CUBBYFLOW_CUDA_HOST_DEVICE float3 operator*(float a, float3 b)
76 {
77  return make_float3(a * b.x, a * b.y, a * b.z);
78 }
79 
80 inline CUBBYFLOW_CUDA_HOST_DEVICE float4 operator*(float a, float4 b)
81 {
82  return make_float4(a * b.x, a * b.y, a * b.z, a * b.w);
83 }
84 
85 inline CUBBYFLOW_CUDA_HOST_DEVICE float2 operator*(float2 a, float b)
86 {
87  return make_float2(a.x * b, a.y * b);
88 }
89 
90 inline CUBBYFLOW_CUDA_HOST_DEVICE float3 operator*(float3 a, float b)
91 {
92  return make_float3(a.x * b, a.y * b, a.z * b);
93 }
94 
95 inline CUBBYFLOW_CUDA_HOST_DEVICE float4 operator*(float4 a, float b)
96 {
97  return make_float4(a.x * b, a.y * b, a.z * b, a.w * b);
98 }
99 
100 inline CUBBYFLOW_CUDA_HOST_DEVICE float2 operator*(float2 a, float2 b)
101 {
102  return make_float2(a.x * b.x, a.y * b.y);
103 }
104 
105 inline CUBBYFLOW_CUDA_HOST_DEVICE float3 operator*(float3 a, float3 b)
106 {
107  return make_float3(a.x * b.x, a.y * b.y, a.z * b.z);
108 }
109 
110 inline CUBBYFLOW_CUDA_HOST_DEVICE float4 operator*(float4 a, float4 b)
111 {
112  return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
113 }
114 
115 inline CUBBYFLOW_CUDA_HOST_DEVICE float2 operator/(float a, float2 b)
116 {
117  return make_float2(a / b.x, a / b.y);
118 }
119 
120 inline CUBBYFLOW_CUDA_HOST_DEVICE float3 operator/(float a, float3 b)
121 {
122  return make_float3(a / b.x, a / b.y, a / b.z);
123 }
124 
125 inline CUBBYFLOW_CUDA_HOST_DEVICE float4 operator/(float a, float4 b)
126 {
127  return make_float4(a / b.x, a / b.y, a / b.z, a / b.w);
128 }
129 
130 inline CUBBYFLOW_CUDA_HOST_DEVICE float2 operator/(float2 a, float b)
131 {
132  return make_float2(a.x / b, a.y / b);
133 }
134 
135 inline CUBBYFLOW_CUDA_HOST_DEVICE float3 operator/(float3 a, float b)
136 {
137  return make_float3(a.x / b, a.y / b, a.z / b);
138 }
139 
140 inline CUBBYFLOW_CUDA_HOST_DEVICE float4 operator/(float4 a, float b)
141 {
142  return make_float4(a.x / b, a.y / b, a.z / b, a.w / b);
143 }
144 
145 inline CUBBYFLOW_CUDA_HOST_DEVICE float2 operator/(float2 a, float2 b)
146 {
147  return make_float2(a.x / b.x, a.y / b.y);
148 }
149 
150 inline CUBBYFLOW_CUDA_HOST_DEVICE float3 operator/(float3 a, float3 b)
151 {
152  return make_float3(a.x / b.x, a.y / b.y, a.z / b.z);
153 }
154 
155 inline CUBBYFLOW_CUDA_HOST_DEVICE float4 operator/(float4 a, float4 b)
156 {
157  return make_float4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
158 }
159 
160 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator+=(float2& a, float b)
161 {
162  a.x += b;
163  a.y += b;
164 }
165 
166 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator+=(float3& a, float b)
167 {
168  a.x += b;
169  a.y += b;
170  a.z += b;
171 }
172 
173 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator+=(float4& a, float b)
174 {
175  a.x += b;
176  a.y += b;
177  a.z += b;
178  a.w += b;
179 }
180 
181 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator+=(float2& a, float2 b)
182 {
183  a.x += b.x;
184  a.y += b.y;
185 }
186 
187 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator+=(float3& a, float3 b)
188 {
189  a.x += b.x;
190  a.y += b.y;
191  a.z += b.z;
192 }
193 
194 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator+=(float4& a, float4 b)
195 {
196  a.x += b.x;
197  a.y += b.y;
198  a.z += b.z;
199  a.w += b.w;
200 }
201 
202 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator-=(float2& a, float b)
203 {
204  a.x -= b;
205  a.y -= b;
206 }
207 
208 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator-=(float3& a, float b)
209 {
210  a.x -= b;
211  a.y -= b;
212  a.z -= b;
213 }
214 
215 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator-=(float4& a, float b)
216 {
217  a.x -= b;
218  a.y -= b;
219  a.z -= b;
220  a.w -= b;
221 }
222 
223 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator-=(float2& a, float2 b)
224 {
225  a.x -= b.x;
226  a.y -= b.y;
227 }
228 
229 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator-=(float3& a, float3 b)
230 {
231  a.x -= b.x;
232  a.y -= b.y;
233  a.z -= b.z;
234 }
235 
236 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator-=(float4& a, float4 b)
237 {
238  a.x -= b.x;
239  a.y -= b.y;
240  a.z -= b.z;
241  a.w -= b.w;
242 }
243 
244 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator*=(float2& a, float b)
245 {
246  a.x *= b;
247  a.y *= b;
248 }
249 
250 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator*=(float3& a, float b)
251 {
252  a.x *= b;
253  a.y *= b;
254  a.z *= b;
255 }
256 
257 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator*=(float4& a, float b)
258 {
259  a.x *= b;
260  a.y *= b;
261  a.z *= b;
262  a.w *= b;
263 }
264 
265 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator*=(float2& a, float2 b)
266 {
267  a.x *= b.x;
268  a.y *= b.y;
269 }
270 
271 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator*=(float3& a, float4 b)
272 {
273  a.x *= b.x;
274  a.y *= b.y;
275  a.z *= b.z;
276 }
277 
278 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator*=(float4& a, float4 b)
279 {
280  a.x *= b.x;
281  a.y *= b.y;
282  a.z *= b.z;
283  a.w *= b.w;
284 }
285 
286 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator/=(float2& a, float b)
287 {
288  a.x /= b;
289  a.y /= b;
290 }
291 
292 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator/=(float3& a, float b)
293 {
294  a.x /= b;
295  a.y /= b;
296  a.z /= b;
297 }
298 
299 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator/=(float4& a, float b)
300 {
301  a.x /= b;
302  a.y /= b;
303  a.z /= b;
304  a.w /= b;
305 }
306 
307 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator/=(float2& a, float4 b)
308 {
309  a.x /= b.x;
310  a.y /= b.y;
311 }
312 
313 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator/=(float3& a, float4 b)
314 {
315  a.x /= b.x;
316  a.y /= b.y;
317  a.z /= b.z;
318 }
319 
320 inline CUBBYFLOW_CUDA_HOST_DEVICE void operator/=(float4& a, float4 b)
321 {
322  a.x /= b.x;
323  a.y /= b.y;
324  a.z /= b.z;
325  a.w /= b.w;
326 }
327 
328 inline CUBBYFLOW_CUDA_HOST_DEVICE bool operator==(float2 a, float2 b)
329 {
330  return std::abs(a.x - b.x) < 1e-6f && std::abs(a.y - b.y) < 1e-6f;
331 }
332 
333 inline CUBBYFLOW_CUDA_HOST_DEVICE bool operator==(float3 a, float3 b)
334 {
335  return std::abs(a.x - b.x) < 1e-6f && std::abs(a.y - b.y) < 1e-6f &&
336  std::abs(a.z - b.z) < 1e-6f;
337 }
338 
339 inline CUBBYFLOW_CUDA_HOST_DEVICE bool operator==(float4 a, float4 b)
340 {
341  return std::abs(a.x - b.x) < 1e-6f && std::abs(a.y - b.y) < 1e-6f &&
342  std::abs(a.z - b.z) < 1e-6f && std::abs(a.w - b.w) < 1e-6f;
343 }
344 
345 inline CUBBYFLOW_CUDA_HOST_DEVICE float Dot(float2 a, float2 b)
346 {
347  return a.x * b.x + a.y * b.y;
348 }
349 
350 inline CUBBYFLOW_CUDA_HOST_DEVICE float Dot(float3 a, float3 b)
351 {
352  return a.x * b.x + a.y * b.y + a.z * b.z;
353 }
354 
355 inline CUBBYFLOW_CUDA_HOST_DEVICE float Dot(float4 a, float4 b)
356 {
357  return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w;
358 }
359 
360 inline CUBBYFLOW_CUDA_HOST_DEVICE float LengthSquared(float2 v)
361 {
362  return Dot(v, v);
363 }
364 
365 inline CUBBYFLOW_CUDA_HOST_DEVICE float LengthSquared(float3 v)
366 {
367  return Dot(v, v);
368 }
369 
370 inline CUBBYFLOW_CUDA_HOST_DEVICE float LengthSquared(float4 v)
371 {
372  return Dot(v, v);
373 }
374 
375 inline CUBBYFLOW_CUDA_HOST_DEVICE float Length(float2 v)
376 {
377  return sqrtf(LengthSquared(v));
378 }
379 
380 inline CUBBYFLOW_CUDA_HOST_DEVICE float Length(float3 v)
381 {
382  return sqrtf(LengthSquared(v));
383 }
384 
385 inline CUBBYFLOW_CUDA_HOST_DEVICE float Length(float4 v)
386 {
387  return sqrtf(LengthSquared(v));
388 }
389 
390 template <typename VectorType>
391 inline CUBBYFLOW_CUDA_HOST_DEVICE float2 ToFloat2(const VectorType& vec)
392 {
393  return make_float2(vec.x, vec.y);
394 }
395 
396 template <typename VectorType>
397 inline CUBBYFLOW_CUDA_HOST_DEVICE float3 ToFloat3(const VectorType& vec)
398 {
399  return make_float3(vec.x, vec.y, vec.z);
400 }
401 
402 template <typename VectorType>
403 inline CUBBYFLOW_CUDA_HOST_DEVICE float4 ToFloat4(const VectorType& vec,
404  float w)
405 {
406  return make_float4(vec.x, vec.y, vec.z, w);
407 }
408 
409 template <typename VectorType>
410 inline CUBBYFLOW_CUDA_HOST_DEVICE float4 ToFloat4(const VectorType& vec)
411 {
412  return make_float4(vec.x, vec.y, vec.z, vec.w);
413 }
414 
415 template <typename SizeType>
416 inline CUBBYFLOW_CUDA_HOST_DEVICE int2 ToInt2(const SizeType& size)
417 {
418  return make_int2(static_cast<int>(size.x), static_cast<int>(size.y));
419 }
420 
421 template <typename SizeType>
422 inline CUBBYFLOW_CUDA_HOST_DEVICE int3 ToInt3(const SizeType& size)
423 {
424  return make_int3(static_cast<int>(size.x), static_cast<int>(size.y),
425  static_cast<int>(size.z));
426 }
427 
428 template <typename SizeType>
429 inline CUBBYFLOW_CUDA_HOST_DEVICE uint2 ToUInt2(const SizeType& size)
430 {
431  return make_uint2(static_cast<uint32_t>(size.x),
432  static_cast<uint32_t>(size.y));
433 }
434 
435 template <typename SizeType>
436 inline CUBBYFLOW_CUDA_HOST_DEVICE uint3 ToUInt3(const SizeType& size)
437 {
438  return make_uint3(static_cast<uint32_t>(size.x),
439  static_cast<uint32_t>(size.y),
440  static_cast<uint32_t>(size.z));
441 }
442 } // namespace CubbyFlow
443 
444 #endif
445 
446 #endif
MatrixCSR< T > operator-(const MatrixCSR< T > &a)
Definition: MatrixCSR-Impl.hpp:1029
constexpr std::enable_if_t< IsMatrixSizeStatic< Rows, Cols >), bool > operator==(const MatrixExpression< T, Rows, Cols, M1 > &a, const MatrixExpression< T, Rows, Cols, M2 > &b)
Definition: Matrix-Impl.hpp:1408
MatrixCSR< T > operator/(const MatrixCSR< T > &a, T b)
Definition: MatrixCSR-Impl.hpp:1090
void operator+=(Matrix< T, R1, C1 > &a, const MatrixExpression< T, R2, C2, M2 > &b)
Definition: Matrix-Impl.hpp:1342
Definition: pybind11Utils.hpp:20
void operator/=(Matrix< T, Rows, Cols > &a, const T &b)
Definition: Matrix-Impl.hpp:1400
MatrixCSR< T > operator+(const MatrixCSR< T > &a, const MatrixCSR< T > &b)
Definition: MatrixCSR-Impl.hpp:1035
void operator*=(Matrix< T, R1, C1 > &a, const MatrixExpression< T, R2, C2, M2 > &b)
Definition: Matrix-Impl.hpp:1366
void operator-=(Matrix< T, R1, C1 > &a, const MatrixExpression< T, R2, C2, M2 > &b)
Definition: Matrix-Impl.hpp:1354
Vector< T, 3 > operator*(const Quaternion< T > &q, const Vector< T, 3 > &v)
Returns quaternion q * vector v.
Definition: Quaternion-Impl.hpp:543