CUDAArray-Impl.hpp
Go to the documentation of this file.
1 // This code is based on Jet framework.
2 // Copyright (c) 2018 Doyub Kim
3 // CubbyFlow is voxel-based fluid simulation engine for computer games.
4 // Copyright (c) 2020 CubbyFlow Team
5 // Core Part: Chris Ohk, Junwoo Hwang, Jihong Sin, Seungwoo Yoo
6 // AI Part: Dongheon Cho, Minseo Kim
7 // We are making my contributions/submissions to this project solely in our
8 // personal capacity and are not conveying any rights to any intellectual
9 // property of any third parties.
10 
11 #ifndef CUBBYFLOW_CUDA_ARRAY_IMPL_HPP
12 #define CUBBYFLOW_CUDA_ARRAY_IMPL_HPP
13 
14 #ifdef CUBBYFLOW_USE_CUDA
15 
16 namespace CubbyFlow
17 {
18 #ifdef __CUDACC__
19 namespace Internal
20 {
21 template <typename T, size_t N, size_t I>
22 struct CUDABlockCopyHelper
23 {
24  template <typename... RemainingIndices>
25  CUBBYFLOW_CUDA_HOST_DEVICE static void Call(CUDAArrayView<const T, N> src,
26  CUDAStdArray<size_t, N> size,
27  CUDAArrayView<T, N> dst,
28  RemainingIndices... indices)
29  {
30  for (size_t i = 0; i < size[I - 1]; ++i)
31  {
32  CUDABlockCopyHelper<T, N, I - 1>::Call(src, size, dst, i,
33  indices...);
34  }
35  }
36 };
37 
38 template <typename T, size_t N>
39 struct CUDABlockCopyHelper<T, N, 1>
40 {
41  template <typename... RemainingIndices>
42  CUBBYFLOW_CUDA_HOST_DEVICE static void Call(CUDAArrayView<const T, N> src,
43  CUDAStdArray<size_t, N> size,
44  CUDAArrayView<T, N> dst,
45  RemainingIndices... indices)
46  {
47  for (size_t i = 0; i < size[0]; ++i)
48  {
49  dst(i, indices...) = src(i, indices...);
50  }
51  }
52 };
53 
54 template <typename T, size_t N>
55 __global__ void CUDABlockCopyKernelN(CUDAArrayView<const T, N> src,
56  CUDAStdArray<size_t, N> size,
57  CUDAArrayView<T, N> dst)
58 {
59  size_t i = blockIdx.x * blockDim.x + threadIdx.x;
60  if (i < size[N - 1])
61  {
62  CUDABlockCopyHelper<T, N, N - 1>::Call(src, size, dst, i);
63  }
64 }
65 
66 template <typename T>
67 __global__ void CUDABlockCopyKernel1(CUDAArrayView<const T, 1> src,
68  CUDAStdArray<size_t, 1> size,
69  CUDAArrayView<T, 1> dst)
70 {
71  size_t i = blockIdx.x * blockDim.x + threadIdx.x;
72  if (i < size[0])
73  {
74  dst[i] = src[i];
75  }
76 }
77 
78 template <typename T, size_t N>
79 struct CUDABlockCopy
80 {
81  static void Call(CUDAArrayView<const T, N> src,
82  CUDAStdArray<size_t, N> size, CUDAArrayView<T, N> dst)
83  {
84  if (size[N - 1] == 0)
85  {
86  return;
87  }
88 
89  // Assuming i-major
90  unsigned int numBlocks, numThreads;
91  CUDAComputeGridSize((unsigned int)size[N - 1], 256, numBlocks,
92  numThreads);
93  CUDABlockCopyKernelN<<<numBlocks, numThreads>>>(src, size, dst);
94  CUBBYFLOW_CUDA_CHECK_LAST_ERROR(
95  "Failed executing CUDABlockCopyKernelN");
96  }
97 };
98 
99 template <typename T>
100 struct CUDABlockCopy<T, 1>
101 {
102  static void Call(CUDAArrayView<const T, 1> src,
103  CUDAStdArray<size_t, 1> size, CUDAArrayView<T, 1> dst)
104  {
105  if (size[0] == 0)
106  {
107  return;
108  }
109 
110  // Assuming i-major
111  unsigned int numBlocks, numThreads;
112  CUDAComputeGridSize((unsigned int)size[0], 256, numBlocks, numThreads);
113  CUDABlockCopyKernel1<<<numBlocks, numThreads>>>(src, size, dst);
114  CUBBYFLOW_CUDA_CHECK_LAST_ERROR(
115  "Failed executing CUDABlockCopyKernel1");
116  }
117 };
118 } // namespace Internal
119 #endif
120 
121 template <typename T, size_t N>
122 CUDAArray<T, N>::CUDAArray() : Base()
123 {
124  // Do nothing
125 }
126 
127 template <typename T, size_t N>
128 CUDAArray<T, N>::CUDAArray(const CUDAStdArray<size_t, N>& size,
129  const T& initVal)
130  : CUDAArray{}
131 {
132  size_t l = size[0];
133 
134  for (size_t i = 1; i < N; ++i)
135  {
136  l *= size[i];
137  }
138 
139  m_data.Resize(l, initVal);
140  Base::SetPtrAndSize(m_data.data(), size);
141 }
142 
143 #ifdef __CUDACC__
144 template <typename T, size_t N>
145 template <typename... Args>
146 CUDAArray<T, N>::CUDAArray(size_t nx, Args... args) : CUDAArray{}
147 {
148  Vector<size_t, N> newSizeVec;
149  T initVal;
150  Internal::GetSizeAndInitVal<T, N, N - 1>::Call(newSizeVec, initVal, nx,
151  args...);
152  CUDAStdArray<size_t, N> newSize(newSizeVec);
153  CUDAArray newArray(newSize, initVal);
154  *this = std::move(newArray);
155 }
156 
157 template <typename T, size_t N>
158 CUDAArray<T, N>::CUDAArray(NestedInitializerListsT<T, N> lst) : CUDAArray{}
159 {
160  Vector<size_t, N> newSize;
162 
163  Array<T, N> newCpuArray(newSize);
165  CopyFrom(newCpuArray);
166 }
167 #endif
168 
169 template <typename T, size_t N>
170 template <size_t M>
171 CUDAArray<T, N>::CUDAArray(
172  const std::enable_if_t<(M == 1), std::vector<T>>& vec)
173  : CUDAArray()
174 {
175  CopyFrom(vec);
176 }
177 
178 template <typename T, size_t N>
179 template <typename OtherDerived>
180 CUDAArray<T, N>::CUDAArray(const ArrayBase<T, N, OtherDerived>& other)
181  : CUDAArray()
182 {
183  CopyFrom(other);
184 }
185 
186 template <typename T, size_t N>
187 template <typename OtherDerived>
188 CUDAArray<T, N>::CUDAArray(const CUDAArrayBase<T, N, OtherDerived>& other)
189  : CUDAArray()
190 {
191  CopyFrom(other);
192 }
193 
194 template <typename T, size_t N>
195 CUDAArray<T, N>::CUDAArray(const CUDAArray& other) : CUDAArray{}
196 {
197  CopyFrom(other);
198 }
199 
200 template <typename T, size_t N>
201 CUDAArray<T, N>::CUDAArray(CUDAArray&& other) noexcept : CUDAArray{}
202 {
203  *this = std::move(other);
204 }
205 
206 template <typename T, size_t N>
207 template <size_t M>
208 CUDAArray<T, N>& CUDAArray<T, N>::operator=(
209  const std::enable_if_t<(M == 1), std::vector<T>>& vec)
210 {
211  CopyFrom(vec);
212  return *this;
213 }
214 
215 template <typename T, size_t N>
216 template <typename OtherDerived>
217 CUDAArray<T, N>& CUDAArray<T, N>::operator=(
218  const ArrayBase<T, N, OtherDerived>& other)
219 {
220  CopyFrom(other);
221  return *this;
222 }
223 
224 template <typename T, size_t N>
225 template <typename OtherDerived>
226 CUDAArray<T, N>& CUDAArray<T, N>::operator=(
227  const ArrayBase<const T, N, OtherDerived>& other)
228 {
229  CopyFrom(other);
230  return *this;
231 }
232 
233 template <typename T, size_t N>
234 template <typename OtherDerived>
235 CUDAArray<T, N>& CUDAArray<T, N>::operator=(
236  const CUDAArrayBase<T, N, OtherDerived>& other)
237 {
238  CopyFrom(other);
239  return *this;
240 }
241 
242 template <typename T, size_t N>
243 template <typename OtherDerived>
244 CUDAArray<T, N>& CUDAArray<T, N>::operator=(
245  const CUDAArrayBase<const T, N, OtherDerived>& other)
246 {
247  CopyFrom(other);
248  return *this;
249 }
250 
251 template <typename T, size_t N>
252 CUDAArray<T, N>& CUDAArray<T, N>::operator=(const CUDAArray& other)
253 {
254  m_data = other.m_data;
255  Base::SetPtrAndSize(m_data.data(), other.Size());
256  return *this;
257 }
258 
259 template <typename T, size_t N>
260 CUDAArray<T, N>& CUDAArray<T, N>::operator=(CUDAArray&& other) noexcept
261 {
262  Swap(other);
263  other.Clear();
264  return *this;
265 }
266 
267 template <typename T, size_t N>
268 template <typename A, size_t M>
269 std::enable_if_t<(M == 1), void> CUDAArray<T, N>::CopyFrom(
270  const std::vector<T, A>& vec)
271 {
272  CUDAArray newArray(vec.size());
273  newArray.m_data.CopyFrom(vec);
274  newArray.SetPtrAndSize(newArray.m_data.data(), newArray.Size());
275  *this = std::move(newArray);
276 }
277 
278 template <typename T, size_t N>
279 template <typename OtherDerived>
280 void CUDAArray<T, N>::CopyFrom(const ArrayBase<T, N, OtherDerived>& other)
281 {
282  CUDAArray newArray(other.Size());
283  CUDACopyHostToDevice(other.data(), other.Length(), newArray.data());
284  *this = std::move(newArray);
285 }
286 
287 template <typename T, size_t N>
288 template <typename OtherDerived>
289 void CUDAArray<T, N>::CopyFrom(const ArrayBase<const T, N, OtherDerived>& other)
290 {
291  CUDAArray newArray(other.Size());
292  CUDACopyHostToDevice(other.data(), other.Length(), newArray.data());
293  *this = std::move(newArray);
294 }
295 
296 template <typename T, size_t N>
297 template <typename OtherDerived>
298 void CUDAArray<T, N>::CopyFrom(const CUDAArrayBase<T, N, OtherDerived>& other)
299 {
300  CUDAArray newArray(other.Size());
301  CUDACopyDeviceToDevice(other.data(), other.Length(), newArray.data());
302  *this = std::move(newArray);
303 }
304 
305 template <typename T, size_t N>
306 template <typename OtherDerived>
307 void CUDAArray<T, N>::CopyFrom(
308  const CUDAArrayBase<const T, N, OtherDerived>& other)
309 {
310  CUDAArray newArray(other.Size());
311  CUDACopyDeviceToDevice(other.data(), other.Length(), newArray.data());
312  *this = std::move(newArray);
313 }
314 
315 template <typename T, size_t N>
316 template <typename A, size_t M>
317 std::enable_if_t<(M == 1), void> CUDAArray<T, N>::CopyTo(std::vector<T, A>& vec)
318 {
319  vec.resize(Length());
320  CUDACopyDeviceToHost(data(), Length(), vec.data());
321 }
322 
323 template <typename T, size_t N>
324 void CUDAArray<T, N>::CopyTo(Array<T, N>& other)
325 {
326  other.Resize(m_size.ToVector());
327  CUDACopyDeviceToHost(data(), Length(), other.data());
328 }
329 
330 template <typename T, size_t N>
331 void CUDAArray<T, N>::CopyTo(ArrayView<T, N>& other)
332 {
333  assert(m_size.ToVector() == other.Size());
334  CUDACopyDeviceToHost(data(), Length(), other.data());
335 }
336 
337 template <typename T, size_t N>
338 void CUDAArray<T, N>::CopyTo(CUDAArray<T, N>& other)
339 {
340  other.Resize(m_size.ToVector());
341  CUDACopyDeviceToDevice(data(), Length(), other.data());
342 }
343 
344 template <typename T, size_t N>
345 void CUDAArray<T, N>::CopyTo(CUDAArrayView<T, N>& other)
346 {
347  assert(Length() == other.Length());
348  CUDACopyDeviceToDevice(data(), Length(), other.data());
349 }
350 
351 template <typename T, size_t N>
352 void CUDAArray<T, N>::Fill(const T& val)
353 {
354  m_data.Fill(val);
355 }
356 
357 #ifdef __CUDACC__
358 template <typename T, size_t N>
359 void CUDAArray<T, N>::Resize(CUDAStdArray<size_t, N> newSize, const T& initVal)
360 {
361  CUDAArray newArray(newSize, initVal);
362  CUDAStdArray<size_t, N> minSize;
363 
364  for (size_t i = 0; i < N; ++i)
365  {
366  minSize[i] = std::min(m_size[i], newArray.m_size[i]);
367  }
368 
369  Internal::CUDABlockCopy<T, N>::Call(View(), minSize, newArray.View());
370 
371  *this = std::move(newArray);
372 }
373 
374 template <typename T, size_t N>
375 template <typename... Args>
376 void CUDAArray<T, N>::Resize(size_t nx, Args... args)
377 {
378  Vector<size_t, N> newSizeVec;
379  T initVal;
380 
381  Internal::GetSizeAndInitVal<T, N, N - 1>::Call(newSizeVec, initVal, nx,
382  args...);
383 
384  CUDAStdArray<size_t, N> newSize(newSizeVec);
385  Resize(newSize, initVal);
386 }
387 #endif
388 
389 template <typename T, size_t N>
390 template <size_t M>
391 std::enable_if_t<(M == 1), void> CUDAArray<T, N>::Append(const T& val)
392 {
393  m_data.PushBack(val);
394  Base::SetPtrAndSize(m_data.data(), m_data.Size());
395 }
396 
397 template <typename T, size_t N>
398 template <typename A, size_t M>
399 std::enable_if_t<(M == 1), void> CUDAArray<T, N>::Append(
400  const std::vector<T, A>& extra)
401 {
402  m_data.Append(extra);
403  m_size[0] = m_data.size();
404 }
405 
406 template <typename T, size_t N>
407 template <typename OtherDerived, size_t M>
408 std::enable_if_t<(M == 1), void> CUDAArray<T, N>::Append(
409  const ArrayBase<T, N, OtherDerived>& extra)
410 {
411  CUDAArray newArray(Length() + extra.Length());
412  CUDACopy(data(), Length(), newArray.data());
413  CUDACopyHostToDevice(extra.data(), extra.Length(),
414  newArray.data() + m_size[0]);
415  Swap(newArray);
416 }
417 
418 template <typename T, size_t N>
419 template <typename OtherDerived, size_t M>
420 std::enable_if_t<(M == 1), void> CUDAArray<T, N>::Append(
421  const CUDAArrayBase<T, N, OtherDerived>& extra)
422 {
423  CUDAArray newArray(Length() + extra.Length());
424  CUDACopy(data(), Length(), newArray.data());
425  CUDACopy(extra.data(), extra.Length(), newArray.data() + m_size[0]);
426  Swap(newArray);
427 }
428 
429 template <typename T, size_t N>
430 void CUDAArray<T, N>::Clear()
431 {
433  m_data.Clear();
434 }
435 
436 template <typename T, size_t N>
437 void CUDAArray<T, N>::Swap(CUDAArray& other)
438 {
439  Base::SwapPtrAndSize(other);
440  m_data.Swap(other.m_data);
441 }
442 
443 template <typename T, size_t N>
444 CUDAArrayView<T, N> CUDAArray<T, N>::View()
445 {
446  return CUDAArrayView<T, N>(*this);
447 };
448 
449 template <typename T, size_t N>
450 CUDAArrayView<const T, N> CUDAArray<T, N>::View() const
451 {
452  return CUDAArrayView<const T, N>(*this);
453 };
454 } // namespace CubbyFlow
455 
456 #endif
457 
458 #endif
void SwapPtrAndSize(ArrayBase &other)
Definition: ArrayBase-Impl.hpp:269
void ClearPtrAndSize()
Definition: ArrayBase-Impl.hpp:263
static size_t Call(Vector< size_t, N > &size, NestedInitializerListsT< T, I > lst)
Definition: Array-Impl.hpp:54
Vector< size_t, N > m_size
Definition: ArrayBase.hpp:125
Definition: pybind11Utils.hpp:20
void Fill(ArrayView< T, N > a, const Vector< size_t, N > &begin, const Vector< size_t, N > &end, const T &val)
Definition: ArrayUtils-Impl.hpp:19
static void Call(Vector< size_t, N > &size, T &value, size_t n, Args... args)
Definition: Array-Impl.hpp:27
size_t Length() const
Definition: ArrayBase-Impl.hpp:84
Pointer data()
Definition: ArrayBase-Impl.hpp:39
void SetPtrAndSize(Pointer ptr, size_t ni, Args... args)
Definition: ArrayBase-Impl.hpp:250
static void Call(Array< T, N > &arr, NestedInitializerListsT< T, I > lst)
Definition: Array-Impl.hpp:98