// Kokkos Core Kernels Package -- Kokkos_CudaSpace.hpp
// (This file is a Doxygen-extracted source listing; navigation chrome removed.)
/*
//@HEADER
// ************************************************************************
//
//                        Kokkos v. 3.0
//       Copyright (2020) National Technology & Engineering
//               Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY NTESS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NTESS OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
44
45#ifndef KOKKOS_CUDASPACE_HPP
46#define KOKKOS_CUDASPACE_HPP
47
48#include <Kokkos_Macros.hpp>
49#if defined(KOKKOS_ENABLE_CUDA)
50
51#include <Kokkos_Core_fwd.hpp>
52
53#include <iosfwd>
54#include <typeinfo>
55#include <string>
56#include <memory>
57
58#include <Kokkos_HostSpace.hpp>
59#include <impl/Kokkos_SharedAlloc.hpp>
60
61#include <impl/Kokkos_Profiling_Interface.hpp>
62
63#include <Cuda/Kokkos_Cuda_abort.hpp>
64
65#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
66extern "C" bool kokkos_impl_cuda_pin_uvm_to_host();
67extern "C" void kokkos_impl_cuda_set_pin_uvm_to_host(bool);
68#endif
69
70/*--------------------------------------------------------------------------*/
71
72namespace Kokkos {
73
76class CudaSpace {
77 public:
79 using memory_space = CudaSpace;
80 using execution_space = Kokkos::Cuda;
82
83 using size_type = unsigned int;
84
85 /*--------------------------------*/
86
87 CudaSpace();
88 CudaSpace(CudaSpace&& rhs) = default;
89 CudaSpace(const CudaSpace& rhs) = default;
90 CudaSpace& operator=(CudaSpace&& rhs) = default;
91 CudaSpace& operator=(const CudaSpace& rhs) = default;
92 ~CudaSpace() = default;
93
95 void* allocate(const size_t arg_alloc_size) const;
96 void* allocate(const char* arg_label, const size_t arg_alloc_size,
97 const size_t arg_logical_size = 0) const;
98
100 void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
101 void deallocate(const char* arg_label, void* const arg_alloc_ptr,
102 const size_t arg_alloc_size,
103 const size_t arg_logical_size = 0) const;
104
105 private:
106 template <class, class, class, class>
108 void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
109 const size_t arg_logical_size = 0,
110 const Kokkos::Tools::SpaceHandle =
111 Kokkos::Tools::make_space_handle(name())) const;
112 void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
113 const size_t arg_alloc_size,
114 const size_t arg_logical_size = 0,
115 const Kokkos::Tools::SpaceHandle =
116 Kokkos::Tools::make_space_handle(name())) const;
117
118 public:
120 static constexpr const char* name() { return m_name; }
121
122 /*--------------------------------*/
124 KOKKOS_DEPRECATED static void access_error();
125 KOKKOS_DEPRECATED static void access_error(const void* const);
126
127 private:
128 int m_device;
129
130 static constexpr const char* m_name = "Cuda";
131 friend class Kokkos::Impl::SharedAllocationRecord<Kokkos::CudaSpace, void>;
132};
133} // namespace Kokkos
134
135/*--------------------------------------------------------------------------*/
136/*--------------------------------------------------------------------------*/
137
138namespace Kokkos {
139
143class CudaUVMSpace {
144 public:
146 using memory_space = CudaUVMSpace;
147 using execution_space = Cuda;
149 using size_type = unsigned int;
150
152 static bool available();
153
154 /*--------------------------------*/
156 KOKKOS_DEPRECATED static int number_of_allocations();
157
158 /*--------------------------------*/
159
160 /*--------------------------------*/
161
162 CudaUVMSpace();
163 CudaUVMSpace(CudaUVMSpace&& rhs) = default;
164 CudaUVMSpace(const CudaUVMSpace& rhs) = default;
165 CudaUVMSpace& operator=(CudaUVMSpace&& rhs) = default;
166 CudaUVMSpace& operator=(const CudaUVMSpace& rhs) = default;
167 ~CudaUVMSpace() = default;
168
170 void* allocate(const size_t arg_alloc_size) const;
171 void* allocate(const char* arg_label, const size_t arg_alloc_size,
172 const size_t arg_logical_size = 0) const;
173
175 void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
176 void deallocate(const char* arg_label, void* const arg_alloc_ptr,
177 const size_t arg_alloc_size,
178 const size_t arg_logical_size = 0) const;
179
180 private:
181 template <class, class, class, class>
183 void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
184 const size_t arg_logical_size = 0,
185 const Kokkos::Tools::SpaceHandle =
186 Kokkos::Tools::make_space_handle(name())) const;
187 void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
188 const size_t arg_alloc_size,
189 const size_t arg_logical_size = 0,
190 const Kokkos::Tools::SpaceHandle =
191 Kokkos::Tools::make_space_handle(name())) const;
192
193 public:
195 static constexpr const char* name() { return m_name; }
196
197#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
198 static bool cuda_pin_uvm_to_host();
199 static void cuda_set_pin_uvm_to_host(bool val);
200#endif
201 /*--------------------------------*/
202
203 private:
204 int m_device;
205
206#ifdef KOKKOS_IMPL_DEBUG_CUDA_PIN_UVM_TO_HOST
207 static bool kokkos_impl_cuda_pin_uvm_to_host_v;
208#endif
209 static constexpr const char* m_name = "CudaUVM";
210};
211
212} // namespace Kokkos
213
214/*--------------------------------------------------------------------------*/
215/*--------------------------------------------------------------------------*/
216
217namespace Kokkos {
218
222class CudaHostPinnedSpace {
223 public:
225
226 using execution_space = HostSpace::execution_space;
227 using memory_space = CudaHostPinnedSpace;
229 using size_type = unsigned int;
230
231 /*--------------------------------*/
232
233 CudaHostPinnedSpace();
234 CudaHostPinnedSpace(CudaHostPinnedSpace&& rhs) = default;
235 CudaHostPinnedSpace(const CudaHostPinnedSpace& rhs) = default;
236 CudaHostPinnedSpace& operator=(CudaHostPinnedSpace&& rhs) = default;
237 CudaHostPinnedSpace& operator=(const CudaHostPinnedSpace& rhs) = default;
238 ~CudaHostPinnedSpace() = default;
239
241 void* allocate(const size_t arg_alloc_size) const;
242 void* allocate(const char* arg_label, const size_t arg_alloc_size,
243 const size_t arg_logical_size = 0) const;
244
246 void deallocate(void* const arg_alloc_ptr, const size_t arg_alloc_size) const;
247 void deallocate(const char* arg_label, void* const arg_alloc_ptr,
248 const size_t arg_alloc_size,
249 const size_t arg_logical_size = 0) const;
250
251 private:
252 template <class, class, class, class>
254 void* impl_allocate(const char* arg_label, const size_t arg_alloc_size,
255 const size_t arg_logical_size = 0,
256 const Kokkos::Tools::SpaceHandle =
257 Kokkos::Tools::make_space_handle(name())) const;
258 void impl_deallocate(const char* arg_label, void* const arg_alloc_ptr,
259 const size_t arg_alloc_size,
260 const size_t arg_logical_size = 0,
261 const Kokkos::Tools::SpaceHandle =
262 Kokkos::Tools::make_space_handle(name())) const;
263
264 public:
266 static constexpr const char* name() { return m_name; }
267
268 private:
269 static constexpr const char* m_name = "CudaHostPinned";
270
271 /*--------------------------------*/
272};
273
274} // namespace Kokkos
275
276/*--------------------------------------------------------------------------*/
277/*--------------------------------------------------------------------------*/
278
279namespace Kokkos {
280namespace Impl {
281
282cudaStream_t cuda_get_deep_copy_stream();
283
284const std::unique_ptr<Kokkos::Cuda>& cuda_get_deep_copy_space(
285 bool initialize = true);
286
288 Kokkos::CudaSpace>::assignable,
289 "");
291 Kokkos::CudaUVMSpace>::assignable,
292 "");
293static_assert(
295 Kokkos::CudaHostPinnedSpace>::assignable,
296 "");
297
298//----------------------------------------
299
300template <>
301struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaSpace> {
302 enum : bool { assignable = false };
303 enum : bool { accessible = false };
304 enum : bool { deepcopy = true };
305};
306
307template <>
308struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaUVMSpace> {
309 // HostSpace::execution_space != CudaUVMSpace::execution_space
310 enum : bool { assignable = false };
311 enum : bool { accessible = true };
312 enum : bool { deepcopy = true };
313};
314
315template <>
316struct MemorySpaceAccess<Kokkos::HostSpace, Kokkos::CudaHostPinnedSpace> {
317 // HostSpace::execution_space == CudaHostPinnedSpace::execution_space
318 enum : bool { assignable = true };
319 enum : bool { accessible = true };
320 enum : bool { deepcopy = true };
321};
322
323//----------------------------------------
324
325template <>
326struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::HostSpace> {
327 enum : bool { assignable = false };
328 enum : bool { accessible = false };
329 enum : bool { deepcopy = true };
330};
331
332template <>
333struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaUVMSpace> {
334 // CudaSpace::execution_space == CudaUVMSpace::execution_space
335 enum : bool { assignable = true };
336 enum : bool { accessible = true };
337 enum : bool { deepcopy = true };
338};
339
340template <>
341struct MemorySpaceAccess<Kokkos::CudaSpace, Kokkos::CudaHostPinnedSpace> {
342 // CudaSpace::execution_space != CudaHostPinnedSpace::execution_space
343 enum : bool { assignable = false };
344 enum : bool { accessible = true }; // CudaSpace::execution_space
345 enum : bool { deepcopy = true };
346};
347
348//----------------------------------------
349// CudaUVMSpace::execution_space == Cuda
350// CudaUVMSpace accessible to both Cuda and Host
351
352template <>
353struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::HostSpace> {
354 enum : bool { assignable = false };
355 enum : bool { accessible = false }; // Cuda cannot access HostSpace
356 enum : bool { deepcopy = true };
357};
358
359template <>
360struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaSpace> {
361 // CudaUVMSpace::execution_space == CudaSpace::execution_space
362 // Can access CudaUVMSpace from Host but cannot access CudaSpace from Host
363 enum : bool { assignable = false };
364
365 // CudaUVMSpace::execution_space can access CudaSpace
366 enum : bool { accessible = true };
367 enum : bool { deepcopy = true };
368};
369
370template <>
371struct MemorySpaceAccess<Kokkos::CudaUVMSpace, Kokkos::CudaHostPinnedSpace> {
372 // CudaUVMSpace::execution_space != CudaHostPinnedSpace::execution_space
373 enum : bool { assignable = false };
374 enum : bool { accessible = true }; // CudaUVMSpace::execution_space
375 enum : bool { deepcopy = true };
376};
377
378//----------------------------------------
379// CudaHostPinnedSpace::execution_space == HostSpace::execution_space
380// CudaHostPinnedSpace accessible to both Cuda and Host
381
382template <>
383struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::HostSpace> {
384 enum : bool { assignable = false }; // Cannot access from Cuda
385 enum : bool { accessible = true }; // CudaHostPinnedSpace::execution_space
386 enum : bool { deepcopy = true };
387};
388
389template <>
390struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaSpace> {
391 enum : bool { assignable = false }; // Cannot access from Host
392 enum : bool { accessible = false };
393 enum : bool { deepcopy = true };
394};
395
396template <>
397struct MemorySpaceAccess<Kokkos::CudaHostPinnedSpace, Kokkos::CudaUVMSpace> {
398 enum : bool { assignable = false }; // different execution_space
399 enum : bool { accessible = true }; // same accessibility
400 enum : bool { deepcopy = true };
401};
402
403//----------------------------------------
404
405} // namespace Impl
406} // namespace Kokkos
407
408/*--------------------------------------------------------------------------*/
409/*--------------------------------------------------------------------------*/
410
411namespace Kokkos {
412namespace Impl {
413
414void DeepCopyAsyncCuda(void* dst, const void* src, size_t n);
415
416template <>
417struct DeepCopy<CudaSpace, CudaSpace, Cuda> {
418 DeepCopy(void* dst, const void* src, size_t);
419 DeepCopy(const Cuda&, void* dst, const void* src, size_t);
420};
421
422template <>
423struct DeepCopy<CudaSpace, HostSpace, Cuda> {
424 DeepCopy(void* dst, const void* src, size_t);
425 DeepCopy(const Cuda&, void* dst, const void* src, size_t);
426};
427
428template <>
429struct DeepCopy<HostSpace, CudaSpace, Cuda> {
430 DeepCopy(void* dst, const void* src, size_t);
431 DeepCopy(const Cuda&, void* dst, const void* src, size_t);
432};
433
434template <>
435struct DeepCopy<CudaUVMSpace, CudaUVMSpace, Cuda> {
436 DeepCopy(void* dst, const void* src, size_t n) {
437 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
438 }
439 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
440 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
441 }
442};
443
444template <>
445struct DeepCopy<CudaUVMSpace, HostSpace, Cuda> {
446 DeepCopy(void* dst, const void* src, size_t n) {
447 (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
448 }
449 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
450 (void)DeepCopy<CudaSpace, HostSpace, Cuda>(instance, dst, src, n);
451 }
452};
453
454template <>
455struct DeepCopy<HostSpace, CudaUVMSpace, Cuda> {
456 DeepCopy(void* dst, const void* src, size_t n) {
457 (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
458 }
459 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
460 (void)DeepCopy<HostSpace, CudaSpace, Cuda>(instance, dst, src, n);
461 }
462};
463
464template <>
465struct DeepCopy<CudaHostPinnedSpace, CudaHostPinnedSpace, Cuda> {
466 DeepCopy(void* dst, const void* src, size_t n) {
467 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
468 }
469 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
470 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
471 }
472};
473
474template <>
475struct DeepCopy<CudaHostPinnedSpace, HostSpace, Cuda> {
476 DeepCopy(void* dst, const void* src, size_t n) {
477 (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
478 }
479 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
480 (void)DeepCopy<CudaSpace, HostSpace, Cuda>(instance, dst, src, n);
481 }
482};
483
484template <>
485struct DeepCopy<HostSpace, CudaHostPinnedSpace, Cuda> {
486 DeepCopy(void* dst, const void* src, size_t n) {
487 (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
488 }
489 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
490 (void)DeepCopy<HostSpace, CudaSpace, Cuda>(instance, dst, src, n);
491 }
492};
493
494template <>
495struct DeepCopy<CudaUVMSpace, CudaSpace, Cuda> {
496 DeepCopy(void* dst, const void* src, size_t n) {
497 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
498 }
499 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
500 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
501 }
502};
503
504template <>
505struct DeepCopy<CudaSpace, CudaUVMSpace, Cuda> {
506 DeepCopy(void* dst, const void* src, size_t n) {
507 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
508 }
509 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
510 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
511 }
512};
513
514template <>
515struct DeepCopy<CudaUVMSpace, CudaHostPinnedSpace, Cuda> {
516 DeepCopy(void* dst, const void* src, size_t n) {
517 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
518 }
519 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
520 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
521 }
522};
523
524template <>
525struct DeepCopy<CudaHostPinnedSpace, CudaUVMSpace, Cuda> {
526 DeepCopy(void* dst, const void* src, size_t n) {
527 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
528 }
529 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
530 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
531 }
532};
533
534template <>
535struct DeepCopy<CudaSpace, CudaHostPinnedSpace, Cuda> {
536 DeepCopy(void* dst, const void* src, size_t n) {
537 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
538 }
539 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
540 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
541 }
542};
543
544template <>
545struct DeepCopy<CudaHostPinnedSpace, CudaSpace, Cuda> {
546 DeepCopy(void* dst, const void* src, size_t n) {
547 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
548 }
549 DeepCopy(const Cuda& instance, void* dst, const void* src, size_t n) {
550 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(instance, dst, src, n);
551 }
552};
553
554template <class ExecutionSpace>
555struct DeepCopy<CudaSpace, CudaSpace, ExecutionSpace> {
556 inline DeepCopy(void* dst, const void* src, size_t n) {
557 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
558 }
559
560 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
561 size_t n) {
562 exec.fence();
563 DeepCopyAsyncCuda(dst, src, n);
564 }
565};
566
567template <class ExecutionSpace>
568struct DeepCopy<CudaSpace, HostSpace, ExecutionSpace> {
569 inline DeepCopy(void* dst, const void* src, size_t n) {
570 (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
571 }
572
573 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
574 size_t n) {
575 exec.fence();
576 DeepCopyAsyncCuda(dst, src, n);
577 }
578};
579
580template <class ExecutionSpace>
581struct DeepCopy<HostSpace, CudaSpace, ExecutionSpace> {
582 inline DeepCopy(void* dst, const void* src, size_t n) {
583 (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
584 }
585
586 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
587 size_t n) {
588 exec.fence();
589 DeepCopyAsyncCuda(dst, src, n);
590 }
591};
592
593template <class ExecutionSpace>
594struct DeepCopy<CudaSpace, CudaUVMSpace, ExecutionSpace> {
595 inline DeepCopy(void* dst, const void* src, size_t n) {
596 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
597 }
598
599 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
600 size_t n) {
601 exec.fence();
602 DeepCopyAsyncCuda(dst, src, n);
603 }
604};
605
606template <class ExecutionSpace>
607struct DeepCopy<CudaSpace, CudaHostPinnedSpace, ExecutionSpace> {
608 inline DeepCopy(void* dst, const void* src, size_t n) {
609 (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
610 }
611
612 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
613 size_t n) {
614 exec.fence();
615 DeepCopyAsyncCuda(dst, src, n);
616 }
617};
618
619template <class ExecutionSpace>
620struct DeepCopy<CudaUVMSpace, CudaSpace, ExecutionSpace> {
621 inline DeepCopy(void* dst, const void* src, size_t n) {
622 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
623 }
624
625 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
626 size_t n) {
627 exec.fence();
628 DeepCopyAsyncCuda(dst, src, n);
629 }
630};
631
632template <class ExecutionSpace>
633struct DeepCopy<CudaUVMSpace, CudaUVMSpace, ExecutionSpace> {
634 inline DeepCopy(void* dst, const void* src, size_t n) {
635 (void)DeepCopy<CudaSpace, CudaSpace, Cuda>(dst, src, n);
636 }
637
638 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
639 size_t n) {
640 exec.fence();
641 DeepCopyAsyncCuda(dst, src, n);
642 }
643};
644
645template <class ExecutionSpace>
646struct DeepCopy<CudaUVMSpace, CudaHostPinnedSpace, ExecutionSpace> {
647 inline DeepCopy(void* dst, const void* src, size_t n) {
648 (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
649 }
650
651 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
652 size_t n) {
653 exec.fence();
654 DeepCopyAsyncCuda(dst, src, n);
655 }
656};
657
658template <class ExecutionSpace>
659struct DeepCopy<CudaUVMSpace, HostSpace, ExecutionSpace> {
660 inline DeepCopy(void* dst, const void* src, size_t n) {
661 (void)DeepCopy<CudaSpace, HostSpace, Cuda>(dst, src, n);
662 }
663
664 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
665 size_t n) {
666 exec.fence();
667 DeepCopyAsyncCuda(dst, src, n);
668 }
669};
670
671template <class ExecutionSpace>
672struct DeepCopy<CudaHostPinnedSpace, CudaSpace, ExecutionSpace> {
673 inline DeepCopy(void* dst, const void* src, size_t n) {
674 (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
675 }
676
677 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
678 size_t n) {
679 exec.fence();
680 DeepCopyAsyncCuda(dst, src, n);
681 }
682};
683
684template <class ExecutionSpace>
685struct DeepCopy<CudaHostPinnedSpace, CudaUVMSpace, ExecutionSpace> {
686 inline DeepCopy(void* dst, const void* src, size_t n) {
687 (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
688 }
689
690 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
691 size_t n) {
692 exec.fence();
693 DeepCopyAsyncCuda(dst, src, n);
694 }
695};
696
697template <class ExecutionSpace>
698struct DeepCopy<CudaHostPinnedSpace, CudaHostPinnedSpace, ExecutionSpace> {
699 inline DeepCopy(void* dst, const void* src, size_t n) {
700 (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
701 }
702
703 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
704 size_t n) {
705 exec.fence();
706 DeepCopyAsyncCuda(dst, src, n);
707 }
708};
709
710template <class ExecutionSpace>
711struct DeepCopy<CudaHostPinnedSpace, HostSpace, ExecutionSpace> {
712 inline DeepCopy(void* dst, const void* src, size_t n) {
713 (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
714 }
715
716 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
717 size_t n) {
718 exec.fence();
719 DeepCopyAsyncCuda(dst, src, n);
720 }
721};
722
723template <class ExecutionSpace>
724struct DeepCopy<HostSpace, CudaUVMSpace, ExecutionSpace> {
725 inline DeepCopy(void* dst, const void* src, size_t n) {
726 (void)DeepCopy<HostSpace, CudaSpace, Cuda>(dst, src, n);
727 }
728
729 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
730 size_t n) {
731 exec.fence();
732 DeepCopyAsyncCuda(dst, src, n);
733 }
734};
735
736template <class ExecutionSpace>
737struct DeepCopy<HostSpace, CudaHostPinnedSpace, ExecutionSpace> {
738 inline DeepCopy(void* dst, const void* src, size_t n) {
739 (void)DeepCopy<HostSpace, HostSpace, Cuda>(dst, src, n);
740 }
741
742 inline DeepCopy(const ExecutionSpace& exec, void* dst, const void* src,
743 size_t n) {
744 exec.fence();
745 DeepCopyAsyncCuda(dst, src, n);
746 }
747};
748
749} // namespace Impl
750} // namespace Kokkos
751
752//----------------------------------------------------------------------------
753//----------------------------------------------------------------------------
754
755namespace Kokkos {
756namespace Impl {
757
758template <>
759class SharedAllocationRecord<Kokkos::CudaSpace, void>
760 : public HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace> {
761 private:
762 friend class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>;
763 friend class SharedAllocationRecordCommon<Kokkos::CudaSpace>;
764 friend class HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
765
766 using RecordBase = SharedAllocationRecord<void, void>;
767 using base_t =
768 HostInaccessibleSharedAllocationRecordCommon<Kokkos::CudaSpace>;
769
770 SharedAllocationRecord(const SharedAllocationRecord&) = delete;
771 SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
772
773 static ::cudaTextureObject_t attach_texture_object(
774 const unsigned sizeof_alias, void* const alloc_ptr,
775 const size_t alloc_size);
776
777#ifdef KOKKOS_ENABLE_DEBUG
778 static RecordBase s_root_record;
779#endif
780
781 ::cudaTextureObject_t m_tex_obj = 0;
782 const Kokkos::CudaSpace m_space;
783
784 protected:
785 ~SharedAllocationRecord();
786 SharedAllocationRecord() = default;
787
788 SharedAllocationRecord(
789 const Kokkos::CudaSpace& arg_space, const std::string& arg_label,
790 const size_t arg_alloc_size,
791 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
792
793 public:
794 template <typename AliasType>
795 inline ::cudaTextureObject_t attach_texture_object() {
796 static_assert((std::is_same<AliasType, int>::value ||
797 std::is_same<AliasType, ::int2>::value ||
798 std::is_same<AliasType, ::int4>::value),
799 "Cuda texture fetch only supported for alias types of int, "
800 "::int2, or ::int4");
801
802 if (m_tex_obj == 0) {
803 m_tex_obj = attach_texture_object(sizeof(AliasType),
804 (void*)RecordBase::m_alloc_ptr,
805 RecordBase::m_alloc_size);
806 }
807
808 return m_tex_obj;
809 }
810
811 template <typename AliasType>
812 inline int attach_texture_object_offset(const AliasType* const ptr) {
813 // Texture object is attached to the entire allocation range
814 return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
815 }
816};
817
818template <>
819class SharedAllocationRecord<Kokkos::CudaUVMSpace, void>
820 : public SharedAllocationRecordCommon<Kokkos::CudaUVMSpace> {
821 private:
822 friend class SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
823
824 using base_t = SharedAllocationRecordCommon<Kokkos::CudaUVMSpace>;
825 using RecordBase = SharedAllocationRecord<void, void>;
826
827 SharedAllocationRecord(const SharedAllocationRecord&) = delete;
828 SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
829
830 static RecordBase s_root_record;
831
832 ::cudaTextureObject_t m_tex_obj = 0;
833 const Kokkos::CudaUVMSpace m_space;
834
835 protected:
836 ~SharedAllocationRecord();
837 SharedAllocationRecord() = default;
838
839 SharedAllocationRecord(
840 const Kokkos::CudaUVMSpace& arg_space, const std::string& arg_label,
841 const size_t arg_alloc_size,
842 const RecordBase::function_type arg_dealloc = &base_t::deallocate);
843
844 public:
845 template <typename AliasType>
846 inline ::cudaTextureObject_t attach_texture_object() {
847 static_assert((std::is_same<AliasType, int>::value ||
848 std::is_same<AliasType, ::int2>::value ||
849 std::is_same<AliasType, ::int4>::value),
850 "Cuda texture fetch only supported for alias types of int, "
851 "::int2, or ::int4");
852
853 if (m_tex_obj == 0) {
854 m_tex_obj = SharedAllocationRecord<Kokkos::CudaSpace, void>::
855 attach_texture_object(sizeof(AliasType),
856 (void*)RecordBase::m_alloc_ptr,
857 RecordBase::m_alloc_size);
858 }
859
860 return m_tex_obj;
861 }
862
863 template <typename AliasType>
864 inline int attach_texture_object_offset(const AliasType* const ptr) {
865 // Texture object is attached to the entire allocation range
866 return ptr - reinterpret_cast<AliasType*>(RecordBase::m_alloc_ptr);
867 }
868};
869
870template <>
871class SharedAllocationRecord<Kokkos::CudaHostPinnedSpace, void>
872 : public SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace> {
873 private:
874 friend class SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;
875
876 using RecordBase = SharedAllocationRecord<void, void>;
877 using base_t = SharedAllocationRecordCommon<Kokkos::CudaHostPinnedSpace>;
878
879 SharedAllocationRecord(const SharedAllocationRecord&) = delete;
880 SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
881
882 static RecordBase s_root_record;
883
884 const Kokkos::CudaHostPinnedSpace m_space;
885
886 protected:
887 ~SharedAllocationRecord();
888 SharedAllocationRecord() = default;
889
890 SharedAllocationRecord(
891 const Kokkos::CudaHostPinnedSpace& arg_space,
892 const std::string& arg_label, const size_t arg_alloc_size,
893 const RecordBase::function_type arg_dealloc = &deallocate);
894};
895
896} // namespace Impl
897} // namespace Kokkos
898
899//----------------------------------------------------------------------------
900//----------------------------------------------------------------------------
901
902#endif /* #if defined( KOKKOS_ENABLE_CUDA ) */
903#endif /* #define KOKKOS_CUDASPACE_HPP */
// Doxygen tooltip residue from the extracted listing, preserved as comments:
//  - ConcurrentBitset: a thread safe view to a bitset.
//  - LogicalMemorySpace: a space identical to another space, but
//    differentiable by name and template argument.
//  - HostSpace: memory management for host memory.
//  - bool available(): query if hwloc is available.
//  - MemorySpaceAccess: access relationship between DstMemorySpace and
//    SrcMemorySpace.