44 #ifndef KOKKOS_CUDASPACE_HPP 45 #define KOKKOS_CUDASPACE_HPP 47 #include <Kokkos_Macros.hpp> 48 #if defined( KOKKOS_ENABLE_CUDA ) 50 #include <Kokkos_Core_fwd.hpp> 56 #include <Kokkos_HostSpace.hpp> 58 #include <Cuda/Kokkos_Cuda_abort.hpp> 70 typedef CudaSpace memory_space ;
71 typedef Kokkos::Cuda execution_space ;
74 typedef unsigned int size_type ;
79 CudaSpace( CudaSpace && rhs ) = default ;
80 CudaSpace(
const CudaSpace & rhs ) = default ;
81 CudaSpace & operator = ( CudaSpace && rhs ) = default ;
82 CudaSpace & operator = (
const CudaSpace & rhs ) = default ;
83 ~CudaSpace() = default ;
86 void * allocate(
const size_t arg_alloc_size )
const ;
89 void deallocate(
void *
const arg_alloc_ptr
90 ,
const size_t arg_alloc_size )
const ;
93 static constexpr
const char* name();
97 static void access_error();
98 static void access_error(
const void *
const );
104 static constexpr
const char* m_name =
"Cuda";
105 friend class Kokkos::Impl::SharedAllocationRecord<
Kokkos::CudaSpace , void > ;
namespace Kokkos {
namespace Impl {

/// \brief  Initialize the device lock arrays needed for arbitrary-size atomics.
void init_lock_arrays_cuda_space();

/// \brief  Device pointer to the lock array used for arbitrary-size atomics.
/// When \c deallocate is true the array is freed and the cached pointer reset.
int* atomic_lock_array_cuda_space_ptr( bool deallocate = false );

/// \brief  Device pointer to the scratch-memory lock array
/// (freed when \c deallocate is true).
int* scratch_lock_array_cuda_space_ptr( bool deallocate = false );

/// \brief  Device pointer to the thread-id lock array
/// (freed when \c deallocate is true).
int* threadid_lock_array_cuda_space_ptr( bool deallocate = false );

} // namespace Impl
} // namespace Kokkos
156 typedef CudaUVMSpace memory_space ;
157 typedef Cuda execution_space ;
159 typedef unsigned int size_type ;
167 static int number_of_allocations();
175 CudaUVMSpace( CudaUVMSpace && rhs ) = default ;
176 CudaUVMSpace(
const CudaUVMSpace & rhs ) = default ;
177 CudaUVMSpace & operator = ( CudaUVMSpace && rhs ) = default ;
178 CudaUVMSpace & operator = (
const CudaUVMSpace & rhs ) = default ;
179 ~CudaUVMSpace() = default ;
182 void * allocate(
const size_t arg_alloc_size )
const ;
185 void deallocate(
void *
const arg_alloc_ptr
186 ,
const size_t arg_alloc_size )
const ;
189 static constexpr
const char* name();
196 static constexpr
const char* m_name =
"CudaUVM";
210 class CudaHostPinnedSpace {
215 typedef HostSpace::execution_space execution_space ;
216 typedef CudaHostPinnedSpace memory_space ;
218 typedef unsigned int size_type ;
222 CudaHostPinnedSpace();
223 CudaHostPinnedSpace( CudaHostPinnedSpace && rhs ) = default ;
224 CudaHostPinnedSpace(
const CudaHostPinnedSpace & rhs ) = default ;
225 CudaHostPinnedSpace & operator = ( CudaHostPinnedSpace && rhs ) = default ;
226 CudaHostPinnedSpace & operator = (
const CudaHostPinnedSpace & rhs ) = default ;
227 ~CudaHostPinnedSpace() = default ;
230 void * allocate(
const size_t arg_alloc_size )
const ;
233 void deallocate(
void *
const arg_alloc_ptr
234 ,
const size_t arg_alloc_size )
const ;
237 static constexpr
const char* name();
241 static constexpr
const char* m_name =
"CudaHostPinned";
261 struct MemorySpaceAccess<
Kokkos::HostSpace , Kokkos::CudaSpace > {
262 enum { assignable =
false };
263 enum { accessible =
false };
264 enum { deepcopy =
true };
268 struct MemorySpaceAccess<
Kokkos::HostSpace , Kokkos::CudaUVMSpace > {
270 enum { assignable =
false };
271 enum { accessible =
true };
272 enum { deepcopy =
true };
276 struct MemorySpaceAccess<
Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace > {
278 enum { assignable =
true };
279 enum { accessible =
true };
280 enum { deepcopy =
true };
287 enum { assignable =
false };
288 enum { accessible =
false };
289 enum { deepcopy =
true };
293 struct MemorySpaceAccess<
Kokkos::CudaSpace , Kokkos::CudaUVMSpace > {
295 enum { assignable =
true };
296 enum { accessible =
true };
297 enum { deepcopy =
true };
301 struct MemorySpaceAccess<
Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace > {
303 enum { assignable =
false };
304 enum { accessible =
true };
305 enum { deepcopy =
true };
314 enum { assignable =
false };
315 enum { accessible =
false };
316 enum { deepcopy =
true };
320 struct MemorySpaceAccess<
Kokkos::CudaUVMSpace , Kokkos::CudaSpace > {
323 enum { assignable =
false };
326 enum { accessible =
true };
327 enum { deepcopy =
true };
331 struct MemorySpaceAccess<
Kokkos::CudaUVMSpace , Kokkos::CudaHostPinnedSpace > {
333 enum { assignable =
false };
334 enum { accessible =
true };
335 enum { deepcopy =
true };
345 enum { assignable =
false };
346 enum { accessible =
true };
347 enum { deepcopy =
true };
351 struct MemorySpaceAccess<
Kokkos::CudaHostPinnedSpace , Kokkos::CudaSpace > {
352 enum { assignable =
false };
353 enum { accessible =
false };
354 enum { deepcopy =
true };
358 struct MemorySpaceAccess<
Kokkos::CudaHostPinnedSpace , Kokkos::CudaUVMSpace > {
359 enum { assignable =
false };
360 enum { accessible =
true };
361 enum { deepcopy =
true };
namespace Kokkos {
namespace Impl {

/// \brief  Asynchronously deep-copy \c n bytes from \c src to \c dst
///         using the CUDA runtime.
void DeepCopyAsyncCuda( void * dst , const void * src , size_t n );

} // namespace Impl
} // namespace Kokkos
376 template<>
struct DeepCopy< CudaSpace , CudaSpace , Cuda>
378 DeepCopy(
void * dst ,
const void * src ,
size_t );
379 DeepCopy(
const Cuda & ,
void * dst ,
const void * src ,
size_t );
382 template<>
struct DeepCopy< CudaSpace , HostSpace , Cuda >
384 DeepCopy(
void * dst ,
const void * src ,
size_t );
385 DeepCopy(
const Cuda & ,
void * dst ,
const void * src ,
size_t );
388 template<>
struct DeepCopy< HostSpace , CudaSpace , Cuda >
390 DeepCopy(
void * dst ,
const void * src ,
size_t );
391 DeepCopy(
const Cuda & ,
void * dst ,
const void * src ,
size_t );
394 template<
class ExecutionSpace>
struct DeepCopy< CudaSpace , CudaSpace , ExecutionSpace >
397 DeepCopy(
void * dst ,
const void * src ,
size_t n )
398 { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
401 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
404 DeepCopyAsyncCuda (dst,src,n);
408 template<
class ExecutionSpace>
struct DeepCopy< CudaSpace , HostSpace , ExecutionSpace >
411 DeepCopy(
void * dst ,
const void * src ,
size_t n )
412 { (void) DeepCopy< CudaSpace , HostSpace , Cuda>( dst , src , n ); }
415 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
418 DeepCopyAsyncCuda (dst,src,n);
422 template<
class ExecutionSpace>
423 struct DeepCopy< HostSpace , CudaSpace , ExecutionSpace >
426 DeepCopy(
void * dst ,
const void * src ,
size_t n )
427 { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
430 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
433 DeepCopyAsyncCuda (dst,src,n);
437 template<
class ExecutionSpace>
438 struct DeepCopy< CudaSpace , CudaUVMSpace , ExecutionSpace >
441 DeepCopy(
void * dst ,
const void * src ,
size_t n )
442 { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
445 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
448 DeepCopyAsyncCuda (dst,src,n);
452 template<
class ExecutionSpace>
453 struct DeepCopy< CudaSpace , CudaHostPinnedSpace , ExecutionSpace>
456 DeepCopy(
void * dst ,
const void * src ,
size_t n )
457 { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
460 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
463 DeepCopyAsyncCuda (dst,src,n);
468 template<
class ExecutionSpace>
469 struct DeepCopy< CudaUVMSpace , CudaSpace , ExecutionSpace>
472 DeepCopy(
void * dst ,
const void * src ,
size_t n )
473 { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
476 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
479 DeepCopyAsyncCuda (dst,src,n);
483 template<
class ExecutionSpace>
484 struct DeepCopy< CudaUVMSpace , CudaUVMSpace , ExecutionSpace>
487 DeepCopy(
void * dst ,
const void * src ,
size_t n )
488 { (void) DeepCopy< CudaSpace , CudaSpace , Cuda >( dst , src , n ); }
491 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
494 DeepCopyAsyncCuda (dst,src,n);
498 template<
class ExecutionSpace>
499 struct DeepCopy< CudaUVMSpace , CudaHostPinnedSpace , ExecutionSpace>
502 DeepCopy(
void * dst ,
const void * src ,
size_t n )
503 { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
506 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
509 DeepCopyAsyncCuda (dst,src,n);
513 template<
class ExecutionSpace>
struct DeepCopy< CudaUVMSpace , HostSpace , ExecutionSpace >
516 DeepCopy(
void * dst ,
const void * src ,
size_t n )
517 { (void) DeepCopy< CudaSpace , HostSpace , Cuda >( dst , src , n ); }
520 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
523 DeepCopyAsyncCuda (dst,src,n);
528 template<
class ExecutionSpace>
struct DeepCopy< CudaHostPinnedSpace , CudaSpace , ExecutionSpace >
531 DeepCopy(
void * dst ,
const void * src ,
size_t n )
532 { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
535 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
538 DeepCopyAsyncCuda (dst,src,n);
542 template<
class ExecutionSpace>
struct DeepCopy< CudaHostPinnedSpace , CudaUVMSpace , ExecutionSpace >
545 DeepCopy(
void * dst ,
const void * src ,
size_t n )
546 { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
549 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
552 DeepCopyAsyncCuda (dst,src,n);
556 template<
class ExecutionSpace>
struct DeepCopy< CudaHostPinnedSpace , CudaHostPinnedSpace , ExecutionSpace >
559 DeepCopy(
void * dst ,
const void * src ,
size_t n )
560 { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
563 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
566 DeepCopyAsyncCuda (dst,src,n);
570 template<
class ExecutionSpace>
struct DeepCopy< CudaHostPinnedSpace , HostSpace , ExecutionSpace >
573 DeepCopy(
void * dst ,
const void * src ,
size_t n )
574 { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
577 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
580 DeepCopyAsyncCuda (dst,src,n);
585 template<
class ExecutionSpace>
struct DeepCopy< HostSpace , CudaUVMSpace , ExecutionSpace >
588 DeepCopy(
void * dst ,
const void * src ,
size_t n )
589 { (void) DeepCopy< HostSpace , CudaSpace , Cuda >( dst , src , n ); }
592 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
595 DeepCopyAsyncCuda (dst,src,n);
599 template<
class ExecutionSpace>
struct DeepCopy< HostSpace , CudaHostPinnedSpace , ExecutionSpace >
602 DeepCopy(
void * dst ,
const void * src ,
size_t n )
603 { (void) DeepCopy< HostSpace , HostSpace , Cuda >( dst , src , n ); }
606 DeepCopy(
const ExecutionSpace& exec,
void * dst ,
const void * src ,
size_t n )
609 DeepCopyAsyncCuda (dst,src,n);
626 enum { value =
false };
627 KOKKOS_INLINE_FUNCTION
static void verify(
void )
628 { Kokkos::abort(
"Cuda code attempted to access HostSpace memory"); }
630 KOKKOS_INLINE_FUNCTION
static void verify(
const void * )
631 { Kokkos::abort(
"Cuda code attempted to access HostSpace memory"); }
636 struct VerifyExecutionCanAccessMemorySpace<
Kokkos::CudaSpace , Kokkos::CudaUVMSpace >
638 enum { value =
true };
639 KOKKOS_INLINE_FUNCTION
static void verify(
void ) { }
640 KOKKOS_INLINE_FUNCTION
static void verify(
const void * ) { }
645 struct VerifyExecutionCanAccessMemorySpace<
Kokkos::CudaSpace , Kokkos::CudaHostPinnedSpace >
647 enum { value =
true };
648 KOKKOS_INLINE_FUNCTION
static void verify(
void ) { }
649 KOKKOS_INLINE_FUNCTION
static void verify(
const void * ) { }
653 template<
class OtherSpace >
654 struct VerifyExecutionCanAccessMemorySpace<
655 typename enable_if< ! is_same<
Kokkos::CudaSpace,OtherSpace>::value , Kokkos::CudaSpace >::type ,
658 enum { value =
false };
659 KOKKOS_INLINE_FUNCTION
static void verify(
void )
660 { Kokkos::abort(
"Cuda code attempted to access unknown Space memory"); }
662 KOKKOS_INLINE_FUNCTION
static void verify(
const void * )
663 { Kokkos::abort(
"Cuda code attempted to access unknown Space memory"); }
669 struct VerifyExecutionCanAccessMemorySpace<
Kokkos::HostSpace , Kokkos::CudaSpace >
671 enum { value =
false };
672 inline static void verify(
void ) { CudaSpace::access_error(); }
673 inline static void verify(
const void * p ) { CudaSpace::access_error(p); }
678 struct VerifyExecutionCanAccessMemorySpace<
Kokkos::HostSpace , Kokkos::CudaUVMSpace >
680 enum { value =
true };
681 inline static void verify(
void ) { }
682 inline static void verify(
const void * ) { }
687 struct VerifyExecutionCanAccessMemorySpace<
Kokkos::HostSpace , Kokkos::CudaHostPinnedSpace >
689 enum { value =
true };
690 KOKKOS_INLINE_FUNCTION
static void verify(
void ) {}
691 KOKKOS_INLINE_FUNCTION
static void verify(
const void * ) {}
704 class SharedAllocationRecord<
Kokkos::CudaSpace , void >
705 :
public SharedAllocationRecord< void , void >
709 friend class SharedAllocationRecord<
Kokkos::CudaUVMSpace , void > ;
711 typedef SharedAllocationRecord< void , void > RecordBase ;
713 SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
714 SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
716 static void deallocate( RecordBase * );
718 static ::cudaTextureObject_t
719 attach_texture_object( const unsigned sizeof_alias
720 , void * const alloc_ptr
721 , const size_t alloc_size );
723 static RecordBase s_root_record ;
725 ::cudaTextureObject_t m_tex_obj ;
726 const Kokkos::CudaSpace m_space ;
730 ~SharedAllocationRecord();
731 SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
733 SharedAllocationRecord(
const Kokkos::CudaSpace & arg_space
734 ,
const std::string & arg_label
735 ,
const size_t arg_alloc_size
736 ,
const RecordBase::function_type arg_dealloc = & deallocate
741 std::string get_label()
const ;
743 static SharedAllocationRecord * allocate(
const Kokkos::CudaSpace & arg_space
744 ,
const std::string & arg_label
745 ,
const size_t arg_alloc_size );
749 void * allocate_tracked(
const Kokkos::CudaSpace & arg_space
750 ,
const std::string & arg_label
751 ,
const size_t arg_alloc_size );
755 void * reallocate_tracked(
void *
const arg_alloc_ptr
756 ,
const size_t arg_alloc_size );
760 void deallocate_tracked(
void *
const arg_alloc_ptr );
762 static SharedAllocationRecord * get_record(
void * arg_alloc_ptr );
764 template<
typename AliasType >
766 ::cudaTextureObject_t attach_texture_object()
768 static_assert( ( std::is_same< AliasType , int >::value ||
769 std::is_same< AliasType , ::int2 >::value ||
770 std::is_same< AliasType , ::int4 >::value )
771 ,
"Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
773 if ( m_tex_obj == 0 ) {
774 m_tex_obj = attach_texture_object(
sizeof(AliasType)
775 , (
void*) RecordBase::m_alloc_ptr
776 , RecordBase::m_alloc_size );
782 template<
typename AliasType >
784 int attach_texture_object_offset(
const AliasType *
const ptr )
787 return ptr -
reinterpret_cast<AliasType*
>( RecordBase::m_alloc_ptr );
790 static void print_records( std::ostream & ,
const Kokkos::CudaSpace & ,
bool detail =
false );
795 class SharedAllocationRecord<
Kokkos::CudaUVMSpace , void >
796 :
public SharedAllocationRecord< void , void >
800 typedef SharedAllocationRecord< void , void > RecordBase ;
802 SharedAllocationRecord(
const SharedAllocationRecord & ) = delete ;
803 SharedAllocationRecord & operator = (
const SharedAllocationRecord & ) = delete ;
805 static void deallocate( RecordBase * );
807 static RecordBase s_root_record ;
809 ::cudaTextureObject_t m_tex_obj ;
810 const Kokkos::CudaUVMSpace m_space ;
814 ~SharedAllocationRecord();
815 SharedAllocationRecord() : RecordBase(), m_tex_obj(0), m_space() {}
817 SharedAllocationRecord(
const Kokkos::CudaUVMSpace & arg_space
818 ,
const std::string & arg_label
819 ,
const size_t arg_alloc_size
820 ,
const RecordBase::function_type arg_dealloc = & deallocate
825 std::string get_label()
const ;
827 static SharedAllocationRecord * allocate(
const Kokkos::CudaUVMSpace & arg_space
828 ,
const std::string & arg_label
829 ,
const size_t arg_alloc_size
834 void * allocate_tracked(
const Kokkos::CudaUVMSpace & arg_space
835 ,
const std::string & arg_label
836 ,
const size_t arg_alloc_size );
840 void * reallocate_tracked(
void *
const arg_alloc_ptr
841 ,
const size_t arg_alloc_size );
845 void deallocate_tracked(
void *
const arg_alloc_ptr );
847 static SharedAllocationRecord * get_record(
void * arg_alloc_ptr );
850 template<
typename AliasType >
852 ::cudaTextureObject_t attach_texture_object()
854 static_assert( ( std::is_same< AliasType , int >::value ||
855 std::is_same< AliasType , ::int2 >::value ||
856 std::is_same< AliasType , ::int4 >::value )
857 ,
"Cuda texture fetch only supported for alias types of int, ::int2, or ::int4" );
859 if ( m_tex_obj == 0 ) {
860 m_tex_obj = SharedAllocationRecord< Kokkos::CudaSpace , void >::
861 attach_texture_object(
sizeof(AliasType)
862 , (
void*) RecordBase::m_alloc_ptr
863 , RecordBase::m_alloc_size );
869 template<
typename AliasType >
871 int attach_texture_object_offset(
const AliasType *
const ptr )
874 return ptr -
reinterpret_cast<AliasType*
>( RecordBase::m_alloc_ptr );
877 static void print_records( std::ostream & ,
const Kokkos::CudaUVMSpace & ,
bool detail =
false );
881 class SharedAllocationRecord<
Kokkos::CudaHostPinnedSpace , void >
882 :
public SharedAllocationRecord< void , void >
886 typedef SharedAllocationRecord< void , void > RecordBase ;
888 SharedAllocationRecord(
const SharedAllocationRecord & ) = delete ;
889 SharedAllocationRecord & operator = (
const SharedAllocationRecord & ) = delete ;
891 static void deallocate( RecordBase * );
893 static RecordBase s_root_record ;
895 const Kokkos::CudaHostPinnedSpace m_space ;
899 ~SharedAllocationRecord();
900 SharedAllocationRecord() : RecordBase(), m_space() {}
902 SharedAllocationRecord(
const Kokkos::CudaHostPinnedSpace & arg_space
903 ,
const std::string & arg_label
904 ,
const size_t arg_alloc_size
905 ,
const RecordBase::function_type arg_dealloc = & deallocate
910 std::string get_label()
const ;
912 static SharedAllocationRecord * allocate(
const Kokkos::CudaHostPinnedSpace & arg_space
913 ,
const std::string & arg_label
914 ,
const size_t arg_alloc_size
918 void * allocate_tracked(
const Kokkos::CudaHostPinnedSpace & arg_space
919 ,
const std::string & arg_label
920 ,
const size_t arg_alloc_size );
924 void * reallocate_tracked(
void *
const arg_alloc_ptr
925 ,
const size_t arg_alloc_size );
929 void deallocate_tracked(
void *
const arg_alloc_ptr );
932 static SharedAllocationRecord * get_record(
void * arg_alloc_ptr );
934 static void print_records( std::ostream & ,
const Kokkos::CudaHostPinnedSpace & ,
bool detail =
false );
// Extraction residue (documentation-index text, not code) — preserved as comments:
// Memory space for main process and CPU execution spaces.
// Memory management for host memory.
// bool available(): query whether hwloc is available.
// Access relationship between DstMemorySpace and SrcMemorySpace.