// Include guard for this header, followed by the Kokkos headers it pulls in;
// the profiling interface include sits behind KOKKOS_ENABLE_PROFILING.
// NOTE(review): this listing has original line numbers fused into the text and
// drops some lines (e.g. the rest of the template parameter list below).
47 #ifndef KOKKOS_PARALLEL_HPP 48 #define KOKKOS_PARALLEL_HPP 51 #include <Kokkos_Core_fwd.hpp> 52 #include <Kokkos_View.hpp> 53 #include <Kokkos_ExecPolicy.hpp> 55 #if defined(KOKKOS_ENABLE_PROFILING) 56 #include <impl/Kokkos_Profiling_Interface.hpp> 60 #include <impl/Kokkos_Tags.hpp> 61 #include <impl/Kokkos_Traits.hpp> 62 #include <impl/Kokkos_FunctorAnalysis.hpp> 63 #include <impl/Kokkos_FunctorAdapter.hpp> 83 template<
class Functor
// Primary template: when none of the partial specializations that follow
// applies, the execution space is Kokkos::DefaultExecutionSpace.
88 struct FunctorPolicyExecutionSpace {
89 typedef Kokkos::DefaultExecutionSpace execution_space ;
92 template<
class Functor ,
class Policy >
93 struct FunctorPolicyExecutionSpace
95 , typename enable_if_type< typename Functor::device_type >::type
96 , typename enable_if_type< typename Policy ::execution_space >::type
99 typedef typename Policy ::execution_space execution_space ;
102 template<
class Functor ,
class Policy >
103 struct FunctorPolicyExecutionSpace
105 , typename enable_if_type< typename Functor::execution_space >::type
106 , typename enable_if_type< typename Policy ::execution_space >::type
109 typedef typename Policy ::execution_space execution_space ;
112 template<
class Functor ,
class Policy ,
class EnableFunctor >
113 struct FunctorPolicyExecutionSpace
116 , typename enable_if_type< typename Policy::execution_space >::type
119 typedef typename Policy ::execution_space execution_space ;
122 template<
class Functor ,
class Policy ,
class EnablePolicy >
123 struct FunctorPolicyExecutionSpace
125 , typename enable_if_type< typename Functor::device_type >::type
129 typedef typename Functor::device_type execution_space ;
132 template<
class Functor ,
class Policy ,
class EnablePolicy >
133 struct FunctorPolicyExecutionSpace
135 , typename enable_if_type< typename Functor::execution_space >::type
139 typedef typename Functor::execution_space execution_space ;
// parallel_for overload taking an execution policy, a functor and an optional
// kernel label `str` (default: empty string).
// NOTE(review): this listing drops several lines of the function, including
// the line carrying the function name and first parameter and whatever runs
// between the two allocation-tracking calls below.
171 template<
class ExecPolicy ,
class FunctorType >
174 ,
const FunctorType & functor
175 ,
const std::string& str =
"" 176 ,
// Unnamed defaulted pointer parameter: this overload is only viable when
// ExecPolicy is not an integral type.
typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type * = 0
// With profiling compiled in and a profiling library loaded, report the start
// of the kernel; an empty label falls back to typeid(FunctorType).name().
// The id written through &kpID is handed back to endParallelFor below.
179 #if defined(KOKKOS_ENABLE_PROFILING) 181 if(Kokkos::Profiling::profileLibraryLoaded()) {
182 Kokkos::Profiling::beginParallelFor(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Shared-allocation tracking is claimed/disabled here and released/enabled
// right after; the statement(s) bracketed by the pair are not visible.
186 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
188 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// Matching profiling hook for the end of the kernel.
192 #if defined(KOKKOS_ENABLE_PROFILING) 193 if(Kokkos::Profiling::profileLibraryLoaded()) {
194 Kokkos::Profiling::endParallelFor(kpID);
// parallel_for overload templated on the functor only, with an optional kernel
// label `str` (default: empty string).
// NOTE(review): the line with the function name and first parameter is missing
// from this listing, as are the statements that use the execution space below.
199 template<
class FunctorType >
202 ,
const FunctorType & functor
203 ,
const std::string& str =
// The execution space is derived from the functor alone: the Policy argument
// of FunctorPolicyExecutionSpace is void.
"" 207 Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
// With profiling compiled in and a profiling library loaded, report the start
// of the kernel; an empty label falls back to typeid(FunctorType).name().
211 #if defined(KOKKOS_ENABLE_PROFILING) 213 if(Kokkos::Profiling::profileLibraryLoaded()) {
214 Kokkos::Profiling::beginParallelFor(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Shared-allocation tracking is claimed/disabled here and released/enabled
// right after; the statement(s) bracketed by the pair are not visible.
218 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
220 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// Matching profiling hook for the end of the kernel.
224 #if defined(KOKKOS_ENABLE_PROFILING) 225 if(Kokkos::Profiling::profileLibraryLoaded()) {
226 Kokkos::Profiling::endParallelFor(kpID);
// parallel_for overload whose visible parameters are the policy and the
// functor; `str` is used in the debug output below.
// NOTE(review): the line declaring `str` and the statements between the two
// debug prints are missing from this listing.
231 template<
class ExecPolicy ,
class FunctorType >
234 ,
const ExecPolicy & policy
235 ,
const FunctorType & functor )
// When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to nonzero, print the
// kernel label to std::cout before the kernel starts ...
237 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 239 std::cout <<
"KOKKOS_DEBUG Start parallel_for kernel: " << str << std::endl;
// ... and again after it ends.
244 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 246 std::cout <<
"KOKKOS_DEBUG End parallel_for kernel: " << str << std::endl;
// The parallel_reduce interface is pulled in from its own header; the
// parallel_scan overloads follow.
//
// parallel_scan overload taking an execution policy, a functor and an optional
// kernel label `str` (default: empty string).
// NOTE(review): the statement(s) between the two allocation-tracking calls are
// missing from this listing.
253 #include <Kokkos_Parallel_Reduce.hpp> 413 template<
class ExecutionPolicy ,
class FunctorType >
415 void parallel_scan(
const ExecutionPolicy & policy
416 ,
const FunctorType & functor
417 ,
const std::string& str =
"" 418 ,
// Unnamed defaulted pointer parameter: this overload is only viable when
// ExecutionPolicy is not an integral type.
typename Impl::enable_if< ! Impl::is_integral< ExecutionPolicy >::value >::type * = 0
// With profiling compiled in and a profiling library loaded, report the start
// of the scan; an empty label falls back to typeid(FunctorType).name().
421 #if defined(KOKKOS_ENABLE_PROFILING) 423 if(Kokkos::Profiling::profileLibraryLoaded()) {
424 Kokkos::Profiling::beginParallelScan(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Shared-allocation tracking is claimed/disabled here and released/enabled
// right after; the statement(s) bracketed by the pair are not visible.
428 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
430 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// Matching profiling hook for the end of the scan.
434 #if defined(KOKKOS_ENABLE_PROFILING) 435 if(Kokkos::Profiling::profileLibraryLoaded()) {
436 Kokkos::Profiling::endParallelScan(kpID);
// parallel_scan overload taking a work count instead of a policy, plus a
// functor and an optional kernel label `str` (default: empty string).
// NOTE(review): the statements that build on the execution space named below
// are missing from this listing.
442 template<
class FunctorType >
444 void parallel_scan(
const size_t work_count
445 ,
const FunctorType & functor
446 ,
const std::string& str =
"" )
// The execution space is derived from the functor alone: the Policy argument
// of FunctorPolicyExecutionSpace is void.
449 Kokkos::Impl::FunctorPolicyExecutionSpace< FunctorType , void >::execution_space
// With profiling compiled in and a profiling library loaded, report the start
// of the scan; an empty label falls back to typeid(FunctorType).name().
454 #if defined(KOKKOS_ENABLE_PROFILING) 456 if(Kokkos::Profiling::profileLibraryLoaded()) {
457 Kokkos::Profiling::beginParallelScan(
"" == str ?
typeid(FunctorType).name() : str, 0, &kpID);
// Shared-allocation tracking is claimed/disabled here and released/enabled
// right after; the statement(s) bracketed by the pair are not visible.
461 Kokkos::Impl::shared_allocation_tracking_claim_and_disable();
463 Kokkos::Impl::shared_allocation_tracking_release_and_enable();
// Matching profiling hook for the end of the scan.
467 #if defined(KOKKOS_ENABLE_PROFILING) 468 if(Kokkos::Profiling::profileLibraryLoaded()) {
469 Kokkos::Profiling::endParallelScan(kpID);
// Label-first parallel_scan overload: takes the kernel label ahead of the
// policy and functor and forwards to the (policy, functor, label) overload.
// NOTE(review): some lines around the debug prints are missing from this
// listing.
475 template<
class ExecutionPolicy ,
class FunctorType >
477 void parallel_scan(
const std::string& str
478 ,
const ExecutionPolicy & policy
479 ,
const FunctorType & functor)
// When KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES evaluates to nonzero, print the
// kernel label to std::cout before the scan starts.
481 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 483 std::cout <<
"KOKKOS_DEBUG Start parallel_scan kernel: " << str << std::endl;
// Delegate to the overload that takes the label as its third argument.
486 parallel_scan(policy,functor,str);
// Same debug switch: print the label again after the scan call returns.
488 #if KOKKOS_ENABLE_DEBUG_PRINT_KERNEL_NAMES 490 std::cout <<
"KOKKOS_DEBUG End parallel_scan kernel: " << str << std::endl;
503 template<
class FunctorType ,
class Enable =
void >
504 struct FunctorTeamShmemSize
506 KOKKOS_INLINE_FUNCTION
static size_t value(
const FunctorType & ,
int ) {
return 0 ; }
509 template<
class FunctorType >
510 struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::team_shmem_size ) >::type >
512 static inline size_t value(
const FunctorType & f ,
int team_size ) {
return f.team_shmem_size( team_size ) ; }
515 template<
class FunctorType >
516 struct FunctorTeamShmemSize< FunctorType , typename Impl::enable_if< 0 < sizeof( & FunctorType::shmem_size ) >::type >
518 static inline size_t value(
const FunctorType & f ,
int team_size ) {
return f.shmem_size( team_size ) ; }
Implementation detail of parallel_scan.
Implementation of the ParallelFor operator that has a partial specialization for the device...
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type *=0)
Execute functor in parallel according to the execution policy.
Execution policy for work over a range of an integral type.