56 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
57 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
59 #include "Kokkos_Core.hpp"
60 #include "Kokkos_ArithTraits.hpp"
61 #include "impl/Kokkos_Atomic_Generic.hpp"
66 namespace KokkosRefactor {
82 template<
class IntegerType,
83 const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
85 static KOKKOS_INLINE_FUNCTION
bool
86 test (
const IntegerType x,
87 const IntegerType exclusiveUpperBound);
91 template<
class IntegerType>
93 static KOKKOS_INLINE_FUNCTION
bool
94 test (
const IntegerType x,
95 const IntegerType exclusiveUpperBound)
97 return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
102 template<
class IntegerType>
103 struct OutOfBounds<IntegerType, false> {
104 static KOKKOS_INLINE_FUNCTION
bool
105 test (
const IntegerType x,
106 const IntegerType exclusiveUpperBound)
108 return x >= exclusiveUpperBound;
114 template<
class IntegerType>
115 KOKKOS_INLINE_FUNCTION
bool
116 outOfBounds (
const IntegerType x,
const IntegerType exclusiveUpperBound)
126 template <
typename DstView,
typename SrcView,
typename IdxView,
127 typename Enabled =
void>
128 struct PackArraySingleColumn {
129 typedef typename DstView::execution_space execution_space;
130 typedef typename execution_space::size_type size_type;
137 PackArraySingleColumn (
const DstView& dst_,
141 dst(dst_), src(src_), idx(idx_), col(col_) {}
143 KOKKOS_INLINE_FUNCTION
void
144 operator() (
const size_type k)
const {
145 dst(k) = src(idx(k), col);
149 pack (
const DstView& dst,
154 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
156 (
"Tpetra::MultiVector pack one col",
157 range_type (0, idx.size ()),
158 PackArraySingleColumn (dst, src, idx, col));
162 template <
typename DstView,
165 typename SizeType =
typename DstView::execution_space::size_type,
166 typename Enabled =
void>
167 class PackArraySingleColumnWithBoundsCheck {
169 static_assert (Kokkos::Impl::is_view<DstView>::value,
170 "DstView must be a Kokkos::View.");
171 static_assert (Kokkos::Impl::is_view<SrcView>::value,
172 "SrcView must be a Kokkos::View.");
173 static_assert (Kokkos::Impl::is_view<IdxView>::value,
174 "IdxView must be a Kokkos::View.");
175 static_assert (
static_cast<int> (DstView::rank) == 1,
176 "DstView must be a rank-1 Kokkos::View.");
177 static_assert (
static_cast<int> (SrcView::rank) == 2,
178 "SrcView must be a rank-2 Kokkos::View.");
179 static_assert (
static_cast<int> (IdxView::rank) == 1,
180 "IdxView must be a rank-1 Kokkos::View.");
181 static_assert (std::is_integral<SizeType>::value,
182 "SizeType must be a built-in integer type.");
184 typedef SizeType size_type;
185 using value_type = size_t;
194 PackArraySingleColumnWithBoundsCheck (
const DstView& dst_,
197 const size_type col_) :
198 dst (dst_), src (src_), idx (idx_), col (col_) {}
200 KOKKOS_INLINE_FUNCTION
void
201 operator() (
const size_type k, value_type& lclErrCount)
const {
202 using index_type =
typename IdxView::non_const_value_type;
204 const index_type lclRow = idx(k);
205 if (lclRow <
static_cast<index_type
> (0) ||
206 lclRow >=
static_cast<index_type
> (src.extent (0))) {
210 dst(k) = src(lclRow, col);
214 KOKKOS_INLINE_FUNCTION
215 void init (value_type& initialErrorCount)
const {
216 initialErrorCount = 0;
219 KOKKOS_INLINE_FUNCTION
void
220 join (
volatile value_type& dstErrorCount,
221 const volatile value_type& srcErrorCount)
const
223 dstErrorCount += srcErrorCount;
227 pack (
const DstView& dst,
232 typedef typename DstView::execution_space execution_space;
233 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
234 typedef typename IdxView::non_const_value_type index_type;
236 size_t errorCount = 0;
237 Kokkos::parallel_reduce
238 (
"Tpetra::MultiVector pack one col debug only",
239 range_type (0, idx.size ()),
240 PackArraySingleColumnWithBoundsCheck (dst, src, idx, col),
243 if (errorCount != 0) {
247 auto idx_h = Kokkos::create_mirror_view (idx);
250 std::vector<index_type> badIndices;
251 const size_type numInds = idx_h.extent (0);
252 for (size_type k = 0; k < numInds; ++k) {
253 if (idx_h(k) <
static_cast<index_type
> (0) ||
254 idx_h(k) >=
static_cast<index_type
> (src.extent (0))) {
255 badIndices.push_back (idx_h(k));
259 TEUCHOS_TEST_FOR_EXCEPTION
260 (errorCount != badIndices.size (), std::logic_error,
261 "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
262 <<
" != badIndices.size() = " << badIndices.size () <<
". This sho"
263 "uld never happen. Please report this to the Tpetra developers.");
265 std::ostringstream os;
266 os <<
"MultiVector single-column pack kernel had "
267 << badIndices.size () <<
" out-of bounds index/ices. "
269 for (
size_t k = 0; k < badIndices.size (); ++k) {
271 if (k + 1 < badIndices.size ()) {
276 throw std::runtime_error (os.str ());
282 template <
typename DstView,
typename SrcView,
typename IdxView>
284 pack_array_single_column (
const DstView& dst,
288 const bool debug =
true)
290 static_assert (Kokkos::Impl::is_view<DstView>::value,
291 "DstView must be a Kokkos::View.");
292 static_assert (Kokkos::Impl::is_view<SrcView>::value,
293 "SrcView must be a Kokkos::View.");
294 static_assert (Kokkos::Impl::is_view<IdxView>::value,
295 "IdxView must be a Kokkos::View.");
296 static_assert (
static_cast<int> (DstView::rank) == 1,
297 "DstView must be a rank-1 Kokkos::View.");
298 static_assert (
static_cast<int> (SrcView::rank) == 2,
299 "SrcView must be a rank-2 Kokkos::View.");
300 static_assert (
static_cast<int> (IdxView::rank) == 1,
301 "IdxView must be a rank-1 Kokkos::View.");
304 typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
305 impl_type::pack (dst, src, idx, col);
308 typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
309 impl_type::pack (dst, src, idx, col);
313 template <
typename DstView,
typename SrcView,
typename IdxView,
314 typename Enabled =
void>
315 struct PackArrayMultiColumn {
316 typedef typename DstView::execution_space execution_space;
317 typedef typename execution_space::size_type size_type;
324 PackArrayMultiColumn (
const DstView& dst_,
327 const size_t numCols_) :
328 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
330 KOKKOS_INLINE_FUNCTION
void
331 operator() (
const size_type k)
const {
332 const typename IdxView::value_type localRow = idx(k);
333 const size_t offset = k*numCols;
334 for (
size_t j = 0; j < numCols; ++j) {
335 dst(offset + j) = src(localRow, j);
339 static void pack(
const DstView& dst,
343 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
345 (
"Tpetra::MultiVector pack multicol const stride",
346 range_type (0, idx.size ()),
347 PackArrayMultiColumn (dst, src, idx, numCols));
351 template <
typename DstView,
354 typename SizeType =
typename DstView::execution_space::size_type,
355 typename Enabled =
void>
356 class PackArrayMultiColumnWithBoundsCheck {
358 using size_type = SizeType;
359 using value_type = size_t;
368 PackArrayMultiColumnWithBoundsCheck (
const DstView& dst_,
371 const size_type numCols_) :
372 dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
374 KOKKOS_INLINE_FUNCTION
void
375 operator() (
const size_type k, value_type& lclErrorCount)
const {
376 typedef typename IdxView::non_const_value_type index_type;
378 const index_type lclRow = idx(k);
379 if (lclRow <
static_cast<index_type
> (0) ||
380 lclRow >=
static_cast<index_type
> (src.extent (0))) {
384 const size_type offset = k*numCols;
385 for (size_type j = 0; j < numCols; ++j) {
386 dst(offset + j) = src(lclRow, j);
391 KOKKOS_INLINE_FUNCTION
392 void init (value_type& initialErrorCount)
const {
393 initialErrorCount = 0;
396 KOKKOS_INLINE_FUNCTION
void
397 join (
volatile value_type& dstErrorCount,
398 const volatile value_type& srcErrorCount)
const
400 dstErrorCount += srcErrorCount;
404 pack (
const DstView& dst,
407 const size_type numCols)
409 typedef typename DstView::execution_space execution_space;
410 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
411 typedef typename IdxView::non_const_value_type index_type;
413 size_t errorCount = 0;
414 Kokkos::parallel_reduce
415 (
"Tpetra::MultiVector pack multicol const stride debug only",
416 range_type (0, idx.size ()),
417 PackArrayMultiColumnWithBoundsCheck (dst, src, idx, numCols),
419 if (errorCount != 0) {
423 auto idx_h = Kokkos::create_mirror_view (idx);
426 std::vector<index_type> badIndices;
427 const size_type numInds = idx_h.extent (0);
428 for (size_type k = 0; k < numInds; ++k) {
429 if (idx_h(k) <
static_cast<index_type
> (0) ||
430 idx_h(k) >=
static_cast<index_type
> (src.extent (0))) {
431 badIndices.push_back (idx_h(k));
435 TEUCHOS_TEST_FOR_EXCEPTION
436 (errorCount != badIndices.size (), std::logic_error,
437 "PackArrayMultiColumnWithBoundsCheck: errorCount = " << errorCount
438 <<
" != badIndices.size() = " << badIndices.size () <<
". This sho"
439 "uld never happen. Please report this to the Tpetra developers.");
441 std::ostringstream os;
442 os <<
"Tpetra::MultiVector multiple-column pack kernel had "
443 << badIndices.size () <<
" out-of bounds index/ices (errorCount = "
444 << errorCount <<
"): [";
445 for (
size_t k = 0; k < badIndices.size (); ++k) {
447 if (k + 1 < badIndices.size ()) {
452 throw std::runtime_error (os.str ());
458 template <
typename DstView,
462 pack_array_multi_column (
const DstView& dst,
465 const size_t numCols,
466 const bool debug =
true)
468 static_assert (Kokkos::Impl::is_view<DstView>::value,
469 "DstView must be a Kokkos::View.");
470 static_assert (Kokkos::Impl::is_view<SrcView>::value,
471 "SrcView must be a Kokkos::View.");
472 static_assert (Kokkos::Impl::is_view<IdxView>::value,
473 "IdxView must be a Kokkos::View.");
474 static_assert (
static_cast<int> (DstView::rank) == 1,
475 "DstView must be a rank-1 Kokkos::View.");
476 static_assert (
static_cast<int> (SrcView::rank) == 2,
477 "SrcView must be a rank-2 Kokkos::View.");
478 static_assert (
static_cast<int> (IdxView::rank) == 1,
479 "IdxView must be a rank-1 Kokkos::View.");
482 typedef PackArrayMultiColumnWithBoundsCheck<DstView,
483 SrcView, IdxView> impl_type;
484 impl_type::pack (dst, src, idx, numCols);
487 typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
488 impl_type::pack (dst, src, idx, numCols);
492 template <
typename DstView,
typename SrcView,
typename IdxView,
493 typename ColView,
typename Enabled =
void>
494 struct PackArrayMultiColumnVariableStride {
495 typedef typename DstView::execution_space execution_space;
496 typedef typename execution_space::size_type size_type;
504 PackArrayMultiColumnVariableStride (
const DstView& dst_,
508 const size_t numCols_) :
509 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
511 KOKKOS_INLINE_FUNCTION
512 void operator() (
const size_type k)
const {
513 const typename IdxView::value_type localRow = idx(k);
514 const size_t offset = k*numCols;
515 for (
size_t j = 0; j < numCols; ++j) {
516 dst(offset + j) = src(localRow, col(j));
520 static void pack(
const DstView& dst,
525 typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
527 (
"Tpetra::MultiVector pack multicol var stride",
528 range_type (0, idx.size ()),
529 PackArrayMultiColumnVariableStride (dst, src, idx, col, numCols));
533 template <
typename DstView,
537 typename SizeType =
typename DstView::execution_space::size_type,
538 typename Enabled =
void>
539 class PackArrayMultiColumnVariableStrideWithBoundsCheck {
541 using size_type = SizeType;
542 using value_type = size_t;
552 PackArrayMultiColumnVariableStrideWithBoundsCheck (
const DstView& dst_,
556 const size_type numCols_) :
557 dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
559 KOKKOS_INLINE_FUNCTION
void
560 operator() (
const size_type k, value_type& lclErrorCount)
const {
561 typedef typename IdxView::non_const_value_type row_index_type;
562 typedef typename ColView::non_const_value_type col_index_type;
564 const row_index_type lclRow = idx(k);
565 if (lclRow <
static_cast<row_index_type
> (0) ||
566 lclRow >=
static_cast<row_index_type
> (src.extent (0))) {
570 const size_type offset = k*numCols;
571 for (size_type j = 0; j < numCols; ++j) {
572 const col_index_type lclCol = col(j);
573 if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
577 dst(offset + j) = src(lclRow, lclCol);
583 KOKKOS_INLINE_FUNCTION
void
584 init (value_type& initialErrorCount)
const {
585 initialErrorCount = 0;
588 KOKKOS_INLINE_FUNCTION
void
589 join (
volatile value_type& dstErrorCount,
590 const volatile value_type& srcErrorCount)
const
592 dstErrorCount += srcErrorCount;
596 pack (
const DstView& dst,
600 const size_type numCols)
602 using execution_space =
typename DstView::execution_space;
603 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
604 using row_index_type =
typename IdxView::non_const_value_type;
605 using col_index_type =
typename ColView::non_const_value_type;
607 size_t errorCount = 0;
608 Kokkos::parallel_reduce
609 (
"Tpetra::MultiVector pack multicol var stride debug only",
610 range_type (0, idx.size ()),
611 PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
614 if (errorCount != 0) {
615 constexpr
size_t maxNumBadIndicesToPrint = 100;
617 std::ostringstream os;
618 os <<
"Tpetra::MultiVector multicolumn variable stride pack kernel "
619 "found " << errorCount
620 <<
" error" << (errorCount != size_t (1) ?
"s" :
"") <<
". ";
625 auto idx_h = Kokkos::create_mirror_view (idx);
628 std::vector<row_index_type> badRows;
629 const size_type numRowInds = idx_h.extent (0);
630 for (size_type k = 0; k < numRowInds; ++k) {
631 if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
632 badRows.push_back (idx_h(k));
636 if (badRows.size () != 0) {
637 os << badRows.size () <<
" out-of-bounds row ind"
638 << (badRows.size () != size_t (1) ?
"ices" :
"ex");
639 if (badRows.size () <= maxNumBadIndicesToPrint) {
641 for (
size_t k = 0; k < badRows.size (); ++k) {
643 if (k + 1 < badRows.size ()) {
654 os <<
"No out-of-bounds row indices. ";
659 auto col_h = Kokkos::create_mirror_view (col);
662 std::vector<col_index_type> badCols;
663 const size_type numColInds = col_h.extent (0);
664 for (size_type k = 0; k < numColInds; ++k) {
665 if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
666 badCols.push_back (col_h(k));
670 if (badCols.size () != 0) {
671 os << badCols.size () <<
" out-of-bounds column ind"
672 << (badCols.size () != size_t (1) ?
"ices" :
"ex");
673 if (badCols.size () <= maxNumBadIndicesToPrint) {
675 for (
size_t k = 0; k < badCols.size (); ++k) {
677 if (k + 1 < badCols.size ()) {
688 os <<
"No out-of-bounds column indices. ";
691 TEUCHOS_TEST_FOR_EXCEPTION
692 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
693 std::logic_error,
"Tpetra::MultiVector variable stride pack "
694 "kernel reports errorCount=" << errorCount <<
", but we failed "
695 "to find any bad rows or columns. This should never happen. "
696 "Please report this to the Tpetra developers.");
698 throw std::runtime_error (os.str ());
703 template <
typename DstView,
708 pack_array_multi_column_variable_stride (
const DstView& dst,
712 const size_t numCols,
713 const bool debug =
true)
715 static_assert (Kokkos::Impl::is_view<DstView>::value,
716 "DstView must be a Kokkos::View.");
717 static_assert (Kokkos::Impl::is_view<SrcView>::value,
718 "SrcView must be a Kokkos::View.");
719 static_assert (Kokkos::Impl::is_view<IdxView>::value,
720 "IdxView must be a Kokkos::View.");
721 static_assert (Kokkos::Impl::is_view<ColView>::value,
722 "ColView must be a Kokkos::View.");
723 static_assert (
static_cast<int> (DstView::rank) == 1,
724 "DstView must be a rank-1 Kokkos::View.");
725 static_assert (
static_cast<int> (SrcView::rank) == 2,
726 "SrcView must be a rank-2 Kokkos::View.");
727 static_assert (
static_cast<int> (IdxView::rank) == 1,
728 "IdxView must be a rank-1 Kokkos::View.");
729 static_assert (
static_cast<int> (ColView::rank) == 1,
730 "ColView must be a rank-1 Kokkos::View.");
733 typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
734 SrcView, IdxView, ColView> impl_type;
735 impl_type::pack (dst, src, idx, col, numCols);
738 typedef PackArrayMultiColumnVariableStride<DstView,
739 SrcView, IdxView, ColView> impl_type;
740 impl_type::pack (dst, src, idx, col, numCols);
746 struct atomic_tag {};
747 struct nonatomic_tag {};
751 KOKKOS_INLINE_FUNCTION
752 void operator() (atomic_tag, SC& dest,
const SC& src)
const {
753 Kokkos::atomic_add (&dest, src);
757 KOKKOS_INLINE_FUNCTION
758 void operator() (nonatomic_tag, SC& dest,
const SC& src)
const {
769 KOKKOS_INLINE_FUNCTION
770 void operator() (atomic_tag, SC& dest,
const SC& src)
const {
775 KOKKOS_INLINE_FUNCTION
776 void operator() (nonatomic_tag, SC& dest,
const SC& src)
const {
782 template<
class Scalar1,
class Scalar2>
784 KOKKOS_INLINE_FUNCTION
785 static Scalar1 apply(
const Scalar1& val1,
const Scalar2& val2) {
786 const auto val1_abs = Kokkos::ArithTraits<Scalar1>::abs(val1);
787 const auto val2_abs = Kokkos::ArithTraits<Scalar2>::abs(val2);
788 return val1_abs > val2_abs ? Scalar1(val1_abs) : Scalar1(val2_abs);
793 template <
typename SC>
794 KOKKOS_INLINE_FUNCTION
795 void operator() (atomic_tag, SC& dest,
const SC& src)
const {
796 Kokkos::Impl::atomic_fetch_oper (AbsMaxOper<SC, SC> (), &dest, src);
799 template <
typename SC>
800 KOKKOS_INLINE_FUNCTION
801 void operator() (nonatomic_tag, SC& dest,
const SC& src)
const {
802 dest = AbsMaxOper<SC, SC> ().apply (dest, src);
806 template <
typename ExecutionSpace,
811 typename Enabled =
void>
812 class UnpackArrayMultiColumn {
814 static_assert (Kokkos::Impl::is_view<DstView>::value,
815 "DstView must be a Kokkos::View.");
816 static_assert (Kokkos::Impl::is_view<SrcView>::value,
817 "SrcView must be a Kokkos::View.");
818 static_assert (Kokkos::Impl::is_view<IdxView>::value,
819 "IdxView must be a Kokkos::View.");
820 static_assert (
static_cast<int> (DstView::rank) == 2,
821 "DstView must be a rank-2 Kokkos::View.");
822 static_assert (
static_cast<int> (SrcView::rank) == 1,
823 "SrcView must be a rank-1 Kokkos::View.");
824 static_assert (
static_cast<int> (IdxView::rank) == 1,
825 "IdxView must be a rank-1 Kokkos::View.");
828 typedef typename ExecutionSpace::execution_space execution_space;
829 typedef typename execution_space::size_type size_type;
839 UnpackArrayMultiColumn (
const ExecutionSpace& ,
844 const size_t numCols_) :
852 template<
class TagType>
853 KOKKOS_INLINE_FUNCTION
void
854 operator() (TagType tag,
const size_type k)
const
857 (std::is_same<TagType, atomic_tag>::value ||
858 std::is_same<TagType, nonatomic_tag>::value,
859 "TagType must be atomic_tag or nonatomic_tag.");
861 const typename IdxView::value_type localRow = idx(k);
862 const size_t offset = k*numCols;
863 for (
size_t j = 0; j < numCols; ++j) {
864 op (tag, dst(localRow, j), src(offset+j));
869 unpack (
const ExecutionSpace& execSpace,
874 const size_t numCols,
875 const bool use_atomic_updates)
877 if (use_atomic_updates) {
879 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
881 (
"Tpetra::MultiVector unpack const stride atomic",
882 range_type (0, idx.size ()),
883 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
887 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
889 (
"Tpetra::MultiVector unpack const stride nonatomic",
890 range_type (0, idx.size ()),
891 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
896 template <
typename ExecutionSpace,
901 typename SizeType =
typename ExecutionSpace::execution_space::size_type,
902 typename Enabled =
void>
903 class UnpackArrayMultiColumnWithBoundsCheck {
905 static_assert (Kokkos::Impl::is_view<DstView>::value,
906 "DstView must be a Kokkos::View.");
907 static_assert (Kokkos::Impl::is_view<SrcView>::value,
908 "SrcView must be a Kokkos::View.");
909 static_assert (Kokkos::Impl::is_view<IdxView>::value,
910 "IdxView must be a Kokkos::View.");
911 static_assert (
static_cast<int> (DstView::rank) == 2,
912 "DstView must be a rank-2 Kokkos::View.");
913 static_assert (
static_cast<int> (SrcView::rank) == 1,
914 "SrcView must be a rank-1 Kokkos::View.");
915 static_assert (
static_cast<int> (IdxView::rank) == 1,
916 "IdxView must be a rank-1 Kokkos::View.");
917 static_assert (std::is_integral<SizeType>::value,
918 "SizeType must be a built-in integer type.");
921 using execution_space =
typename ExecutionSpace::execution_space;
922 using size_type = SizeType;
923 using value_type = size_t;
933 UnpackArrayMultiColumnWithBoundsCheck (
const ExecutionSpace& ,
938 const size_type numCols_) :
946 template<
class TagType>
947 KOKKOS_INLINE_FUNCTION
void
948 operator() (TagType tag,
950 size_t& lclErrCount)
const
953 (std::is_same<TagType, atomic_tag>::value ||
954 std::is_same<TagType, nonatomic_tag>::value,
955 "TagType must be atomic_tag or nonatomic_tag.");
956 using index_type =
typename IdxView::non_const_value_type;
958 const index_type lclRow = idx(k);
959 if (lclRow <
static_cast<index_type
> (0) ||
960 lclRow >=
static_cast<index_type
> (dst.extent (0))) {
964 const size_type offset = k*numCols;
965 for (size_type j = 0; j < numCols; ++j) {
966 op (tag, dst(lclRow,j), src(offset+j));
971 template<
class TagType>
972 KOKKOS_INLINE_FUNCTION
void
973 init (TagType,
size_t& initialErrorCount)
const {
974 initialErrorCount = 0;
977 template<
class TagType>
978 KOKKOS_INLINE_FUNCTION
void
980 volatile size_t& dstErrorCount,
981 const volatile size_t& srcErrorCount)
const
983 dstErrorCount += srcErrorCount;
987 unpack (
const ExecutionSpace& execSpace,
992 const size_type numCols,
993 const bool use_atomic_updates)
995 using index_type =
typename IdxView::non_const_value_type;
997 size_t errorCount = 0;
998 if (use_atomic_updates) {
1000 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1001 Kokkos::parallel_reduce
1002 (
"Tpetra::MultiVector unpack multicol const stride atomic debug only",
1003 range_type (0, idx.size ()),
1004 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1010 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1011 Kokkos::parallel_reduce
1012 (
"Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
1013 range_type (0, idx.size ()),
1014 UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1019 if (errorCount != 0) {
1023 auto idx_h = Kokkos::create_mirror_view (idx);
1026 std::vector<index_type> badIndices;
1027 const size_type numInds = idx_h.extent (0);
1028 for (size_type k = 0; k < numInds; ++k) {
1029 if (idx_h(k) <
static_cast<index_type
> (0) ||
1030 idx_h(k) >=
static_cast<index_type
> (dst.extent (0))) {
1031 badIndices.push_back (idx_h(k));
1035 if (errorCount != badIndices.size ()) {
1036 std::ostringstream os;
1037 os <<
"MultiVector unpack kernel: errorCount = " << errorCount
1038 <<
" != badIndices.size() = " << badIndices.size ()
1039 <<
". This should never happen. "
1040 "Please report this to the Tpetra developers.";
1041 throw std::logic_error (os.str ());
1044 std::ostringstream os;
1045 os <<
"MultiVector unpack kernel had " << badIndices.size ()
1046 <<
" out-of bounds index/ices. Here they are: [";
1047 for (
size_t k = 0; k < badIndices.size (); ++k) {
1048 os << badIndices[k];
1049 if (k + 1 < badIndices.size ()) {
1054 throw std::runtime_error (os.str ());
1059 template <
typename ExecutionSpace,
1065 unpack_array_multi_column (
const ExecutionSpace& execSpace,
1070 const size_t numCols,
1071 const bool use_atomic_updates,
1074 static_assert (Kokkos::Impl::is_view<DstView>::value,
1075 "DstView must be a Kokkos::View.");
1076 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1077 "SrcView must be a Kokkos::View.");
1078 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1079 "IdxView must be a Kokkos::View.");
1080 static_assert (
static_cast<int> (DstView::rank) == 2,
1081 "DstView must be a rank-2 Kokkos::View.");
1082 static_assert (
static_cast<int> (SrcView::rank) == 1,
1083 "SrcView must be a rank-1 Kokkos::View.");
1084 static_assert (
static_cast<int> (IdxView::rank) == 1,
1085 "IdxView must be a rank-1 Kokkos::View.");
1088 typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1089 DstView, SrcView, IdxView, Op> impl_type;
1090 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1091 use_atomic_updates);
1094 typedef UnpackArrayMultiColumn<ExecutionSpace,
1095 DstView, SrcView, IdxView, Op> impl_type;
1096 impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1097 use_atomic_updates);
1101 template <
typename ExecutionSpace,
1107 typename Enabled =
void>
1108 class UnpackArrayMultiColumnVariableStride {
1110 static_assert (Kokkos::Impl::is_view<DstView>::value,
1111 "DstView must be a Kokkos::View.");
1112 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1113 "SrcView must be a Kokkos::View.");
1114 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1115 "IdxView must be a Kokkos::View.");
1116 static_assert (Kokkos::Impl::is_view<ColView>::value,
1117 "ColView must be a Kokkos::View.");
1118 static_assert (
static_cast<int> (DstView::rank) == 2,
1119 "DstView must be a rank-2 Kokkos::View.");
1120 static_assert (
static_cast<int> (SrcView::rank) == 1,
1121 "SrcView must be a rank-1 Kokkos::View.");
1122 static_assert (
static_cast<int> (IdxView::rank) == 1,
1123 "IdxView must be a rank-1 Kokkos::View.");
1124 static_assert (
static_cast<int> (ColView::rank) == 1,
1125 "ColView must be a rank-1 Kokkos::View.");
1128 using execution_space =
typename ExecutionSpace::execution_space;
1129 using size_type =
typename execution_space::size_type;
1140 UnpackArrayMultiColumnVariableStride (
const ExecutionSpace& ,
1141 const DstView& dst_,
1142 const SrcView& src_,
1143 const IdxView& idx_,
1144 const ColView& col_,
1146 const size_t numCols_) :
1155 template<
class TagType>
1156 KOKKOS_INLINE_FUNCTION
void
1157 operator() (TagType tag,
const size_type k)
const
1160 (std::is_same<TagType, atomic_tag>::value ||
1161 std::is_same<TagType, nonatomic_tag>::value,
1162 "TagType must be atomic_tag or nonatomic_tag.");
1164 const typename IdxView::value_type localRow = idx(k);
1165 const size_t offset = k*numCols;
1166 for (
size_t j = 0; j < numCols; ++j) {
1167 op (tag, dst(localRow, col(j)), src(offset+j));
1172 unpack (
const ExecutionSpace& execSpace,
1178 const size_t numCols,
1179 const bool use_atomic_updates)
1181 if (use_atomic_updates) {
1183 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1184 Kokkos::parallel_for
1185 (
"Tpetra::MultiVector unpack var stride atomic",
1186 range_type (0, idx.size ()),
1187 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1188 idx, col, op, numCols));
1192 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1193 Kokkos::parallel_for
1194 (
"Tpetra::MultiVector unpack var stride nonatomic",
1195 range_type (0, idx.size ()),
1196 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1197 idx, col, op, numCols));
1202 template <
typename ExecutionSpace,
1208 typename SizeType =
typename ExecutionSpace::execution_space::size_type,
1209 typename Enabled =
void>
1210 class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1212 static_assert (Kokkos::Impl::is_view<DstView>::value,
1213 "DstView must be a Kokkos::View.");
1214 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1215 "SrcView must be a Kokkos::View.");
1216 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1217 "IdxView must be a Kokkos::View.");
1218 static_assert (Kokkos::Impl::is_view<ColView>::value,
1219 "ColView must be a Kokkos::View.");
1220 static_assert (
static_cast<int> (DstView::rank) == 2,
1221 "DstView must be a rank-2 Kokkos::View.");
1222 static_assert (
static_cast<int> (SrcView::rank) == 1,
1223 "SrcView must be a rank-1 Kokkos::View.");
1224 static_assert (
static_cast<int> (IdxView::rank) == 1,
1225 "IdxView must be a rank-1 Kokkos::View.");
1226 static_assert (
static_cast<int> (ColView::rank) == 1,
1227 "ColView must be a rank-1 Kokkos::View.");
1228 static_assert (std::is_integral<SizeType>::value,
1229 "SizeType must be a built-in integer type.");
1232 using execution_space =
typename ExecutionSpace::execution_space;
1233 using size_type = SizeType;
1234 using value_type = size_t;
1245 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1246 (
const ExecutionSpace& ,
1247 const DstView& dst_,
1248 const SrcView& src_,
1249 const IdxView& idx_,
1250 const ColView& col_,
1252 const size_t numCols_) :
1261 template<
class TagType>
1262 KOKKOS_INLINE_FUNCTION
void
1263 operator() (TagType tag,
1265 value_type& lclErrorCount)
const
1268 (std::is_same<TagType, atomic_tag>::value ||
1269 std::is_same<TagType, nonatomic_tag>::value,
1270 "TagType must be atomic_tag or nonatomic_tag.");
1271 using row_index_type =
typename IdxView::non_const_value_type;
1272 using col_index_type =
typename ColView::non_const_value_type;
1274 const row_index_type lclRow = idx(k);
1275 if (lclRow <
static_cast<row_index_type
> (0) ||
1276 lclRow >=
static_cast<row_index_type
> (dst.extent (0))) {
1280 const size_type offset = k * numCols;
1281 for (size_type j = 0; j < numCols; ++j) {
1282 const col_index_type lclCol = col(j);
1283 if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
1287 op (tag, dst(lclRow, col(j)), src(offset+j));
1293 KOKKOS_INLINE_FUNCTION
void
1294 init (value_type& initialErrorCount)
const {
1295 initialErrorCount = 0;
1298 KOKKOS_INLINE_FUNCTION
void
1299 join (
volatile value_type& dstErrorCount,
1300 const volatile value_type& srcErrorCount)
const
1302 dstErrorCount += srcErrorCount;
1306 unpack (
const ExecutionSpace& execSpace,
1312 const size_type numCols,
1313 const bool use_atomic_updates)
1315 using row_index_type =
typename IdxView::non_const_value_type;
1316 using col_index_type =
typename ColView::non_const_value_type;
1318 size_t errorCount = 0;
1319 if (use_atomic_updates) {
1321 Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1322 Kokkos::parallel_reduce
1323 (
"Tpetra::MultiVector unpack var stride atomic debug only",
1324 range_type (0, idx.size ()),
1325 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1326 (execSpace, dst, src, idx, col, op, numCols),
1331 Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1332 Kokkos::parallel_reduce
1333 (
"Tpetra::MultiVector unpack var stride nonatomic debug only",
1334 range_type (0, idx.size ()),
1335 UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1336 (execSpace, dst, src, idx, col, op, numCols),
1340 if (errorCount != 0) {
1341 constexpr
size_t maxNumBadIndicesToPrint = 100;
1343 std::ostringstream os;
1344 os <<
"Tpetra::MultiVector multicolumn variable stride unpack kernel "
1345 "found " << errorCount
1346 <<
" error" << (errorCount != size_t (1) ?
"s" :
"") <<
". ";
1352 auto idx_h = Kokkos::create_mirror_view (idx);
1355 std::vector<row_index_type> badRows;
1356 const size_type numRowInds = idx_h.extent (0);
1357 for (size_type k = 0; k < numRowInds; ++k) {
1358 if (idx_h(k) <
static_cast<row_index_type
> (0) ||
1359 idx_h(k) >=
static_cast<row_index_type
> (dst.extent (0))) {
1360 badRows.push_back (idx_h(k));
1364 if (badRows.size () != 0) {
1365 os << badRows.size () <<
" out-of-bounds row ind"
1366 << (badRows.size () != size_t (1) ?
"ices" :
"ex");
1367 if (badRows.size () <= maxNumBadIndicesToPrint) {
1369 for (
size_t k = 0; k < badRows.size (); ++k) {
1371 if (k + 1 < badRows.size ()) {
1382 os <<
"No out-of-bounds row indices. ";
1387 auto col_h = Kokkos::create_mirror_view (col);
1390 std::vector<col_index_type> badCols;
1391 const size_type numColInds = col_h.extent (0);
1392 for (size_type k = 0; k < numColInds; ++k) {
1393 if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1394 badCols.push_back (col_h(k));
1398 if (badCols.size () != 0) {
1399 os << badCols.size () <<
" out-of-bounds column ind"
1400 << (badCols.size () != size_t (1) ?
"ices" :
"ex");
1401 if (badCols.size () <= maxNumBadIndicesToPrint) {
1402 for (
size_t k = 0; k < badCols.size (); ++k) {
1405 if (k + 1 < badCols.size ()) {
1416 os <<
"No out-of-bounds column indices. ";
1419 TEUCHOS_TEST_FOR_EXCEPTION
1420 (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
1421 std::logic_error,
"Tpetra::MultiVector variable stride unpack "
1422 "kernel reports errorCount=" << errorCount <<
", but we failed "
1423 "to find any bad rows or columns. This should never happen. "
1424 "Please report this to the Tpetra developers.");
1426 throw std::runtime_error (os.str ());
1431 template <
typename ExecutionSpace,
1438 unpack_array_multi_column_variable_stride (
const ExecutionSpace& execSpace,
1444 const size_t numCols,
1445 const bool use_atomic_updates,
1448 static_assert (Kokkos::Impl::is_view<DstView>::value,
1449 "DstView must be a Kokkos::View.");
1450 static_assert (Kokkos::Impl::is_view<SrcView>::value,
1451 "SrcView must be a Kokkos::View.");
1452 static_assert (Kokkos::Impl::is_view<IdxView>::value,
1453 "IdxView must be a Kokkos::View.");
1454 static_assert (Kokkos::Impl::is_view<ColView>::value,
1455 "ColView must be a Kokkos::View.");
1456 static_assert (
static_cast<int> (DstView::rank) == 2,
1457 "DstView must be a rank-2 Kokkos::View.");
1458 static_assert (
static_cast<int> (SrcView::rank) == 1,
1459 "SrcView must be a rank-1 Kokkos::View.");
1460 static_assert (
static_cast<int> (IdxView::rank) == 1,
1461 "IdxView must be a rank-1 Kokkos::View.");
1462 static_assert (
static_cast<int> (ColView::rank) == 1,
1463 "ColView must be a rank-1 Kokkos::View.");
1467 UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1468 DstView, SrcView, IdxView, ColView, Op>;
1469 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1470 use_atomic_updates);
1473 using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1474 DstView, SrcView, IdxView, ColView, Op>;
1475 impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1476 use_atomic_updates);
1480 template <
typename DstView,
typename SrcView,
1481 typename DstIdxView,
typename SrcIdxView,
typename Op,
1482 typename Enabled =
void>
1483 struct PermuteArrayMultiColumn {
1484 typedef typename DstView::execution_space execution_space;
1485 typedef typename execution_space::size_type size_type;
1494 PermuteArrayMultiColumn (
const DstView& dst_,
1495 const SrcView& src_,
1496 const DstIdxView& dst_idx_,
1497 const SrcIdxView& src_idx_,
1498 const size_t numCols_,
1500 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1501 numCols(numCols_), op(op_) {}
1503 KOKKOS_INLINE_FUNCTION
void
1504 operator() (
const size_type k)
const {
1505 const typename DstIdxView::value_type toRow = dst_idx(k);
1506 const typename SrcIdxView::value_type fromRow = src_idx(k);
1508 for (
size_t j = 0; j < numCols; ++j) {
1509 op(tag, dst(toRow, j), src(fromRow, j));
1514 permute (
const DstView& dst,
1516 const DstIdxView& dst_idx,
1517 const SrcIdxView& src_idx,
1518 const size_t numCols,
1522 Kokkos::RangePolicy<execution_space, size_type>;
1524 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1525 Kokkos::parallel_for
1526 (
"Tpetra::MultiVector permute multicol const stride",
1528 PermuteArrayMultiColumn (dst, src, dst_idx, src_idx, numCols, op));
1534 template <
typename DstView,
typename SrcView,
1535 typename DstIdxView,
typename SrcIdxView,
typename Op>
1536 void permute_array_multi_column(
const DstView& dst,
1538 const DstIdxView& dst_idx,
1539 const SrcIdxView& src_idx,
1542 PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView,Op>::permute(
1543 dst, src, dst_idx, src_idx, numCols, op);
1546 template <
typename DstView,
typename SrcView,
1547 typename DstIdxView,
typename SrcIdxView,
1548 typename DstColView,
typename SrcColView,
typename Op,
1549 typename Enabled =
void>
1550 struct PermuteArrayMultiColumnVariableStride {
1551 typedef typename DstView::execution_space execution_space;
1552 typedef typename execution_space::size_type size_type;
1563 PermuteArrayMultiColumnVariableStride(
const DstView& dst_,
1564 const SrcView& src_,
1565 const DstIdxView& dst_idx_,
1566 const SrcIdxView& src_idx_,
1567 const DstColView& dst_col_,
1568 const SrcColView& src_col_,
1569 const size_t numCols_,
1571 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1572 dst_col(dst_col_), src_col(src_col_),
1573 numCols(numCols_), op(op_) {}
1575 KOKKOS_INLINE_FUNCTION
void
1576 operator() (
const size_type k)
const {
1577 const typename DstIdxView::value_type toRow = dst_idx(k);
1578 const typename SrcIdxView::value_type fromRow = src_idx(k);
1579 const nonatomic_tag tag;
1580 for (
size_t j = 0; j < numCols; ++j) {
1581 op(tag, dst(toRow, dst_col(j)), src(fromRow, src_col(j)));
1586 permute (
const DstView& dst,
1588 const DstIdxView& dst_idx,
1589 const SrcIdxView& src_idx,
1590 const DstColView& dst_col,
1591 const SrcColView& src_col,
1592 const size_t numCols,
1595 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
1596 const size_type n = std::min (dst_idx.size (), src_idx.size ());
1597 Kokkos::parallel_for
1598 (
"Tpetra::MultiVector permute multicol var stride",
1600 PermuteArrayMultiColumnVariableStride (dst, src, dst_idx, src_idx,
1601 dst_col, src_col, numCols, op));
1607 template <
typename DstView,
typename SrcView,
1608 typename DstIdxView,
typename SrcIdxView,
1609 typename DstColView,
typename SrcColView,
typename Op>
1610 void permute_array_multi_column_variable_stride(
const DstView& dst,
1612 const DstIdxView& dst_idx,
1613 const SrcIdxView& src_idx,
1614 const DstColView& dst_col,
1615 const SrcColView& src_col,
1618 PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1619 DstIdxView,SrcIdxView,DstColView,SrcColView,Op>::permute(
1620 dst, src, dst_idx, src_idx, dst_col, src_col, numCols, op);
Implementation details of Tpetra.
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...