42 #ifndef TPETRA_MULTIVECTOR_DEF_HPP 43 #define TPETRA_MULTIVECTOR_DEF_HPP 54 #include "Tpetra_Vector.hpp" 55 #include "Tpetra_Details_gathervPrint.hpp" 58 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp" 60 #include "KokkosCompat_View.hpp" 61 #include "Kokkos_MV_GEMM.hpp" 62 #include "Kokkos_Blas1_MV.hpp" 63 #include "Kokkos_Blas1.hpp" 64 #include "Kokkos_Random.hpp" 66 #ifdef HAVE_TPETRA_INST_FLOAT128 69 template<
class Generator>
70 struct rand<Generator, __float128> {
71 static KOKKOS_INLINE_FUNCTION __float128 max ()
73 return static_cast<__float128
> (1.0);
75 static KOKKOS_INLINE_FUNCTION __float128
80 const __float128 scalingFactor =
81 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
82 static_cast<__float128> (2.0);
83 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
84 const __float128 lowerOrderTerm =
85 static_cast<__float128
> (gen.drand ()) * scalingFactor;
86 return higherOrderTerm + lowerOrderTerm;
88 static KOKKOS_INLINE_FUNCTION __float128
89 draw (Generator& gen,
const __float128& range)
92 const __float128 scalingFactor =
93 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
94 static_cast<__float128> (2.0);
95 const __float128 higherOrderTerm =
96 static_cast<__float128
> (gen.drand (range));
97 const __float128 lowerOrderTerm =
98 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
99 return higherOrderTerm + lowerOrderTerm;
101 static KOKKOS_INLINE_FUNCTION __float128
102 draw (Generator& gen,
const __float128& start,
const __float128& end)
105 const __float128 scalingFactor =
106 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
107 static_cast<__float128> (2.0);
108 const __float128 higherOrderTerm =
109 static_cast<__float128
> (gen.drand (start, end));
110 const __float128 lowerOrderTerm =
111 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
112 return higherOrderTerm + lowerOrderTerm;
116 #endif // HAVE_TPETRA_INST_FLOAT128 134 template<
class ST,
class LO,
class GO,
class NT>
136 allocDualView (
const size_t lclNumRows,
137 const size_t numCols,
138 const bool zeroOut =
true,
139 const bool allowPadding =
false)
141 using Kokkos::AllowPadding;
142 using Kokkos::view_alloc;
143 using Kokkos::WithoutInitializing;
145 typedef typename dual_view_type::t_dev dev_view_type;
150 const std::string label (
"MV::DualView");
160 dev_view_type d_view;
163 d_view = dev_view_type (view_alloc (label, AllowPadding),
164 lclNumRows, numCols);
167 d_view = dev_view_type (view_alloc (label),
168 lclNumRows, numCols);
173 d_view = dev_view_type (view_alloc (label,
176 lclNumRows, numCols);
179 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
180 lclNumRows, numCols);
182 #ifdef HAVE_TPETRA_DEBUG 190 const ST nan = Kokkos::Details::ArithTraits<ST>::nan ();
191 KokkosBlas::fill (d_view, nan);
192 #endif // HAVE_TPETRA_DEBUG 194 #ifdef HAVE_TPETRA_DEBUG 195 TEUCHOS_TEST_FOR_EXCEPTION
196 (static_cast<size_t> (d_view.dimension_0 ()) != lclNumRows ||
197 static_cast<size_t> (d_view.dimension_1 ()) != numCols, std::logic_error,
198 "allocDualView: d_view's dimensions actual dimensions do not match " 199 "requested dimensions. d_view is " << d_view.dimension_0 () <<
" x " <<
200 d_view.dimension_1 () <<
"; requested " << lclNumRows <<
" x " << numCols
201 <<
". Please report this bug to the Tpetra developers.");
202 #endif // HAVE_TPETRA_DEBUG 204 typename dual_view_type::t_host h_view = Kokkos::create_mirror_view (d_view);
206 dual_view_type dv (d_view, h_view);
211 dv.template modify<typename dev_view_type::memory_space> ();
220 template<
class T,
class ExecSpace>
221 struct MakeUnmanagedView {
234 typedef typename Kokkos::Impl::if_c<
235 Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
236 typename ExecSpace::memory_space,
237 Kokkos::HostSpace>::value,
238 typename ExecSpace::device_type,
239 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
240 typedef Kokkos::LayoutLeft array_layout;
241 typedef Kokkos::View<T*, array_layout, host_exec_space,
242 Kokkos::MemoryUnmanaged> view_type;
244 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
246 const size_t numEnt =
static_cast<size_t> (x_in.size ());
250 return view_type (x_in.getRawPtr (), numEnt);
258 template<
class DualViewType>
260 takeSubview (
const DualViewType& X,
262 #ifdef KOKKOS_USING_EXPERIMENTAL_VIEW
263 const Kokkos::Experimental::Impl::ALL_t&,
267 const std::pair<size_t, size_t>& colRng)
269 if (X.dimension_0 () == 0 && X.dimension_1 () != 0) {
270 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
273 return subview (X, Kokkos::ALL (), colRng);
280 template<
class DualViewType>
282 takeSubview (
const DualViewType& X,
283 const std::pair<size_t, size_t>& rowRng,
284 const std::pair<size_t, size_t>& colRng)
286 if (X.dimension_0 () == 0 && X.dimension_1 () != 0) {
287 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
290 return subview (X, rowRng, colRng);
298 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
300 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
301 vectorIndexOutOfRange (
const size_t VectorIndex)
const {
302 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
305 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
306 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::
311 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
314 const size_t numVecs,
315 const bool zeroOut) :
320 const size_t lclNumRows = this->getLocalLength ();
321 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
325 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
329 view_ (source.view_),
330 origView_ (source.origView_),
331 whichVectors_ (source.whichVectors_)
334 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
337 const Teuchos::DataAccess copyOrView) :
339 view_ (source.view_),
340 origView_ (source.origView_),
341 whichVectors_ (source.whichVectors_)
344 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, " 345 "const Teuchos::DataAccess): ";
349 if (copyOrView == Teuchos::Copy) {
353 this->view_ = cpy.view_;
354 this->origView_ = cpy.origView_;
355 this->whichVectors_ = cpy.whichVectors_;
357 else if (copyOrView == Teuchos::View) {
360 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
361 true, std::invalid_argument,
"Second argument 'copyOrView' has an " 362 "invalid value " << copyOrView <<
". Valid values include " 363 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = " 364 << Teuchos::View <<
".");
368 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
371 const dual_view_type& view) :
376 const char tfecfFuncName[] =
"MultiVector(map,view): ";
381 origView_.stride (stride);
382 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
383 origView_.dimension_0 ();
384 const size_t lclNumRows = this->getLocalLength ();
385 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
386 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's " 387 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
388 <<
". This may also mean that the input view's first dimension (number " 389 "of rows = " << view.dimension_0 () <<
") does not not match the number " 390 "of entries in the Map on the calling process.");
394 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
397 const typename dual_view_type::t_dev& d_view) :
400 using Teuchos::ArrayRCP;
402 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
409 d_view.stride (stride);
410 const size_t LDA = (d_view.dimension_1 () > 1) ? stride[1] :
411 d_view.dimension_0 ();
412 const size_t lclNumRows = this->getLocalLength ();
413 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
414 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::View's " 415 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
416 <<
". This may also mean that the input view's first dimension (number " 417 "of rows = " << d_view.dimension_0 () <<
") does not not match the " 418 "number of entries in the Map on the calling process.");
423 view_ = dual_view_type (d_view, Kokkos::create_mirror (d_view));
427 this->
template modify<device_type> ();
431 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
434 const dual_view_type& view,
435 const dual_view_type& origView) :
440 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
445 origView_.stride (stride);
446 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
447 origView_.dimension_0 ();
448 const size_t lclNumRows = this->getLocalLength ();
449 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
450 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's " 451 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
452 <<
". This may also mean that the input origView's first dimension (number " 453 "of rows = " << origView.dimension_0 () <<
") does not not match the number " 454 "of entries in the Map on the calling process.");
458 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
461 const dual_view_type& view,
462 const Teuchos::ArrayView<const size_t>& whichVectors) :
466 whichVectors_ (whichVectors.begin (), whichVectors.end ())
469 using Kokkos::subview;
470 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
472 const size_t lclNumRows = map.is_null () ? size_t (0) :
473 map->getNodeNumElements ();
480 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
481 view.dimension_1 () != 0 &&
static_cast<size_t> (view.dimension_0 ()) < lclNumRows,
482 std::invalid_argument,
"view.dimension_0() = " << view.dimension_0 ()
483 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
484 if (whichVectors.size () != 0) {
485 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
486 view.dimension_1 () != 0 && view.dimension_1 () == 0,
487 std::invalid_argument,
"view.dimension_1() = 0, but whichVectors.size()" 488 " = " << whichVectors.size () <<
" > 0.");
489 size_t maxColInd = 0;
490 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
491 for (size_type k = 0; k < whichVectors.size (); ++k) {
492 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
493 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
494 std::invalid_argument,
"whichVectors[" << k <<
"] = " 495 "Teuchos::OrdinalTraits<size_t>::invalid().");
496 maxColInd = std::max (maxColInd, whichVectors[k]);
498 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
499 view.dimension_1 () != 0 &&
static_cast<size_t> (view.dimension_1 ()) <= maxColInd,
500 std::invalid_argument,
"view.dimension_1() = " << view.dimension_1 ()
501 <<
" <= max(whichVectors) = " << maxColInd <<
".");
507 origView_.stride (stride);
508 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
509 origView_.dimension_0 ();
510 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
511 LDA < lclNumRows, std::invalid_argument,
512 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
514 if (whichVectors.size () == 1) {
523 const std::pair<size_t, size_t> colRng (whichVectors[0],
524 whichVectors[0] + 1);
525 view_ = takeSubview (view_, ALL (), colRng);
526 origView_ = takeSubview (origView_, ALL (), colRng);
528 whichVectors_.clear ();
532 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
535 const dual_view_type& view,
536 const dual_view_type& origView,
537 const Teuchos::ArrayView<const size_t>& whichVectors) :
540 origView_ (origView),
541 whichVectors_ (whichVectors.begin (), whichVectors.end ())
544 using Kokkos::subview;
545 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
547 const size_t lclNumRows = this->getLocalLength ();
554 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
555 view.dimension_1 () != 0 &&
static_cast<size_t> (view.dimension_0 ()) < lclNumRows,
556 std::invalid_argument,
"view.dimension_0() = " << view.dimension_0 ()
557 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
558 if (whichVectors.size () != 0) {
559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
560 view.dimension_1 () != 0 && view.dimension_1 () == 0,
561 std::invalid_argument,
"view.dimension_1() = 0, but whichVectors.size()" 562 " = " << whichVectors.size () <<
" > 0.");
563 size_t maxColInd = 0;
564 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
565 for (size_type k = 0; k < whichVectors.size (); ++k) {
566 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
567 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
568 std::invalid_argument,
"whichVectors[" << k <<
"] = " 569 "Teuchos::OrdinalTraits<size_t>::invalid().");
570 maxColInd = std::max (maxColInd, whichVectors[k]);
572 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
573 view.dimension_1 () != 0 &&
static_cast<size_t> (view.dimension_1 ()) <= maxColInd,
574 std::invalid_argument,
"view.dimension_1() = " << view.dimension_1 ()
575 <<
" <= max(whichVectors) = " << maxColInd <<
".");
580 origView_.stride (stride);
581 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
582 origView_.dimension_0 ();
583 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
584 LDA < lclNumRows, std::invalid_argument,
"Input DualView's column stride" 585 " = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
587 if (whichVectors.size () == 1) {
596 const std::pair<size_t, size_t> colRng (whichVectors[0],
597 whichVectors[0] + 1);
598 view_ = takeSubview (view_, ALL (), colRng);
599 origView_ = takeSubview (origView_, ALL (), colRng);
601 whichVectors_.clear ();
605 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
608 const Teuchos::ArrayView<const Scalar>& data,
610 const size_t numVecs) :
613 typedef LocalOrdinal LO;
614 typedef GlobalOrdinal GO;
615 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
621 const size_t lclNumRows =
622 map.is_null () ? size_t (0) : map->getNodeNumElements ();
623 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
624 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < " 625 "map->getNodeNumElements() = " << lclNumRows <<
".");
627 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
628 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
629 (static_cast<size_t> (data.size ()) < minNumEntries,
630 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough " 631 "entries, given the input Map and number of vectors in the MultiVector." 632 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + " 633 "map->getNodeNumElements () = " << minNumEntries <<
".");
636 this->view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
637 this->
template modify<device_type> ();
638 auto X_out = this->
template getLocalView<device_type> ();
649 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
650 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
651 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
656 size_t outStrides[8];
657 X_out.stride (outStrides);
658 const size_t outStride = (X_out.dimension_1 () > 1) ? outStrides[1] :
659 X_out.dimension_0 ();
660 if (LDA == outStride) {
666 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
668 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
670 for (
size_t j = 0; j < numVecs; ++j) {
671 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
672 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
678 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
681 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
682 const size_t numVecs) :
686 typedef LocalOrdinal LO;
687 typedef GlobalOrdinal GO;
688 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
691 const size_t lclNumRows =
692 map.is_null () ? size_t (0) : map->getNodeNumElements ();
693 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
694 (numVecs < 1 || numVecs != static_cast<size_t> (ArrayOfPtrs.size ()),
695 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or " 696 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
697 for (
size_t j = 0; j < numVecs; ++j) {
698 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
699 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
700 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
701 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = " 702 << X_j_av.size () <<
" < map->getNodeNumElements() = " << lclNumRows
706 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
707 this->
template modify<device_type> ();
708 auto X_out = this->getLocalView<device_type> ();
712 typedef typename decltype (X_out)::array_layout array_layout;
713 typedef typename Kokkos::View<
const IST*,
716 Kokkos::MemoryUnmanaged> input_col_view_type;
718 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
719 for (
size_t j = 0; j < numVecs; ++j) {
720 Teuchos::ArrayView<const IST> X_j_av =
721 Teuchos::av_reinterpret_cast<
const IST> (ArrayOfPtrs[j]);
722 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
723 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
729 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
733 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
736 return whichVectors_.empty ();
739 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
744 if (this->getMap ().is_null ()) {
745 return static_cast<size_t> (0);
747 return this->getMap ()->getNodeNumElements ();
751 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
756 if (this->getMap ().is_null ()) {
757 return static_cast<size_t> (0);
759 return this->getMap ()->getGlobalNumElements ();
763 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
768 if (isConstantStride ()) {
772 origView_.stride (stride);
773 const size_t LDA = (origView_.dimension_1 () > 1) ? stride[1] :
774 origView_.dimension_0 ();
778 return static_cast<size_t> (0);
782 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
791 const this_type* src =
dynamic_cast<const this_type*
> (&sourceObj);
801 return src->getNumVectors () == this->getNumVectors ();
805 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
809 return this->getNumVectors ();
812 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
816 const size_t numSameIDs,
817 const Kokkos::DualView<const LocalOrdinal*, device_type>& permuteToLIDs,
818 const Kokkos::DualView<const LocalOrdinal*, device_type>& permuteFromLIDs)
821 using ::Tpetra::Details::ProfilingRegion;
822 using KokkosRefactor::Details::permute_array_multi_column;
823 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
824 using Kokkos::Compat::create_const_view;
825 typedef typename dual_view_type::t_dev::memory_space DMS;
826 typedef typename dual_view_type::t_host::memory_space HMS;
828 const char tfecfFuncName[] =
"copyAndPermuteNew: ";
829 ProfilingRegion regionCAP (
"Tpetra::MultiVector::copyAndPermute");
831 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
832 (permuteToLIDs.dimension_0 () != permuteFromLIDs.dimension_0 (),
833 std::runtime_error,
"permuteToLIDs.dimension_0() = " 834 << permuteToLIDs.dimension_0 () <<
" != permuteFromLIDs.dimension_0() = " 835 << permuteFromLIDs.dimension_0 () <<
".");
838 const MV& sourceMV =
dynamic_cast<const MV&
> (sourceObj);
839 const size_t numCols = this->getNumVectors ();
847 const bool copyOnHost = sourceMV.template need_sync<device_type> ();
850 this->
template sync<Kokkos::HostSpace> ();
851 this->
template modify<Kokkos::HostSpace> ();
854 this->
template sync<device_type> ();
855 this->
template modify<device_type> ();
877 if (numSameIDs > 0) {
878 const std::pair<size_t, size_t> rows (0, numSameIDs);
880 auto tgt_h = this->
template getLocalView<HMS> ();
881 auto src_h = create_const_view (sourceMV.template getLocalView<HMS> ());
883 for (
size_t j = 0; j < numCols; ++j) {
884 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
885 const size_t srcCol =
886 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
888 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
889 auto src_j = Kokkos::subview (src_h, rows, srcCol);
894 auto tgt_d = this->
template getLocalView<DMS> ();
895 auto src_d = create_const_view (sourceMV.template getLocalView<DMS> ());
897 for (
size_t j = 0; j < numCols; ++j) {
898 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
899 const size_t srcCol =
900 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
902 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
903 auto src_j = Kokkos::subview (src_d, rows, srcCol);
919 if (permuteFromLIDs.dimension_0 () == 0 ||
920 permuteToLIDs.dimension_0 () == 0) {
927 Kokkos::DualView<const LocalOrdinal*, device_type> permuteToLIDs_nc =
929 Kokkos::DualView<const LocalOrdinal*, device_type> permuteFromLIDs_nc =
934 const bool nonConstStride =
935 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
941 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
942 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
943 if (nonConstStride) {
944 if (this->whichVectors_.size () == 0) {
945 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
946 tmpTgt.template modify<HMS> ();
947 for (
size_t j = 0; j < numCols; ++j) {
948 tmpTgt.h_view(j) = j;
951 tmpTgt.template sync<DMS> ();
953 tgtWhichVecs = tmpTgt;
956 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_ ();
958 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
962 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
963 (static_cast<size_t> (tgtWhichVecs.dimension_0 ()) !=
964 this->getNumVectors (),
965 std::logic_error,
"tgtWhichVecs.dimension_0() = " <<
966 tgtWhichVecs.dimension_0 () <<
" != this->getNumVectors() = " <<
967 this->getNumVectors () <<
".");
969 if (sourceMV.whichVectors_.size () == 0) {
970 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
971 tmpSrc.template modify<HMS> ();
972 for (
size_t j = 0; j < numCols; ++j) {
973 tmpSrc.h_view(j) = j;
976 tmpSrc.template sync<DMS> ();
978 srcWhichVecs = tmpSrc;
981 Teuchos::ArrayView<const size_t> srcWhichVecsT =
982 sourceMV.whichVectors_ ();
984 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
988 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
989 (static_cast<size_t> (srcWhichVecs.dimension_0 ()) !=
990 sourceMV.getNumVectors (), std::logic_error,
991 "srcWhichVecs.dimension_0() = " << srcWhichVecs.dimension_0 ()
992 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
997 auto tgt_h = this->
template getLocalView<HMS> ();
998 auto src_h = create_const_view (sourceMV.template getLocalView<HMS> ());
999 permuteToLIDs_nc.template sync<HMS> ();
1000 auto permuteToLIDs_h =
1001 create_const_view (permuteToLIDs_nc.template view<HMS> ());
1002 permuteFromLIDs_nc.template sync<HMS> ();
1003 auto permuteFromLIDs_h =
1004 create_const_view (permuteFromLIDs_nc.template view<HMS> ());
1006 if (nonConstStride) {
1009 auto tgtWhichVecs_h =
1010 create_const_view (tgtWhichVecs.template view<HMS> ());
1011 auto srcWhichVecs_h =
1012 create_const_view (srcWhichVecs.template view<HMS> ());
1013 permute_array_multi_column_variable_stride (tgt_h, src_h,
1017 srcWhichVecs_h, numCols);
1020 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1021 permuteFromLIDs_h, numCols);
1025 auto tgt_d = this->
template getLocalView<DMS> ();
1026 auto src_d = create_const_view (sourceMV.template getLocalView<DMS> ());
1027 permuteToLIDs_nc.template sync<DMS> ();
1028 auto permuteToLIDs_d =
1029 create_const_view (permuteToLIDs_nc.template view<DMS> ());
1030 permuteFromLIDs_nc.template sync<DMS> ();
1031 auto permuteFromLIDs_d =
1032 create_const_view (permuteFromLIDs_nc.template view<DMS> ());
1034 if (nonConstStride) {
1037 auto tgtWhichVecs_d =
1038 create_const_view (tgtWhichVecs.template view<DMS> ());
1039 auto srcWhichVecs_d =
1040 create_const_view (srcWhichVecs.template view<DMS> ());
1041 permute_array_multi_column_variable_stride (tgt_d, src_d,
1045 srcWhichVecs_d, numCols);
1048 permute_array_multi_column (tgt_d, src_d, permuteToLIDs_d,
1049 permuteFromLIDs_d, numCols);
1054 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1058 const Kokkos::DualView<const local_ordinal_type*, device_type> &exportLIDs,
1059 Kokkos::DualView<impl_scalar_type*, device_type>& exports,
1060 const Kokkos::DualView<size_t*, device_type>& ,
1061 size_t& constantNumPackets,
1064 using ::Tpetra::Details::ProfilingRegion;
1065 using Kokkos::Compat::create_const_view;
1066 using Kokkos::Compat::getKokkosViewDeepCopy;
1069 typedef typename Kokkos::DualView<IST*, device_type>::t_host::memory_space
1071 typedef typename Kokkos::DualView<IST*, device_type>::t_dev::memory_space
1073 typedef typename Kokkos::DualView<IST*, device_type>::t_host::execution_space
1074 host_execution_space;
1075 typedef typename Kokkos::DualView<IST*, device_type>::t_dev::execution_space
1076 dev_execution_space;
1077 ProfilingRegion regionPAP (
"Tpetra::MultiVector::packAndPrepare");
1083 #ifdef HAVE_TPETRA_DEBUG 1084 constexpr
bool debugCheckIndices =
true;
1086 constexpr
bool debugCheckIndices =
false;
1087 #endif // HAVE_TPETRA_DEBUG 1089 const bool printDebugOutput =
false;
1090 if (printDebugOutput) {
1091 std::cerr <<
"$$$ MV::packAndPrepareNew" << std::endl;
1094 const MV& sourceMV =
dynamic_cast<const MV&
> (sourceObj);
1109 const bool packOnHost =
1110 exportLIDs.modified_host () > exportLIDs.modified_device ();
1111 auto src_dev = sourceMV.template getLocalView<dev_memory_space> ();
1112 auto src_host = sourceMV.template getLocalView<host_memory_space> ();
1114 if (sourceMV.template need_sync<Kokkos::HostSpace> ()) {
1117 src_host = decltype (src_host) (
"MV::DualView::h_view",
1118 src_dev.dimension_0 (),
1119 src_dev.dimension_1 ());
1124 if (sourceMV.template need_sync<device_type> ()) {
1127 src_dev = decltype (src_dev) (
"MV::DualView::d_view",
1128 src_host.dimension_0 (),
1129 src_host.dimension_1 ());
1134 const size_t numCols = sourceMV.getNumVectors ();
1139 constantNumPackets = numCols;
1143 if (exportLIDs.dimension_0 () == 0) {
1144 if (printDebugOutput) {
1145 std::cerr <<
"$$$ MV::packAndPrepareNew DONE" << std::endl;
1165 if (printDebugOutput) {
1166 std::cerr <<
"$$$ MV::packAndPrepareNew realloc" << std::endl;
1169 const size_t numExportLIDs = exportLIDs.dimension_0 ();
1170 const size_t newExportsSize = numCols * numExportLIDs;
1171 if (static_cast<size_t> (exports.dimension_0 ()) != newExportsSize) {
1172 if (printDebugOutput) {
1173 std::ostringstream os;
1174 const int myRank = this->getMap ()->getComm ()->getRank ();
1175 os <<
"$$$ MV::packAndPrepareNew (Proc " << myRank <<
") realloc " 1176 "exports from " << exports.dimension_0 () <<
" to " << newExportsSize
1178 std::cerr << os.str ();
1181 execution_space::fence ();
1182 exports = Kokkos::DualView<impl_scalar_type*, device_type> (
"exports", newExportsSize);
1183 execution_space::fence ();
1190 exports.template modify<host_memory_space> ();
1193 exports.template modify<dev_memory_space> ();
1209 if (sourceMV.isConstantStride ()) {
1210 using KokkosRefactor::Details::pack_array_single_column;
1211 if (printDebugOutput) {
1212 std::cerr <<
"$$$ MV::packAndPrepareNew pack numCols=1 const stride" << std::endl;
1215 pack_array_single_column (exports.template view<host_memory_space> (),
1216 create_const_view (src_host),
1217 exportLIDs.template view<host_memory_space> (),
1222 pack_array_single_column (exports.template view<dev_memory_space> (),
1223 create_const_view (src_dev),
1224 exportLIDs.template view<dev_memory_space> (),
1230 using KokkosRefactor::Details::pack_array_single_column;
1231 if (printDebugOutput) {
1232 std::cerr <<
"$$$ MV::packAndPrepareNew pack numCols=1 nonconst stride" << std::endl;
1235 pack_array_single_column (exports.template view<host_memory_space> (),
1236 create_const_view (src_host),
1237 exportLIDs.template view<host_memory_space> (),
1238 sourceMV.whichVectors_[0],
1242 pack_array_single_column (exports.template view<dev_memory_space> (),
1243 create_const_view (src_dev),
1244 exportLIDs.template view<dev_memory_space> (),
1245 sourceMV.whichVectors_[0],
1251 if (sourceMV.isConstantStride ()) {
1252 using KokkosRefactor::Details::pack_array_multi_column;
1253 if (printDebugOutput) {
1254 std::cerr <<
"$$$ MV::packAndPrepareNew pack numCols>1 const stride" << std::endl;
1257 pack_array_multi_column (exports.template view<host_memory_space> (),
1258 create_const_view (src_host),
1259 exportLIDs.template view<host_memory_space> (),
1264 pack_array_multi_column (exports.template view<dev_memory_space> (),
1265 create_const_view (src_dev),
1266 exportLIDs.template view<dev_memory_space> (),
1272 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1273 if (printDebugOutput) {
1274 std::cerr <<
"$$$ MV::packAndPrepareNew pack numCols>1 nonconst stride" << std::endl;
1277 pack_array_multi_column_variable_stride (exports.template view<host_memory_space> (),
1278 create_const_view (src_host),
1279 exportLIDs.template view<host_memory_space> (),
1280 getKokkosViewDeepCopy<host_execution_space> (sourceMV.whichVectors_ ()),
1285 pack_array_multi_column_variable_stride (exports.template view<dev_memory_space> (),
1286 create_const_view (src_dev),
1287 exportLIDs.template view<dev_memory_space> (),
1288 getKokkosViewDeepCopy<dev_execution_space> (sourceMV.whichVectors_ ()),
1295 if (printDebugOutput) {
1296 std::cerr <<
"$$$ MV::packAndPrepareNew DONE" << std::endl;
1301 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1305 const Kokkos::DualView<const impl_scalar_type*, device_type>& imports,
1306 const Kokkos::DualView<const size_t*, device_type>& ,
1307 const size_t constantNumPackets,
1311 using ::Tpetra::Details::ProfilingRegion;
1312 using KokkosRefactor::Details::unpack_array_multi_column;
1313 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1314 using Kokkos::Compat::getKokkosViewDeepCopy;
1316 typedef typename Kokkos::DualView<IST*, device_type>::t_host::memory_space
1318 typedef typename Kokkos::DualView<IST*, device_type>::t_dev::memory_space
1320 const char tfecfFuncName[] =
"unpackAndCombineNew: ";
1321 const char suffix[] =
" Please report this bug to the Tpetra developers.";
1322 ProfilingRegion regionUAC (
"Tpetra::MultiVector::unpackAndCombine");
1328 #ifdef HAVE_TPETRA_DEBUG 1329 constexpr
bool debugCheckIndices =
true;
1331 constexpr
bool debugCheckIndices =
false;
1332 #endif // HAVE_TPETRA_DEBUG 1335 if (importLIDs.dimension_0 () == 0) {
1339 const size_t numVecs = getNumVectors ();
1340 #ifdef HAVE_TPETRA_DEBUG 1341 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1342 static_cast<size_t> (imports.dimension_0 ()) !=
1343 numVecs * importLIDs.dimension_0 (),
1345 "imports.dimension_0() = " << imports.dimension_0 ()
1346 <<
" != getNumVectors() * importLIDs.dimension_0() = " << numVecs
1347 <<
" * " << importLIDs.dimension_0 () <<
" = " 1348 << numVecs * importLIDs.dimension_0 () <<
".");
1350 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1351 (constantNumPackets == static_cast<size_t> (0), std::runtime_error,
1352 "constantNumPackets input argument must be nonzero.");
1354 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1355 (static_cast<size_t> (numVecs) !=
1356 static_cast<size_t> (constantNumPackets),
1357 std::runtime_error,
"constantNumPackets must equal numVecs.");
1358 #endif // HAVE_TPETRA_DEBUG 1365 const bool unpackOnHost =
1366 imports.modified_host () > imports.modified_device ();
1367 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1368 (unpackOnHost && importLIDs.modified_host () < importLIDs.modified_device (),
1369 std::logic_error,
"The 'imports' buffer was last modified on host, " 1370 "but importLIDs was last modified on device." << suffix);
1371 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1372 (! unpackOnHost && importLIDs.modified_host () > importLIDs.modified_device (),
1373 std::logic_error,
"The 'imports' buffer was last modified on device, " 1374 "but importLIDs was last modified on host." << suffix);
1381 this->
template sync<host_memory_space> ();
1382 this->
template modify<host_memory_space> ();
1385 this->
template sync<dev_memory_space> ();
1386 this->
template modify<dev_memory_space> ();
1388 auto X_d = this->
template getLocalView<dev_memory_space> ();
1389 auto X_h = this->
template getLocalView<host_memory_space> ();
1390 auto imports_d = imports.template view<dev_memory_space> ();
1391 auto imports_h = imports.template view<host_memory_space> ();
1392 auto importLIDs_d = importLIDs.template view<dev_memory_space> ();
1393 auto importLIDs_h = importLIDs.template view<host_memory_space> ();
1395 Kokkos::DualView<size_t*, device_type> whichVecs;
1396 if (! isConstantStride ()) {
1397 Kokkos::View<
const size_t*, host_memory_space,
1398 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
1400 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
1402 whichVecs.template modify<host_memory_space> ();
1406 whichVecs.template modify<dev_memory_space> ();
1410 auto whichVecs_d = whichVecs.template view<dev_memory_space> ();
1411 auto whichVecs_h = whichVecs.template view<host_memory_space> ();
1421 if (numVecs > 0 && importLIDs.dimension_0 () > 0) {
1427 KokkosRefactor::Details::InsertOp<execution_space> op;
1429 if (isConstantStride ()) {
1431 unpack_array_multi_column (X_h, imports_h, importLIDs_h, op,
1432 numVecs, debugCheckIndices);
1436 unpack_array_multi_column (X_d, imports_d, importLIDs_d, op,
1437 numVecs, debugCheckIndices);
1442 unpack_array_multi_column_variable_stride (X_h, imports_h,
1449 unpack_array_multi_column_variable_stride (X_d, imports_d,
1457 else if (CM ==
ADD) {
1458 KokkosRefactor::Details::AddOp<execution_space> op;
1460 if (isConstantStride ()) {
1462 unpack_array_multi_column (X_h, imports_h, importLIDs_h, op,
1463 numVecs, debugCheckIndices);
1466 unpack_array_multi_column (X_d, imports_d, importLIDs_d, op,
1467 numVecs, debugCheckIndices);
1472 unpack_array_multi_column_variable_stride (X_h, imports_h,
1479 unpack_array_multi_column_variable_stride (X_d, imports_d,
1488 KokkosRefactor::Details::AbsMaxOp<execution_space> op;
1490 if (isConstantStride ()) {
1492 unpack_array_multi_column (X_h, imports_h, importLIDs_h, op,
1493 numVecs, debugCheckIndices);
1496 unpack_array_multi_column (X_d, imports_d, importLIDs_d, op,
1497 numVecs, debugCheckIndices);
1502 unpack_array_multi_column_variable_stride (X_h, imports_h,
1509 unpack_array_multi_column_variable_stride (X_d, imports_d,
1518 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1520 std::invalid_argument,
"Invalid CombineMode: " << CM <<
". Valid " 1521 "CombineMode values are ADD, REPLACE, INSERT, and ABSMAX.");
1526 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1531 if (isConstantStride ()) {
1532 return static_cast<size_t> (view_.dimension_1 ());
1534 return static_cast<size_t> (whichVectors_.size ());
1540 template<
class RV,
class XMV>
1542 lclDotImpl (
const RV& dotsOut,
1545 const size_t lclNumRows,
1546 const size_t numVecs,
1547 const Teuchos::ArrayView<const size_t>& whichVecsX,
1548 const Teuchos::ArrayView<const size_t>& whichVecsY,
1549 const bool constantStrideX,
1550 const bool constantStrideY)
1553 using Kokkos::subview;
1554 typedef typename RV::non_const_value_type
dot_type;
1555 #ifdef HAVE_TPETRA_DEBUG 1556 const char prefix[] =
"Tpetra::MultiVector::lclDotImpl: ";
1557 #endif // HAVE_TPETRA_DEBUG 1559 static_assert (Kokkos::Impl::is_view<RV>::value,
1560 "Tpetra::MultiVector::lclDotImpl: " 1561 "The first argument dotsOut is not a Kokkos::View.");
1562 static_assert (RV::rank == 1,
"Tpetra::MultiVector::lclDotImpl: " 1563 "The first argument dotsOut must have rank 1.");
1564 static_assert (Kokkos::Impl::is_view<XMV>::value,
1565 "Tpetra::MultiVector::lclDotImpl: The type of the 2nd and " 1566 "3rd arguments (X_lcl and Y_lcl) is not a Kokkos::View.");
1567 static_assert (XMV::rank == 2,
"Tpetra::MultiVector::lclDotImpl: " 1568 "X_lcl and Y_lcl must have rank 2.");
1573 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
1574 const std::pair<size_t, size_t> colRng (0, numVecs);
1575 RV theDots = subview (dotsOut, colRng);
1576 XMV X = subview (X_lcl, rowRng, Kokkos::ALL());
1577 XMV Y = subview (Y_lcl, rowRng, Kokkos::ALL());
1579 #ifdef HAVE_TPETRA_DEBUG 1580 if (lclNumRows != 0) {
1581 TEUCHOS_TEST_FOR_EXCEPTION
1582 (X.dimension_0 () != lclNumRows, std::logic_error, prefix <<
1583 "X.dimension_0() = " << X.dimension_0 () <<
" != lclNumRows " 1584 "= " << lclNumRows <<
". " 1585 "Please report this bug to the Tpetra developers.");
1586 TEUCHOS_TEST_FOR_EXCEPTION
1587 (Y.dimension_0 () != lclNumRows, std::logic_error, prefix <<
1588 "Y.dimension_0() = " << Y.dimension_0 () <<
" != lclNumRows " 1589 "= " << lclNumRows <<
". " 1590 "Please report this bug to the Tpetra developers.");
1594 TEUCHOS_TEST_FOR_EXCEPTION
1596 (X.dimension_0 () != lclNumRows || X.dimension_1 () != numVecs),
1597 std::logic_error, prefix <<
"X is " << X.dimension_0 () <<
" x " <<
1598 X.dimension_1 () <<
" (constant stride), which differs from the " 1599 "local dimensions " << lclNumRows <<
" x " << numVecs <<
". " 1600 "Please report this bug to the Tpetra developers.");
1601 TEUCHOS_TEST_FOR_EXCEPTION
1602 (! constantStrideX &&
1603 (X.dimension_0 () != lclNumRows || X.dimension_1 () < numVecs),
1604 std::logic_error, prefix <<
"X is " << X.dimension_0 () <<
" x " <<
1605 X.dimension_1 () <<
" (NOT constant stride), but the local " 1606 "dimensions are " << lclNumRows <<
" x " << numVecs <<
". " 1607 "Please report this bug to the Tpetra developers.");
1608 TEUCHOS_TEST_FOR_EXCEPTION
1610 (Y.dimension_0 () != lclNumRows || Y.dimension_1 () != numVecs),
1611 std::logic_error, prefix <<
"Y is " << Y.dimension_0 () <<
" x " <<
1612 Y.dimension_1 () <<
" (constant stride), which differs from the " 1613 "local dimensions " << lclNumRows <<
" x " << numVecs <<
". " 1614 "Please report this bug to the Tpetra developers.");
1615 TEUCHOS_TEST_FOR_EXCEPTION
1616 (! constantStrideY &&
1617 (Y.dimension_0 () != lclNumRows || Y.dimension_1 () < numVecs),
1618 std::logic_error, prefix <<
"Y is " << Y.dimension_0 () <<
" x " <<
1619 Y.dimension_1 () <<
" (NOT constant stride), but the local " 1620 "dimensions are " << lclNumRows <<
" x " << numVecs <<
". " 1621 "Please report this bug to the Tpetra developers.");
1623 #endif // HAVE_TPETRA_DEBUG 1625 if (lclNumRows == 0) {
1626 const dot_type zero = Kokkos::Details::ArithTraits<dot_type>::zero ();
1630 if (constantStrideX && constantStrideY) {
1631 if(X.dimension_1() == 1) {
1632 typename RV::non_const_value_type result =
1633 KokkosBlas::dot (Kokkos::subview(X,Kokkos::ALL(),0),
1634 Kokkos::subview(Y,Kokkos::ALL(),0));
1638 KokkosBlas::dot (theDots, X, Y);
1647 for (
size_t k = 0; k < numVecs; ++k) {
1648 const size_t X_col = constantStrideX ? k : whichVecsX[k];
1649 const size_t Y_col = constantStrideY ? k : whichVecsY[k];
1650 KokkosBlas::dot (subview (theDots, k), subview (X, ALL (), X_col),
1651 subview (Y, ALL (), Y_col));
1659 gblDotImpl (
const RV& dotsOut,
1660 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
1661 const bool distributed)
1663 using Teuchos::REDUCE_MAX;
1664 using Teuchos::REDUCE_SUM;
1665 using Teuchos::reduceAll;
1666 typedef typename RV::non_const_value_type
dot_type;
1668 const size_t numVecs = dotsOut.dimension_0 ();
1683 if (distributed && ! comm.is_null ()) {
1686 const int nv =
static_cast<int> (numVecs);
1687 const bool commIsInterComm = Details::isInterComm (*comm);
1689 if (commIsInterComm) {
1693 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
1695 const dot_type*
const lclSum = lclDots.ptr_on_device ();
1696 dot_type*
const gblSum = dotsOut.ptr_on_device ();
1697 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
1700 dot_type*
const inout = dotsOut.ptr_on_device ();
1701 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
1707 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1711 const Kokkos::View<dot_type*, device_type>& dots)
const 1713 using Kokkos::create_mirror_view;
1714 using Kokkos::subview;
1715 using Teuchos::Comm;
1716 using Teuchos::null;
1719 typedef Kokkos::View<dot_type*, device_type> RV;
1721 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
1725 const size_t numVecs = this->getNumVectors ();
1729 const size_t lclNumRows = this->getLocalLength ();
1730 const size_t numDots =
static_cast<size_t> (dots.dimension_0 ());
1732 #ifdef HAVE_TPETRA_DEBUG 1734 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
1735 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1736 ! compat, std::invalid_argument,
"Tpetra::MultiVector::dot: *this is " 1737 "not compatible with the input MultiVector A. We only test for this " 1738 "in a debug build.");
1740 #endif // HAVE_TPETRA_DEBUG 1749 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1751 "MultiVectors do not have the same local length. " 1752 "this->getLocalLength() = " << lclNumRows <<
" != " 1754 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1756 "MultiVectors must have the same number of columns (vectors). " 1757 "this->getNumVectors() = " << numVecs <<
" != " 1759 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1760 numDots != numVecs, std::runtime_error,
1761 "The output array 'dots' must have the same number of entries as the " 1762 "number of columns (vectors) in *this and A. dots.dimension_0() = " <<
1763 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
1765 const std::pair<size_t, size_t> colRng (0, numVecs);
1766 RV dotsOut = subview (dots, colRng);
1767 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
1768 this->getMap ()->getComm ();
1773 const bool useHostVersion = A.template need_sync<device_type> ();
1774 if (useHostVersion) {
1777 typedef typename dual_view_type::t_host XMV;
1778 typedef typename XMV::memory_space cur_memory_space;
1782 const_cast<MV*
> (
this)->
template sync<cur_memory_space> ();
1783 auto thisView = this->
template getLocalView<cur_memory_space> ();
1784 auto A_view = A.template getLocalView<cur_memory_space> ();
1786 lclDotImpl<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
1789 auto dotsOutHost = Kokkos::create_mirror_view (dotsOut);
1791 gblDotImpl (dotsOutHost, comm, this->isDistributed ());
1796 typedef typename dual_view_type::t_dev XMV;
1797 typedef typename XMV::memory_space cur_memory_space;
1803 const_cast<MV*
> (
this)->
template sync<cur_memory_space> ();
1804 auto thisView = this->
template getLocalView<cur_memory_space> ();
1805 auto A_view = A.template getLocalView<cur_memory_space> ();
1807 lclDotImpl<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
1810 gblDotImpl (dotsOut, comm, this->isDistributed ());
1815 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1820 using Teuchos::Comm;
1823 typedef typename MV::dot_type
dot_type;
1824 typedef typename MV::dual_view_type::t_dev::memory_space dev_memory_space;
1825 typedef typename MV::dual_view_type::t_host::memory_space host_memory_space;
1828 RCP<const typename MV::map_type> map = x.
getMap ();
1829 RCP<const Comm<int> > comm = map.is_null () ? Teuchos::null : map->getComm ();
1830 if (comm.is_null ()) {
1831 return Kokkos::ArithTraits<dot_type>::zero ();
1834 typedef LocalOrdinal LO;
1838 const LO lclNumRows =
static_cast<LO
> (std::min (x.
getLocalLength (),
1840 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
1841 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
1842 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
1854 const bool runOnHost = y_vec->template need_sync<dev_memory_space> ();
1859 typedef host_memory_space cur_memory_space;
1861 x_vec->template sync<cur_memory_space> ();
1862 auto x_2d = x_vec->template getLocalView<cur_memory_space> ();
1863 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
1864 auto y_2d = y_vec->template getLocalView<cur_memory_space> ();
1865 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
1866 lclDot = KokkosBlas::dot (x_1d, y_1d);
1869 typedef dev_memory_space cur_memory_space;
1871 x_vec->template sync<cur_memory_space> ();
1872 auto x_2d = x_vec->template getLocalView<cur_memory_space> ();
1873 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
1874 auto y_2d = y_vec->template getLocalView<cur_memory_space> ();
1875 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
1876 lclDot = KokkosBlas::dot (x_1d, y_1d);
1880 using Teuchos::outArg;
1881 using Teuchos::REDUCE_SUM;
1882 using Teuchos::reduceAll;
1883 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
1895 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1899 const Teuchos::ArrayView<dot_type>& dots)
const 1902 const char tfecfFuncName[] =
"dot: ";
1905 const size_t numVecs = this->getNumVectors ();
1906 const size_t lclNumRows = this->getLocalLength ();
1907 const size_t numDots =
static_cast<size_t> (dots.size ());
1917 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1919 "MultiVectors do not have the same local length. " 1920 "this->getLocalLength() = " << lclNumRows <<
" != " 1922 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1924 "MultiVectors must have the same number of columns (vectors). " 1925 "this->getNumVectors() = " << numVecs <<
" != " 1927 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1928 (numDots != numVecs, std::runtime_error,
1929 "The output array 'dots' must have the same number of entries as the " 1930 "number of columns (vectors) in *this and A. dots.dimension_0() = " <<
1931 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
1934 const dot_type gblDot = multiVectorSingleColumnDot (const_cast<MV&> (*
this), A);
1942 typedef Kokkos::View<dot_type*, device_type> dev_dots_view_type;
1943 typedef MakeUnmanagedView<dot_type, device_type> view_getter_type;
1944 typedef typename view_getter_type::view_type host_dots_view_type;
1946 host_dots_view_type dotsHostView (dots.getRawPtr (), numDots);
1947 dev_dots_view_type dotsDevView (
"MV::dot tmp", numDots);
1948 this->dot (A, dotsDevView);
1954 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1957 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const 1959 typedef Kokkos::View<mag_type*, device_type> dev_norms_view_type;
1960 typedef MakeUnmanagedView<mag_type, device_type> view_getter_type;
1961 typedef typename view_getter_type::view_type host_norms_view_type;
1965 const size_t numNorms =
static_cast<size_t> (norms.size ());
1966 host_norms_view_type normsHostView (norms.getRawPtr (), numNorms);
1967 dev_norms_view_type normsDevView (
"MV::norm2 tmp", numNorms);
1968 this->norm2 (normsDevView);
1973 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1976 norm2 (
const Kokkos::View<mag_type*, device_type>& norms)
const 1979 this->normImpl (norms, NORM_TWO);
1983 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
1984 void TPETRA_DEPRECATED
1987 const Teuchos::ArrayView<mag_type> &norms)
const 1990 using Kokkos::subview;
1991 using Teuchos::Comm;
1992 using Teuchos::null;
1994 using Teuchos::reduceAll;
1995 using Teuchos::REDUCE_SUM;
1996 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
1997 typedef Kokkos::Details::ArithTraits<mag_type> ATM;
1998 typedef Kokkos::View<mag_type*, device_type> norms_view_type;
2000 const char tfecfFuncName[] =
"normWeighted: ";
2002 const size_t numVecs = this->getNumVectors ();
2003 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2004 static_cast<size_t> (norms.size ()) != numVecs, std::runtime_error,
2005 "norms.size() = " << norms.size () <<
" != this->getNumVectors() = " 2009 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2010 ! OneW && weights.
getNumVectors () != numVecs, std::runtime_error,
2011 "The input MultiVector of weights must contain either one column, " 2012 "or must have the same number of columns as *this. " 2014 <<
" and this->getNumVectors() = " << numVecs <<
".");
2016 #ifdef HAVE_TPETRA_DEBUG 2017 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2018 ! this->getMap ()->isCompatible (*weights.
getMap ()), std::runtime_error,
2019 "MultiVectors do not have compatible Maps:" << std::endl
2020 <<
"this->getMap(): " << std::endl << *this->getMap()
2021 <<
"weights.getMap(): " << std::endl << *weights.
getMap() << std::endl);
2023 const size_t lclNumRows = this->getLocalLength ();
2024 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2026 "MultiVectors do not have the same local length.");
2027 #endif // HAVE_TPETRA_DEBUG 2029 norms_view_type lclNrms (
"lclNrms", numVecs);
2032 const_cast<MV*
> (
this)->
template sync<device_type> ();
2033 const_cast<MV&
> (weights).
template sync<device_type> ();
2035 auto X_lcl = this->
template getLocalView<device_type> ();
2036 auto W_lcl = weights.template getLocalView<device_type> ();
2038 if (isConstantStride () && ! OneW) {
2039 KokkosBlas::nrm2w_squared (lclNrms, X_lcl, W_lcl);
2042 for (
size_t j = 0; j < numVecs; ++j) {
2043 const size_t X_col = this->isConstantStride () ? j :
2044 this->whichVectors_[j];
2045 const size_t W_col = OneW ?
static_cast<size_t> (0) :
2047 KokkosBlas::nrm2w_squared (subview (lclNrms, j),
2048 subview (X_lcl, ALL (), X_col),
2049 subview (W_lcl, ALL (), W_col));
2054 ATM::one () /
static_cast<mag_type> (this->getGlobalLength ());
2055 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
2056 Teuchos::null : this->getMap ()->getComm ();
2058 if (! comm.is_null () && this->isDistributed ()) {
2060 reduceAll<int, mag_type> (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2061 lclNrms.ptr_on_device (), norms.getRawPtr ());
2062 for (
size_t k = 0; k < numVecs; ++k) {
2063 norms[k] = ATM::sqrt (norms[k] * OneOverN);
2067 typename norms_view_type::HostMirror lclNrms_h =
2068 Kokkos::create_mirror_view (lclNrms);
2070 for (
size_t k = 0; k < numVecs; ++k) {
2071 norms[k] = ATM::sqrt (ATS::magnitude (lclNrms_h(k)) * OneOverN);
2077 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2080 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const 2082 typedef Kokkos::View<mag_type*, device_type> dev_norms_view_type;
2083 typedef MakeUnmanagedView<mag_type, device_type> view_getter_type;
2084 typedef typename view_getter_type::view_type host_norms_view_type;
2086 const size_t numNorms =
static_cast<size_t> (norms.size ());
2087 host_norms_view_type normsHostView (norms.getRawPtr (), numNorms);
2088 dev_norms_view_type normsDevView (
"MV::norm1 tmp", numNorms);
2089 this->norm1 (normsDevView);
2094 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2097 norm1 (
const Kokkos::View<mag_type*, device_type>& norms)
const 2099 this->normImpl (norms, NORM_ONE);
2103 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2106 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const 2108 typedef Kokkos::View<mag_type*, device_type> dev_norms_view_type;
2109 typedef MakeUnmanagedView<mag_type, device_type> view_getter_type;
2110 typedef typename view_getter_type::view_type host_norms_view_type;
2112 const size_t numNorms =
static_cast<size_t> (norms.size ());
2113 host_norms_view_type normsHostView (norms.getRawPtr (), numNorms);
2114 dev_norms_view_type normsDevView (
"MV::normInf tmp", numNorms);
2115 this->normInf (normsDevView);
2120 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2123 normInf (
const Kokkos::View<mag_type*, device_type>& norms)
const 2125 this->normImpl (norms, NORM_INF);
2137 template<
class RV,
class XMV>
2139 lclNormImpl (
const RV& normsOut,
2141 const size_t lclNumRows,
2142 const size_t numVecs,
2143 const Teuchos::ArrayView<const size_t>& whichVecs,
2144 const bool constantStride,
2148 using Kokkos::subview;
2149 typedef typename RV::non_const_value_type mag_type;
2151 static_assert (Kokkos::Impl::is_view<RV>::value,
2152 "Tpetra::MultiVector::lclNormImpl: " 2153 "The first argument RV is not a Kokkos::View.");
2154 static_assert (RV::rank == 1,
"Tpetra::MultiVector::lclNormImpl: " 2155 "The first argument normsOut must have rank 1.");
2156 static_assert (Kokkos::Impl::is_view<XMV>::value,
2157 "Tpetra::MultiVector::lclNormImpl: " 2158 "The second argument X_lcl is not a Kokkos::View.");
2159 static_assert (XMV::rank == 2,
"Tpetra::MultiVector::lclNormImpl: " 2160 "The second argument X_lcl must have rank 2.");
2165 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2166 const std::pair<size_t, size_t> colRng (0, numVecs);
2167 RV theNorms = subview (normsOut, colRng);
2168 XMV X = subview (X_lcl, rowRng, Kokkos::ALL());
2173 TEUCHOS_TEST_FOR_EXCEPTION(
2174 lclNumRows != 0 && constantStride && ( \
2175 ( X.dimension_0 () != lclNumRows ) ||
2176 ( X.dimension_1 () != numVecs ) ),
2177 std::logic_error,
"Constant Stride X's dimensions are " << X.dimension_0 () <<
" x " 2178 << X.dimension_1 () <<
", which differ from the local dimensions " 2179 << lclNumRows <<
" x " << numVecs <<
". Please report this bug to " 2180 "the Tpetra developers.");
2182 TEUCHOS_TEST_FOR_EXCEPTION(
2183 lclNumRows != 0 && !constantStride && ( \
2184 ( X.dimension_0 () != lclNumRows ) ||
2185 ( X.dimension_1 () < numVecs ) ),
2186 std::logic_error,
"Strided X's dimensions are " << X.dimension_0 () <<
" x " 2187 << X.dimension_1 () <<
", which are incompatible with the local dimensions " 2188 << lclNumRows <<
" x " << numVecs <<
". Please report this bug to " 2189 "the Tpetra developers.");
2191 if (lclNumRows == 0) {
2192 const mag_type zeroMag = Kokkos::Details::ArithTraits<mag_type>::zero ();
2196 if (constantStride) {
2197 if (whichNorm == IMPL_NORM_INF) {
2198 KokkosBlas::nrmInf (theNorms, X);
2200 else if (whichNorm == IMPL_NORM_ONE) {
2201 KokkosBlas::nrm1 (theNorms, X);
2203 else if (whichNorm == IMPL_NORM_TWO) {
2204 KokkosBlas::nrm2_squared (theNorms, X);
2207 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
"Should never get here!");
2216 for (
size_t k = 0; k < numVecs; ++k) {
2217 const size_t X_col = constantStride ? k : whichVecs[k];
2218 if (whichNorm == IMPL_NORM_INF) {
2219 KokkosBlas::nrmInf (subview (theNorms, k),
2220 subview (X, ALL (), X_col));
2222 else if (whichNorm == IMPL_NORM_ONE) {
2223 KokkosBlas::nrm1 (subview (theNorms, k),
2224 subview (X, ALL (), X_col));
2226 else if (whichNorm == IMPL_NORM_TWO) {
2227 KokkosBlas::nrm2_squared (subview (theNorms, k),
2228 subview (X, ALL (), X_col));
2231 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
"Should never get here!");
2240 gblNormImpl (
const RV& normsOut,
2241 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
2242 const bool distributed,
2245 using Teuchos::REDUCE_MAX;
2246 using Teuchos::REDUCE_SUM;
2247 using Teuchos::reduceAll;
2248 typedef typename RV::non_const_value_type mag_type;
2250 const size_t numVecs = normsOut.dimension_0 ();
2265 if (distributed && ! comm.is_null ()) {
2271 RV lclNorms (
"MV::normImpl lcl", numVecs);
2273 const mag_type*
const lclSum = lclNorms.ptr_on_device ();
2274 mag_type*
const gblSum = normsOut.ptr_on_device ();
2275 const int nv =
static_cast<int> (numVecs);
2276 if (whichNorm == IMPL_NORM_INF) {
2277 reduceAll<int, mag_type> (*comm, REDUCE_MAX, nv, lclSum, gblSum);
2279 reduceAll<int, mag_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
2283 if (whichNorm == IMPL_NORM_TWO) {
2289 const bool inHostMemory =
2290 Kokkos::Impl::is_same<
typename RV::memory_space,
2291 typename RV::host_mirror_space::memory_space>::value;
2293 for (
size_t j = 0; j < numVecs; ++j) {
2294 normsOut(j) = Kokkos::Details::ArithTraits<mag_type>::sqrt (normsOut(j));
2302 KokkosBlas::Impl::SquareRootFunctor<RV> f (normsOut);
2303 typedef typename RV::execution_space execution_space;
2304 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2305 Kokkos::parallel_for (range_type (0, numVecs), f);
2312 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2315 normImpl (
const Kokkos::View<mag_type*, device_type>& norms,
2318 using Kokkos::create_mirror_view;
2319 using Kokkos::subview;
2320 using Teuchos::Comm;
2321 using Teuchos::null;
2324 typedef Kokkos::View<mag_type*, device_type> RV;
2326 const size_t numVecs = this->getNumVectors ();
2330 const size_t lclNumRows = this->getLocalLength ();
2331 const size_t numNorms =
static_cast<size_t> (norms.dimension_0 ());
2332 TEUCHOS_TEST_FOR_EXCEPTION(
2333 numNorms < numVecs, std::runtime_error,
"Tpetra::MultiVector::normImpl: " 2334 "'norms' must have at least as many entries as the number of vectors in " 2335 "*this. norms.dimension_0() = " << numNorms <<
" < this->getNumVectors()" 2336 " = " << numVecs <<
".");
2338 const std::pair<size_t, size_t> colRng (0, numVecs);
2339 RV normsOut = subview (norms, colRng);
2341 EWhichNormImpl lclNormType;
2342 if (whichNorm == NORM_ONE) {
2343 lclNormType = IMPL_NORM_ONE;
2344 }
else if (whichNorm == NORM_TWO) {
2345 lclNormType = IMPL_NORM_TWO;
2347 lclNormType = IMPL_NORM_INF;
2350 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
2351 this->getMap ()->getComm ();
2354 const bool useHostVersion = this->
template need_sync<device_type> ();
2355 if (useHostVersion) {
2358 typedef typename dual_view_type::t_host XMV;
2359 typedef typename XMV::memory_space cur_memory_space;
2361 auto thisView = this->
template getLocalView<cur_memory_space> ();
2362 lclNormImpl<RV, XMV> (normsOut, thisView, lclNumRows, numVecs,
2363 this->whichVectors_, this->isConstantStride (),
2365 auto normsOutHost = Kokkos::create_mirror_view (normsOut);
2367 gblNormImpl (normsOutHost, comm, this->isDistributed (), lclNormType);
2372 typedef typename dual_view_type::t_dev XMV;
2373 typedef typename XMV::memory_space cur_memory_space;
2375 auto thisView = this->
template getLocalView<cur_memory_space> ();
2376 lclNormImpl<RV, XMV> (normsOut, thisView, lclNumRows, numVecs,
2377 this->whichVectors_, this->isConstantStride (),
2379 gblNormImpl (normsOut, comm, this->isDistributed (), lclNormType);
2383 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2386 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const 2390 using Kokkos::subview;
2391 using Teuchos::Comm;
2393 using Teuchos::reduceAll;
2394 using Teuchos::REDUCE_SUM;
2395 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
2397 const size_t lclNumRows = this->getLocalLength ();
2398 const size_t numVecs = this->getNumVectors ();
2399 const size_t numMeans =
static_cast<size_t> (means.size ());
2401 TEUCHOS_TEST_FOR_EXCEPTION(
2402 numMeans != numVecs, std::runtime_error,
2403 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2404 <<
" != this->getNumVectors() = " << numVecs <<
".");
2406 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2407 const std::pair<size_t, size_t> colRng (0, numVecs);
2412 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2414 typename local_view_type::HostMirror::array_layout,
2416 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2417 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2419 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2420 this->getMap ()->getComm ();
2423 const bool useHostVersion = this->
template need_sync<device_type> ();
2424 if (useHostVersion) {
2426 auto X_lcl = subview (this->
template getLocalView<Kokkos::HostSpace> (),
2427 rowRng, Kokkos::ALL ());
2429 typename local_view_type::HostMirror lclSums (
"MV::meanValue tmp", numVecs);
2430 if (isConstantStride ()) {
2431 KokkosBlas::sum (lclSums, X_lcl);
2434 for (
size_t j = 0; j < numVecs; ++j) {
2435 const size_t col = whichVectors_[j];
2436 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2443 if (! comm.is_null () && this->isDistributed ()) {
2444 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2445 lclSums.ptr_on_device (), meansOut.ptr_on_device ());
2453 auto X_lcl = subview (this->
template getLocalView<device_type> (),
2454 rowRng, Kokkos::ALL ());
2457 local_view_type lclSums (
"MV::meanValue tmp", numVecs);
2458 if (isConstantStride ()) {
2459 KokkosBlas::sum (lclSums, X_lcl);
2462 for (
size_t j = 0; j < numVecs; ++j) {
2463 const size_t col = whichVectors_[j];
2464 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2472 if (! comm.is_null () && this->isDistributed ()) {
2473 reduceAll (*comm, REDUCE_SUM, static_cast<int> (numVecs),
2474 lclSums.ptr_on_device (), meansOut.ptr_on_device ());
2484 const impl_scalar_type OneOverN =
2485 ATS::one () /
static_cast<mag_type> (this->getGlobalLength ());
2486 for (
size_t k = 0; k < numMeans; ++k) {
2487 meansOut(k) = meansOut(k) * OneOverN;
2492 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2498 typedef Kokkos::Details::ArithTraits<IST> ATS;
2499 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2500 typedef typename pool_type::generator_type generator_type;
2502 const IST max = Kokkos::rand<generator_type, IST>::max ();
2503 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2505 this->randomize (static_cast<Scalar> (min), static_cast<Scalar> (max));
2509 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2515 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2526 const uint64_t myRank =
2527 static_cast<uint64_t
> (this->getMap ()->getComm ()->getRank ());
2528 uint64_t seed64 =
static_cast<uint64_t
> (std::rand ()) + myRank + 17311uLL;
2529 unsigned int seed =
static_cast<unsigned int> (seed64&0xffffffff);
2531 pool_type rand_pool (seed);
2532 const IST max =
static_cast<IST
> (maxVal);
2533 const IST min =
static_cast<IST
> (minVal);
2538 this->view_.modified_device () = 0;
2539 this->view_.modified_host () = 0;
2541 this->
template modify<device_type> ();
2542 auto thisView = this->getLocalView<device_type> ();
2544 if (isConstantStride ()) {
2545 Kokkos::fill_random (thisView, rand_pool, min, max);
2548 const size_t numVecs = getNumVectors ();
2549 for (
size_t k = 0; k < numVecs; ++k) {
2550 const size_t col = whichVectors_[k];
2551 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2552 Kokkos::fill_random (X_k, rand_pool, min, max);
2558 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2565 using Kokkos::subview;
2566 typedef typename device_type::memory_space DMS;
2567 typedef Kokkos::HostSpace HMS;
2572 const size_t lclNumRows = this->getLocalLength ();
2573 const size_t numVecs = this->getNumVectors ();
2574 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2575 const std::pair<size_t, size_t> colRng (0, numVecs);
2581 const bool useHostVersion = this->
template need_sync<device_type> ();
2583 if (! useHostVersion) {
2584 this->
template modify<DMS> ();
2585 auto X = subview (this->
template getLocalView<DMS> (), rowRng, ALL ());
2587 auto X_0 = subview (X, ALL (), static_cast<size_t> (0));
2590 else if (isConstantStride ()) {
2594 for (
size_t k = 0; k < numVecs; ++k) {
2595 const size_t col = whichVectors_[k];
2596 auto X_k = subview (X, ALL (), col);
2602 this->
template modify<HMS> ();
2603 auto X = subview (this->
template getLocalView<HMS> (), rowRng, ALL ());
2605 auto X_0 = subview (X, ALL (), static_cast<size_t> (0));
2608 else if (isConstantStride ()) {
2612 for (
size_t k = 0; k < numVecs; ++k) {
2613 const size_t col = whichVectors_[k];
2614 auto X_k = subview (X, ALL (), col);
2622 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2627 using Teuchos::ArrayRCP;
2628 using Teuchos::Comm;
2635 TEUCHOS_TEST_FOR_EXCEPTION(
2636 ! this->isConstantStride (), std::logic_error,
2637 "Tpetra::MultiVector::replaceMap: This method does not currently work " 2638 "if the MultiVector is a column view of another MultiVector (that is, if " 2639 "isConstantStride() == false).");
2669 #ifdef HAVE_TEUCHOS_DEBUG 2683 #endif // HAVE_TEUCHOS_DEBUG 2685 if (this->getMap ().is_null ()) {
2690 TEUCHOS_TEST_FOR_EXCEPTION(
2691 newMap.is_null (), std::invalid_argument,
2692 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. " 2693 "This probably means that the input Map is incorrect.");
2697 const size_t newNumRows = newMap->getNodeNumElements ();
2698 const size_t origNumRows = view_.dimension_0 ();
2699 const size_t numCols = this->getNumVectors ();
2701 if (origNumRows != newNumRows || view_.dimension_1 () != numCols) {
2702 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (newNumRows, numCols);
2705 else if (newMap.is_null ()) {
2708 const size_t newNumRows =
static_cast<size_t> (0);
2709 const size_t numCols = this->getNumVectors ();
2710 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (newNumRows, numCols);
2713 this->map_ = newMap;
2716 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2722 if (theAlpha == Kokkos::Details::ArithTraits<impl_scalar_type>::one ()) {
2725 const size_t lclNumRows = this->getLocalLength ();
2726 const size_t numVecs = this->getNumVectors ();
2727 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2728 const std::pair<size_t, size_t> colRng (0, numVecs);
2736 const bool useHostVersion = this->
template need_sync<device_type> ();
2737 if (useHostVersion) {
2739 Kokkos::subview (this->
template getLocalView<Kokkos::HostSpace> (),
2740 rowRng, Kokkos::ALL ());
2741 if (isConstantStride ()) {
2742 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2745 for (
size_t k = 0; k < numVecs; ++k) {
2746 const size_t Y_col = this->isConstantStride () ? k :
2747 this->whichVectors_[k];
2748 auto Y_k = Kokkos::subview (Y_lcl, Kokkos::ALL (), Y_col);
2749 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2755 Kokkos::subview (this->
template getLocalView<device_type> (),
2756 rowRng, Kokkos::ALL ());
2757 if (isConstantStride ()) {
2758 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2761 for (
size_t k = 0; k < numVecs; ++k) {
2762 const size_t Y_col = this->isConstantStride () ? k :
2763 this->whichVectors_[k];
2764 auto Y_k = Kokkos::subview (Y_lcl, Kokkos::ALL (), Y_col);
2765 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2772 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2775 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2777 const size_t numVecs = this->getNumVectors ();
2778 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2779 TEUCHOS_TEST_FOR_EXCEPTION(
2780 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::" 2781 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = " 2785 typedef Kokkos::DualView<impl_scalar_type*, device_type> k_alphas_type ;
2786 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2787 k_alphas.template modify<typename k_alphas_type::host_mirror_space> ();
2788 for (
size_t i = 0; i < numAlphas; ++i) {
2791 k_alphas.template sync<typename k_alphas_type::memory_space> ();
2793 this->scale (k_alphas.d_view);
2796 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2799 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2802 using Kokkos::subview;
2804 const size_t lclNumRows = this->getLocalLength ();
2805 const size_t numVecs = this->getNumVectors ();
2806 TEUCHOS_TEST_FOR_EXCEPTION(
2807 static_cast<size_t> (alphas.dimension_0 ()) != numVecs,
2808 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): " 2809 "alphas.dimension_0() = " << alphas.dimension_0 ()
2810 <<
" != this->getNumVectors () = " << numVecs <<
".");
2811 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2812 const std::pair<size_t, size_t> colRng (0, numVecs);
2822 const bool useHostVersion = this->
template need_sync<device_type> ();
2823 if (useHostVersion) {
2826 auto alphas_h = Kokkos::create_mirror_view (alphas);
2827 typedef typename decltype (alphas_h)::memory_space cur_memory_space;
2831 Kokkos::subview (this->
template getLocalView<cur_memory_space> (),
2832 rowRng, Kokkos::ALL ());
2833 if (isConstantStride ()) {
2834 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2837 for (
size_t k = 0; k < numVecs; ++k) {
2838 const size_t Y_col = this->isConstantStride () ? k :
2839 this->whichVectors_[k];
2840 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2843 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2849 Kokkos::subview (this->
template getLocalView<device_type> (),
2850 rowRng, Kokkos::ALL());
2851 if (isConstantStride ()) {
2852 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2855 for (
size_t k = 0; k < numVecs; ++k) {
2856 const size_t Y_col = this->isConstantStride () ? k :
2857 this->whichVectors_[k];
2858 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2864 KokkosBlas::scal (Y_k, alphas(k), Y_k);
2870 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2877 using Kokkos::subview;
2878 const char tfecfFuncName[] =
"scale: ";
2880 const size_t lclNumRows = getLocalLength ();
2881 const size_t numVecs = getNumVectors ();
2883 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2885 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = " 2887 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2889 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = " 2893 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2894 const std::pair<size_t, size_t> colRng (0, numVecs);
2896 typedef typename dual_view_type::t_dev dev_view_type;
2897 typedef typename dual_view_type::t_host host_view_type;
2900 const bool useHostVersion = this->
template need_sync<device_type> ();
2901 if (useHostVersion) {
2905 typedef typename host_view_type::memory_space cur_memory_space;
2907 this->
template sync<cur_memory_space> ();
2908 this->
template modify<cur_memory_space> ();
2909 auto Y_lcl_orig = this->
template getLocalView<cur_memory_space> ();
2910 auto X_lcl_orig = A.template getLocalView<cur_memory_space> ();
2911 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
2912 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
2915 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
2919 for (
size_t k = 0; k < numVecs; ++k) {
2920 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2922 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2923 auto X_k = subview (X_lcl, ALL (), X_col);
2925 KokkosBlas::scal (Y_k, theAlpha, X_k);
2930 typedef typename dev_view_type::memory_space cur_memory_space;
2932 this->
template sync<cur_memory_space> ();
2933 this->
template modify<cur_memory_space> ();
2934 auto Y_lcl_orig = this->
template getLocalView<cur_memory_space> ();
2935 auto X_lcl_orig = A.template getLocalView<cur_memory_space> ();
2936 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
2937 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
2940 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
2944 for (
size_t k = 0; k < numVecs; ++k) {
2945 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2947 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2948 auto X_k = subview (X_lcl, ALL (), X_col);
2950 KokkosBlas::scal (Y_k, theAlpha, X_k);
2957 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
2963 const char tfecfFuncName[] =
"reciprocal: ";
2965 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2967 "MultiVectors do not have the same local length. " 2968 "this->getLocalLength() = " << getLocalLength ()
2970 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2971 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2972 ": MultiVectors do not have the same number of columns (vectors). " 2973 "this->getNumVectors() = " << getNumVectors ()
2974 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2978 const size_t numVecs = getNumVectors ();
2980 this->
template sync<device_type> ();
2981 this->
template modify<device_type> ();
2987 const_cast<MV&
> (A).
template sync<device_type> ();
2989 auto this_view_dev = this->
template getLocalView<device_type> ();
2990 auto A_view_dev = A.template getLocalView<device_type> ();
2993 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
2997 using Kokkos::subview;
2998 typedef Kokkos::View<impl_scalar_type*, device_type> view_type;
3000 for (
size_t k = 0; k < numVecs; ++k) {
3001 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3002 view_type vector_k = subview (this_view_dev, ALL (), this_col);
3003 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
3004 view_type vector_Ak = subview (A_view_dev, ALL (), A_col);
3005 KokkosBlas::reciprocal (vector_k, vector_Ak);
3009 catch (std::runtime_error &e) {
3010 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
true, std::runtime_error,
3011 ": Caught exception from Kokkos: " << e.what () << std::endl);
3016 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3022 const char tfecfFuncName[] =
"abs";
3023 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3025 ": MultiVectors do not have the same local length. " 3026 "this->getLocalLength() = " << getLocalLength ()
3028 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3029 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
3030 ": MultiVectors do not have the same number of columns (vectors). " 3031 "this->getNumVectors() = " << getNumVectors ()
3032 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
3036 const size_t numVecs = getNumVectors ();
3038 this->
template sync<device_type> ();
3039 this->
template modify<device_type> ();
3045 const_cast<MV&
> (A).
template sync<device_type> ();
3047 auto this_view_dev = this->
template getLocalView<device_type> ();
3048 auto A_view_dev = A.template getLocalView<device_type> ();
3051 KokkosBlas::abs (this_view_dev, A_view_dev);
3055 using Kokkos::subview;
3056 typedef Kokkos::View<impl_scalar_type*, device_type> view_type;
3058 for (
size_t k=0; k < numVecs; ++k) {
3059 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3060 view_type vector_k = subview (this_view_dev, ALL (), this_col);
3061 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
3062 view_type vector_Ak = subview (A_view_dev, ALL (), A_col);
3063 KokkosBlas::abs (vector_k, vector_Ak);
3069 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3077 using Kokkos::subview;
3078 const char tfecfFuncName[] =
"update: ";
3082 const size_t lclNumRows = getLocalLength ();
3083 const size_t numVecs = getNumVectors ();
3085 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3087 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = " 3089 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3091 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = " 3096 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3097 const std::pair<size_t, size_t> colRng (0, numVecs);
3099 typedef typename dual_view_type::t_dev dev_view_type;
3100 typedef typename dual_view_type::t_host host_view_type;
3103 const bool useHostVersion = this->
template need_sync<device_type> ();
3104 if (useHostVersion) {
3108 typedef typename host_view_type::memory_space cur_memory_space;
3110 this->
template sync<cur_memory_space> ();
3111 this->
template modify<cur_memory_space> ();
3112 auto Y_lcl_orig = this->
template getLocalView<cur_memory_space> ();
3113 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
3114 auto X_lcl_orig = A.template getLocalView<cur_memory_space> ();
3115 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
3118 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
3122 for (
size_t k = 0; k < numVecs; ++k) {
3123 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
3125 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3126 auto X_k = subview (X_lcl, ALL (), X_col);
3128 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
3135 typedef typename dev_view_type::memory_space cur_memory_space;
3136 this->
template sync<cur_memory_space> ();
3137 this->
template modify<cur_memory_space> ();
3138 auto Y_lcl_orig = this->
template getLocalView<cur_memory_space> ();
3139 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
3140 auto X_lcl_orig = A.template getLocalView<cur_memory_space> ();
3141 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
3144 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
3148 for (
size_t k = 0; k < numVecs; ++k) {
3149 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
3151 auto Y_k = subview (Y_lcl, ALL (), Y_col);
3152 auto X_k = subview (X_lcl, ALL (), X_col);
3154 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
3160 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3167 const Scalar& gamma)
3170 using Kokkos::subview;
3172 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
3176 const size_t lclNumRows = this->getLocalLength ();
3177 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3179 "The input MultiVector A has " << A.
getLocalLength () <<
" local " 3180 "row(s), but this MultiVector has " << lclNumRows <<
" local " 3182 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3184 "The input MultiVector B has " << B.
getLocalLength () <<
" local " 3185 "row(s), but this MultiVector has " << lclNumRows <<
" local " 3187 const size_t numVecs = getNumVectors ();
3188 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3190 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), " 3191 "but this MultiVector has " << numVecs <<
" column(s).");
3192 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3194 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), " 3195 "but this MultiVector has " << numVecs <<
" column(s).");
3205 this->
template sync<device_type> ();
3206 const_cast<MV&
> (A).
template sync<device_type> ();
3207 const_cast<MV&
> (B).
template sync<device_type> ();
3210 this->
template modify<device_type> ();
3212 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
3213 const std::pair<size_t, size_t> colRng (0, numVecs);
3218 auto C_lcl = subview (this->
template getLocalView<device_type> (), rowRng, ALL ());
3219 auto A_lcl = subview (A.template getLocalView<device_type> (), rowRng, ALL ());
3220 auto B_lcl = subview (B.template getLocalView<device_type> (), rowRng, ALL ());
3223 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
3228 for (
size_t k = 0; k < numVecs; ++k) {
3229 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
3232 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
3233 theBeta, subview (B_lcl, rowRng, B_col),
3234 theGamma, subview (C_lcl, rowRng, this_col));
3239 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3240 Teuchos::ArrayRCP<const Scalar>
3245 using Kokkos::subview;
3252 const_cast<MV*
> (
this)->
template sync<Kokkos::HostSpace> ();
3255 auto hostView = this->
template getLocalView<Kokkos::HostSpace> ();
3258 const size_t col = isConstantStride () ? j : whichVectors_[j];
3259 auto hostView_j = subview (hostView, ALL (), col);
3262 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3263 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3265 #ifdef HAVE_TPETRA_DEBUG 3266 TEUCHOS_TEST_FOR_EXCEPTION
3267 (static_cast<size_t> (hostView_j.dimension_0 ()) <
3268 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3269 "Tpetra::MultiVector::getData: hostView_j.dimension_0() = " 3270 << hostView_j.dimension_0 () <<
" < dataAsArcp.size() = " 3271 << dataAsArcp.size () <<
". " 3272 "Please report this bug to the Tpetra developers.");
3273 #endif // HAVE_TPETRA_DEBUG 3275 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3278 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3279 Teuchos::ArrayRCP<Scalar>
3284 using Kokkos::subview;
3291 const_cast<MV*
> (
this)->
template sync<Kokkos::HostSpace> ();
3295 this->
template modify<Kokkos::HostSpace> ();
3298 auto hostView = this->
template getLocalView<Kokkos::HostSpace> ();
3301 const size_t col = isConstantStride () ? j : whichVectors_[j];
3302 auto hostView_j = subview (hostView, ALL (), col);
3305 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3306 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3308 #ifdef HAVE_TPETRA_DEBUG 3309 TEUCHOS_TEST_FOR_EXCEPTION
3310 (static_cast<size_t> (hostView_j.dimension_0 ()) <
3311 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3312 "Tpetra::MultiVector::getDataNonConst: hostView_j.dimension_0() = " 3313 << hostView_j.dimension_0 () <<
" < dataAsArcp.size() = " 3314 << dataAsArcp.size () <<
". " 3315 "Please report this bug to the Tpetra developers.");
3316 #endif // HAVE_TPETRA_DEBUG 3318 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3321 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3326 if (
this != &source) {
3327 base_type::operator= (source);
3333 view_ = source.
view_;
3349 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3350 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3352 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const 3359 bool contiguous =
true;
3360 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
3361 for (
size_t j = 1; j < numCopyVecs; ++j) {
3362 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3367 if (contiguous && numCopyVecs > 0) {
3368 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
3371 RCP<const MV> X_sub = this->subView (cols);
3372 RCP<MV> Y (
new MV (this->getMap (), numCopyVecs,
false));
3378 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3379 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3386 RCP<const MV> X_sub = this->subView (colRng);
3387 RCP<MV> Y (
new MV (this->getMap (), static_cast<size_t> (colRng.size ()),
false));
3392 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3396 return origView_.dimension_0 ();
3399 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3403 return origView_.dimension_1 ();
3406 template <
class Scalar,
class LO,
class GO,
class Node, const
bool classic>
3410 const size_t offset) :
3414 using Kokkos::subview;
3418 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3423 const int myRank = X.
getMap ()->getComm ()->getRank ();
3424 TEUCHOS_TEST_FOR_EXCEPTION(
3426 prefix <<
"Invalid input Map. The input Map owns " << newNumRows <<
3427 " entries on process " << myRank <<
". offset = " << offset <<
". " 3429 " rows on this process.");
3432 #ifdef HAVE_TPETRA_DEBUG 3433 const size_t strideBefore =
3438 X.template getLocalView<Kokkos::HostSpace> ().ptr_on_device ();
3439 #endif // HAVE_TPETRA_DEBUG 3441 const std::pair<size_t, size_t> origRowRng (offset, X.
origView_.dimension_0 ());
3442 const std::pair<size_t, size_t> rowRng (offset, offset + newNumRows);
3444 dual_view_type newOrigView = subview (X.
origView_, origRowRng, ALL ());
3455 dual_view_type newView = subview (offset == 0 ? X.
origView_ : X.
view_, rowRng, ALL ());
3462 if (newOrigView.dimension_0 () == 0 &&
3463 newOrigView.dimension_1 () != X.
origView_.dimension_1 ()) {
3464 newOrigView = allocDualView<Scalar, LO, GO, Node> (size_t (0),
3467 if (newView.dimension_0 () == 0 &&
3468 newView.dimension_1 () != X.
view_.dimension_1 ()) {
3469 newView = allocDualView<Scalar, LO, GO, Node> (size_t (0),
3474 MV (Teuchos::rcp (
new map_type (subMap)), newView, newOrigView) :
3477 #ifdef HAVE_TPETRA_DEBUG 3480 static_cast<size_t> (0);
3484 X.template getLocalView<Kokkos::HostSpace> ().ptr_on_device ();
3486 const size_t strideRet = subViewMV.isConstantStride () ?
3487 subViewMV.getStride () :
3488 static_cast<size_t> (0);
3489 const size_t lclNumRowsRet = subViewMV.getLocalLength ();
3490 const size_t numColsRet = subViewMV.getNumVectors ();
3492 const char suffix[] =
". This should never happen. Please report this " 3493 "bug to the Tpetra developers.";
3495 TEUCHOS_TEST_FOR_EXCEPTION(
3497 std::logic_error, prefix <<
"Returned MultiVector has a number of rows " 3498 "different than the number of local indices in the input Map. " 3499 "lclNumRowsRet: " << lclNumRowsRet <<
", subMap.getNodeNumElements(): " 3501 TEUCHOS_TEST_FOR_EXCEPTION(
3502 strideBefore != strideAfter || lclNumRowsBefore != lclNumRowsAfter ||
3503 numColsBefore != numColsAfter || hostPtrBefore != hostPtrAfter,
3504 std::logic_error, prefix <<
"Original MultiVector changed dimensions, " 3505 "stride, or host pointer after taking offset view. strideBefore: " <<
3506 strideBefore <<
", strideAfter: " << strideAfter <<
", lclNumRowsBefore: " 3507 << lclNumRowsBefore <<
", lclNumRowsAfter: " << lclNumRowsAfter <<
3508 ", numColsBefore: " << numColsBefore <<
", numColsAfter: " <<
3509 numColsAfter <<
", hostPtrBefore: " << hostPtrBefore <<
", hostPtrAfter: " 3510 << hostPtrAfter << suffix);
3511 TEUCHOS_TEST_FOR_EXCEPTION(
3512 strideBefore != strideRet, std::logic_error, prefix <<
"Returned " 3513 "MultiVector has different stride than original MultiVector. " 3514 "strideBefore: " << strideBefore <<
", strideRet: " << strideRet <<
3515 ", numColsBefore: " << numColsBefore <<
", numColsRet: " << numColsRet
3517 TEUCHOS_TEST_FOR_EXCEPTION(
3518 numColsBefore != numColsRet, std::logic_error,
3519 prefix <<
"Returned MultiVector has a different number of columns than " 3520 "original MultiVector. numColsBefore: " << numColsBefore <<
", " 3521 "numColsRet: " << numColsRet << suffix);
3522 #endif // HAVE_TPETRA_DEBUG 3527 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3528 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3531 const size_t offset)
const 3534 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3537 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3538 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3541 const size_t offset)
3544 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3547 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3548 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3550 subView (
const Teuchos::ArrayView<const size_t>& cols)
const 3552 using Teuchos::Array;
3556 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3557 TEUCHOS_TEST_FOR_EXCEPTION(
3558 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView" 3559 "(const Teuchos::ArrayView<const size_t>&): The input array cols must " 3560 "contain at least one entry, but cols.size() = " << cols.size ()
3565 bool contiguous =
true;
3566 for (
size_t j = 1; j < numViewCols; ++j) {
3567 if (cols[j] != cols[j-1] + static_cast<size_t> (1)) {
3573 if (numViewCols == 0) {
3575 return rcp (
new MV (this->getMap (), numViewCols));
3578 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3582 if (isConstantStride ()) {
3583 return rcp (
new MV (this->getMap (), view_, origView_, cols));
3586 Array<size_t> newcols (cols.size ());
3587 for (
size_t j = 0; j < numViewCols; ++j) {
3588 newcols[j] = whichVectors_[cols[j]];
3590 return rcp (
new MV (this->getMap (), view_, origView_, newcols ()));
3595 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3596 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3601 using Kokkos::subview;
3602 using Teuchos::Array;
3606 const char tfecfFuncName[] =
"subView(Range1D): ";
3608 const size_t lclNumRows = this->getLocalLength ();
3609 const size_t numVecs = this->getNumVectors ();
3613 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3614 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3615 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = " 3617 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3618 numVecs != 0 && colRng.size () != 0 &&
3619 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3620 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3621 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3622 "," << colRng.ubound () <<
"] exceeds the valid range of column indices " 3623 "[0, " << numVecs <<
"].");
3625 RCP<const MV> X_ret;
3635 const std::pair<size_t, size_t> rows (0, lclNumRows);
3636 if (colRng.size () == 0) {
3637 const std::pair<size_t, size_t> cols (0, 0);
3638 dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3639 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3643 if (isConstantStride ()) {
3644 const std::pair<size_t, size_t> cols (colRng.lbound (),
3645 colRng.ubound () + 1);
3646 dual_view_type X_sub = takeSubview (this->view_, ALL (), cols);
3647 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3650 if (static_cast<size_t> (colRng.size ()) == static_cast<size_t> (1)) {
3653 const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
3654 whichVectors_[0] + colRng.ubound () + 1);
3655 dual_view_type X_sub = takeSubview (view_, ALL (), col);
3656 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3659 Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
3660 whichVectors_.begin () + colRng.ubound () + 1);
3661 X_ret = rcp (
new MV (this->getMap (), view_, origView_, which));
3666 #ifdef HAVE_TPETRA_DEBUG 3667 using Teuchos::Comm;
3668 using Teuchos::outArg;
3669 using Teuchos::REDUCE_MIN;
3670 using Teuchos::reduceAll;
3672 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
3673 this->getMap ()->getComm ();
3674 if (! comm.is_null ()) {
3678 if (X_ret.is_null ()) {
3681 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3682 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3683 lclSuccess != 1, std::logic_error,
"X_ret (the subview of this " 3684 "MultiVector; the return value of this method) is null on some MPI " 3685 "process in this MultiVector's communicator. This should never " 3686 "happen. Please report this bug to the Tpetra developers.");
3688 if (! X_ret.is_null () &&
3689 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3692 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3693 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3694 lclSuccess != 1, std::logic_error,
3695 "X_ret->getNumVectors() != colRng.size(), on at least one MPI process " 3696 "in this MultiVector's communicator. This should never happen. " 3697 "Please report this bug to the Tpetra developers.");
3699 #endif // HAVE_TPETRA_DEBUG 3705 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3706 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3711 return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3715 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3716 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3721 return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3725 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3726 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3731 using Kokkos::subview;
3735 #ifdef HAVE_TPETRA_DEBUG 3736 const char tfecfFuncName[] =
"getVector(NonConst): ";
3737 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3738 this->vectorIndexOutOfRange (j), std::runtime_error,
"Input index j (== " 3739 << j <<
") exceeds valid range [0, " << this->getNumVectors ()
3741 #endif // HAVE_TPETRA_DEBUG 3742 const size_t jj = this->isConstantStride () ?
3743 static_cast<size_t> (j) :
3744 static_cast<size_t> (this->whichVectors_[j]);
3745 const std::pair<size_t, size_t> rng (jj, jj+1);
3746 return rcp (
new V (this->getMap (),
3747 takeSubview (this->view_, ALL (), rng),
3752 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3753 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
3758 return Teuchos::rcp_const_cast<V> (this->getVector (j));
3762 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3765 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const 3767 typedef decltype (this->
template getLocalView<device_type> ())
3769 typedef decltype (this->
template getLocalView<Kokkos::HostSpace> ())
3772 typedef Kokkos::View<IST*,
typename host_view_type::array_layout,
3773 Kokkos::HostSpace, Kokkos::MemoryUnmanaged> input_col_type;
3774 const char tfecfFuncName[] =
"get1dCopy: ";
3776 const size_t numRows = this->getLocalLength ();
3777 const size_t numCols = this->getNumVectors ();
3778 const std::pair<size_t, size_t> rowRange (0, numRows);
3780 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3781 LDA < numRows, std::runtime_error,
3782 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3783 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3784 numRows > static_cast<size_t> (0) &&
3785 numCols > static_cast<size_t> (0) &&
3786 static_cast<size_t> (A.size ()) < LDA * (numCols - 1) + numRows,
3788 "A.size() = " << A.size () <<
", but its size must be at least " 3789 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3804 const bool useHostVersion = this->
template need_sync<device_type> ();
3806 dev_view_type srcView_dev;
3807 host_view_type srcView_host;
3808 if (useHostVersion) {
3809 srcView_host = this->
template getLocalView<Kokkos::HostSpace> ();
3812 srcView_dev = this->
template getLocalView<device_type> ();
3815 for (
size_t j = 0; j < numCols; ++j) {
3816 const size_t srcCol =
3817 this->isConstantStride () ? j : this->whichVectors_[j];
3818 const size_t dstCol = j;
3819 IST*
const dstColRaw =
3820 reinterpret_cast<IST*
> (A.getRawPtr () + LDA * dstCol);
3821 input_col_type dstColView (dstColRaw, numRows);
3823 if (useHostVersion) {
3824 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3825 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3826 dstColView.dimension_0 () != srcColView_host.dimension_0 (),
3827 std::logic_error,
": srcColView and dstColView_host have different " 3828 "dimensions. Please report this bug to the Tpetra developers.");
3832 auto srcColView_dev = Kokkos::subview (srcView_dev, rowRange, srcCol);
3833 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3834 dstColView.dimension_0 () != srcColView_dev.dimension_0 (),
3835 std::logic_error,
": srcColView and dstColView_dev have different " 3836 "dimensions. Please report this bug to the Tpetra developers.");
3843 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3846 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const 3849 const char tfecfFuncName[] =
"get2dCopy: ";
3850 const size_t numRows = this->getLocalLength ();
3851 const size_t numCols = this->getNumVectors ();
3853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3854 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3855 std::runtime_error,
"Input array of pointers must contain as many " 3856 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = " 3857 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3859 if (numRows != 0 && numCols != 0) {
3861 for (
size_t j = 0; j < numCols; ++j) {
3862 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3863 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3864 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of " 3865 "the input array of arrays is not long enough to fit all entries in " 3866 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3867 <<
" < getLocalLength() = " << numRows <<
".");
3871 for (
size_t j = 0; j < numCols; ++j) {
3872 Teuchos::RCP<const V> X_j = this->getVector (j);
3873 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3874 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3879 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3880 Teuchos::ArrayRCP<const Scalar>
3884 if (getLocalLength () == 0 || getNumVectors () == 0) {
3885 return Teuchos::null;
3889 TEUCHOS_TEST_FOR_EXCEPTION(
3890 ! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::" 3891 "get1dView: This MultiVector does not have constant stride, so it is " 3892 "not possible to view its data as a single array. You may check " 3893 "whether a MultiVector has constant stride by calling " 3894 "isConstantStride().");
3901 const_cast<MV*
> (
this)->
template sync<Kokkos::HostSpace> ();
3904 auto X_lcl = this->
template getLocalView<Kokkos::HostSpace> ();
3905 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3906 Kokkos::Compat::persistingView (X_lcl);
3907 return Teuchos::arcp_reinterpret_cast<
const Scalar> (dataAsArcp);
3911 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3912 Teuchos::ArrayRCP<Scalar>
3916 if (getLocalLength () == 0 || getNumVectors () == 0) {
3917 return Teuchos::null;
3919 TEUCHOS_TEST_FOR_EXCEPTION
3920 (! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::" 3921 "get1dViewNonConst: This MultiVector does not have constant stride, " 3922 "so it is not possible to view its data as a single array. You may " 3923 "check whether a MultiVector has constant stride by calling " 3924 "isConstantStride().");
3928 this->
template sync<Kokkos::HostSpace> ();
3931 auto X_lcl = this->
template getLocalView<Kokkos::HostSpace> ();
3932 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3933 Kokkos::Compat::persistingView (X_lcl);
3934 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3938 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3939 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3947 this->sync<Kokkos::HostSpace> ();
3951 this->modify<Kokkos::HostSpace> ();
3953 const size_t myNumRows = this->getLocalLength ();
3954 const size_t numCols = this->getNumVectors ();
3955 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3960 auto X_lcl = this->getLocalView<Kokkos::HostSpace> ();
3962 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3963 for (
size_t j = 0; j < numCols; ++j) {
3964 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3965 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3966 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3967 Kokkos::Compat::persistingView (X_lcl_j);
3968 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3973 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
3974 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
3987 const_cast<this_type*
> (
this)->sync<Kokkos::HostSpace> ();
3989 const size_t myNumRows = this->getLocalLength ();
3990 const size_t numCols = this->getNumVectors ();
3991 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3996 auto X_lcl = this->getLocalView<Kokkos::HostSpace> ();
3998 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
3999 for (
size_t j = 0; j < numCols; ++j) {
4000 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
4001 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
4002 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
4003 Kokkos::Compat::persistingView (X_lcl_j);
4004 views[j] = Teuchos::arcp_reinterpret_cast<
const Scalar> (X_lcl_j_arcp);
4009 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4013 Teuchos::ETransp transB,
4014 const Scalar& alpha,
4019 using Teuchos::CONJ_TRANS;
4020 using Teuchos::NO_TRANS;
4021 using Teuchos::TRANS;
4024 using Teuchos::rcpFromRef;
4025 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
4026 typedef Teuchos::ScalarTraits<Scalar> STS;
4028 const char errPrefix[] =
"Tpetra::MultiVector::multiply: ";
4066 #ifdef HAVE_TPETRA_DEBUG 4067 if (! this->getMap ().is_null () && ! this->getMap ()->getComm ().is_null ()) {
4073 const bool lclBad = this->getLocalLength () != A_nrows ||
4074 this->getNumVectors () != B_ncols || A_ncols != B_nrows;
4076 const int lclGood = lclBad ? 0 : 1;
4079 using Teuchos::REDUCE_MIN;
4080 using Teuchos::reduceAll;
4081 using Teuchos::outArg;
4083 auto comm = this->getMap ()->getComm ();
4084 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
4086 TEUCHOS_TEST_FOR_EXCEPTION
4087 (gblGood != 1, std::runtime_error, errPrefix <<
"Local dimensions of " 4088 "*this, op(A), and op(B) are not consistent on at least one process " 4089 "in this object's communicator.");
4091 #endif // HAVE_TPETRA_DEBUG 4095 const bool C_is_local = ! this->isDistributed ();
4097 const bool Case1 = C_is_local && A_is_local && B_is_local;
4099 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
4100 transA != NO_TRANS &&
4103 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
4107 TEUCHOS_TEST_FOR_EXCEPTION(
4108 ! Case1 && ! Case2 && ! Case3, std::runtime_error, errPrefix
4109 <<
"Multiplication of op(A) and op(B) into *this is not a " 4110 "supported use case.");
4112 if (beta != STS::zero () && Case2) {
4118 const int myRank = this->getMap ()->getComm ()->getRank ();
4120 beta_local = ATS::zero ();
4129 if (! isConstantStride ()) {
4130 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
4132 C_tmp = rcp (
this,
false);
4135 RCP<const MV> A_tmp;
4137 A_tmp = rcp (
new MV (A, Teuchos::Copy));
4139 A_tmp = rcpFromRef (A);
4142 RCP<const MV> B_tmp;
4144 B_tmp = rcp (
new MV (B, Teuchos::Copy));
4146 B_tmp = rcpFromRef (B);
4149 TEUCHOS_TEST_FOR_EXCEPTION(
4150 ! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
4151 ! A_tmp->isConstantStride (), std::logic_error, errPrefix
4152 <<
"Failed to make temporary constant-stride copies of MultiVectors.");
4155 typedef LocalOrdinal LO;
4157 const LO A_lclNumRows = A_tmp->getLocalLength ();
4158 const LO A_numVecs = A_tmp->getNumVectors ();
4159 auto A_lcl = A_tmp->template getLocalView<device_type> ();
4160 auto A_sub = Kokkos::subview (A_lcl,
4161 std::make_pair (LO (0), A_lclNumRows),
4162 std::make_pair (LO (0), A_numVecs));
4163 const LO B_lclNumRows = B_tmp->getLocalLength ();
4164 const LO B_numVecs = B_tmp->getNumVectors ();
4165 auto B_lcl = B_tmp->template getLocalView<device_type> ();
4166 auto B_sub = Kokkos::subview (B_lcl,
4167 std::make_pair (LO (0), B_lclNumRows),
4168 std::make_pair (LO (0), B_numVecs));
4169 const LO C_lclNumRows = C_tmp->getLocalLength ();
4170 const LO C_numVecs = C_tmp->getNumVectors ();
4171 auto C_lcl = C_tmp->template getLocalView<device_type> ();
4172 auto C_sub = Kokkos::subview (C_lcl,
4173 std::make_pair (LO (0), C_lclNumRows),
4174 std::make_pair (LO (0), C_numVecs));
4175 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
4176 (transA == Teuchos::TRANS ?
'T' :
'C'));
4177 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
4178 (transB == Teuchos::TRANS ?
'T' :
'C'));
4182 ::Tpetra::Details::Blas::gemm (ctransA, ctransB, alpha_IST, A_sub,
4183 B_sub, beta_local, C_sub);
4186 if (! isConstantStride ()) {
4191 A_tmp = Teuchos::null;
4192 B_tmp = Teuchos::null;
4200 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4209 using Kokkos::subview;
4212 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4214 const size_t lclNumRows = this->getLocalLength ();
4215 const size_t numVecs = this->getNumVectors ();
4217 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4219 std::runtime_error,
"MultiVectors do not have the same local length.");
4220 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4221 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors" 4222 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
4231 this->
template sync<device_type> ();
4232 this->
template modify<device_type> ();
4233 const_cast<V&
> (A).
template sync<device_type> ();
4234 const_cast<MV&
> (B).
template sync<device_type> ();
4235 auto this_view = this->
template getLocalView<device_type> ();
4236 auto A_view = A.template getLocalView<device_type> ();
4237 auto B_view = B.template getLocalView<device_type> ();
4245 KokkosBlas::mult (scalarThis,
4248 subview (A_view, ALL (), 0),
4252 for (
size_t j = 0; j < numVecs; ++j) {
4253 const size_t C_col = isConstantStride () ? j : whichVectors_[j];
4255 KokkosBlas::mult (scalarThis,
4256 subview (this_view, ALL (), C_col),
4258 subview (A_view, ALL (), 0),
4259 subview (B_view, ALL (), B_col));
4264 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4269 using Teuchos::reduceAll;
4270 using Teuchos::REDUCE_SUM;
4271 typedef decltype (this->
template getLocalView<device_type> ())
4273 typedef decltype (this->
template getLocalView<Kokkos::HostSpace> ())
4278 TEUCHOS_TEST_FOR_EXCEPTION
4279 (this->isDistributed (), std::runtime_error,
4280 "Tpetra::MultiVector::reduce should only be called with locally " 4281 "replicated or otherwise not distributed MultiVector objects.");
4282 const Teuchos::Comm<int>& comm = * (this->getMap ()->getComm ());
4283 if (comm.getSize () == 1) {
4287 const size_t lclNumRows = getLocalLength ();
4288 const size_t numCols = getNumVectors ();
4289 const size_t totalAllocSize = lclNumRows * numCols;
4296 TEUCHOS_TEST_FOR_EXCEPTION(
4297 lclNumRows > static_cast<size_t> (std::numeric_limits<int>::max ()),
4298 std::runtime_error,
"Tpetra::MultiVector::reduce: On Process " <<
4299 comm.getRank () <<
", the number of local rows " << lclNumRows <<
4300 " does not fit in int.");
4309 const bool useHostVersion = this->
template need_sync<device_type> ();
4311 dev_view_type srcView_dev;
4312 host_view_type srcView_host;
4313 if (useHostVersion) {
4314 srcView_host = this->
template getLocalView<Kokkos::HostSpace> ();
4315 if (lclNumRows != static_cast<size_t> (srcView_host.dimension_0 ())) {
4317 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
4318 srcView_host = Kokkos::subview (srcView_host, rowRng, Kokkos::ALL ());
4322 srcView_dev = this->
template getLocalView<device_type> ();
4323 if (lclNumRows != static_cast<size_t> (srcView_dev.dimension_0 ())) {
4325 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
4326 srcView_dev = Kokkos::subview (srcView_dev, rowRng, Kokkos::ALL ());
4334 const bool contig = isConstantStride () && getStride () == lclNumRows;
4335 dev_view_type srcBuf_dev;
4336 host_view_type srcBuf_host;
4337 if (useHostVersion) {
4339 srcBuf_host = srcView_host;
4342 srcBuf_host = decltype (srcBuf_host) (
"srcBuf", lclNumRows, numCols);
4348 srcBuf_dev = srcView_dev;
4351 srcBuf_dev = decltype (srcBuf_dev) (
"srcBuf", lclNumRows, numCols);
4362 const bool correct =
4363 (useHostVersion &&
static_cast<size_t> (srcBuf_host.size ()) >= totalAllocSize) ||
4364 (! useHostVersion &&
static_cast<size_t> (srcBuf_dev.size ()) >= totalAllocSize);
4365 TEUCHOS_TEST_FOR_EXCEPTION
4366 (! correct, std::logic_error,
"Tpetra::MultiVector::reduce: Violated " 4367 "invariant of temporary source buffer construction. Please report " 4368 "this bug to the Tpetra developers.");
4376 dev_view_type tgtBuf_dev;
4377 host_view_type tgtBuf_host;
4378 if (useHostVersion) {
4379 tgtBuf_host = host_view_type (
"tgtBuf", lclNumRows, numCols);
4382 tgtBuf_dev = dev_view_type (
"tgtBuf", lclNumRows, numCols);
4385 const int reduceCount =
static_cast<int> (totalAllocSize);
4386 if (useHostVersion) {
4387 TEUCHOS_TEST_FOR_EXCEPTION
4388 (static_cast<size_t> (tgtBuf_host.size ()) < totalAllocSize,
4389 std::logic_error,
"Tpetra::MultiVector::reduce: tgtBuf_host.size() = " 4390 << tgtBuf_host.size () <<
" < lclNumRows*numCols = " << totalAllocSize
4391 <<
". Please report this bug to the Tpetra developers.");
4392 reduceAll<int, impl_scalar_type> (comm, REDUCE_SUM, reduceCount,
4393 srcBuf_host.ptr_on_device (),
4394 tgtBuf_host.ptr_on_device ());
4397 TEUCHOS_TEST_FOR_EXCEPTION
4398 (static_cast<size_t> (tgtBuf_dev.size ()) < totalAllocSize,
4399 std::logic_error,
"Tpetra::MultiVector::reduce: tgtBuf_dev.size() = " 4400 << tgtBuf_dev.size () <<
" < lclNumRows*numCols = " << totalAllocSize
4401 <<
". Please report this bug to the Tpetra developers.");
4402 reduceAll<int, impl_scalar_type> (comm, REDUCE_SUM, reduceCount,
4403 srcBuf_dev.ptr_on_device (),
4404 tgtBuf_dev.ptr_on_device ());
4408 if (useHostVersion) {
4409 this->
template modify<Kokkos::HostSpace> ();
4410 if (contig || isConstantStride ()) {
4414 for (
size_t j = 0; j < numCols; ++j) {
4415 auto X_j_out = Kokkos::subview (srcView_host, Kokkos::ALL (), j);
4416 auto X_j_in = Kokkos::subview (tgtBuf_host, Kokkos::ALL (), j);
4422 this->
template modify<device_type> ();
4423 if (contig || isConstantStride ()) {
4427 for (
size_t j = 0; j < numCols; ++j) {
4428 auto X_j_out = Kokkos::subview (srcView_dev, Kokkos::ALL (), j);
4429 auto X_j_in = Kokkos::subview (tgtBuf_dev, Kokkos::ALL (), j);
4439 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4446 #ifdef HAVE_TPETRA_DEBUG 4447 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4448 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4449 TEUCHOS_TEST_FOR_EXCEPTION(
4450 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4452 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4453 <<
" is invalid. The range of valid row indices on this process " 4454 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4455 <<
", " << maxLocalIndex <<
"].");
4456 TEUCHOS_TEST_FOR_EXCEPTION(
4457 vectorIndexOutOfRange(col),
4459 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4460 <<
" of the multivector is invalid.");
4462 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4463 view_.h_view (lclRow, colInd) = ScalarValue;
4467 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4473 const bool atomic)
const 4475 #ifdef HAVE_TPETRA_DEBUG 4476 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4477 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4478 TEUCHOS_TEST_FOR_EXCEPTION(
4479 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4481 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4482 <<
" is invalid. The range of valid row indices on this process " 4483 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4484 <<
", " << maxLocalIndex <<
"].");
4485 TEUCHOS_TEST_FOR_EXCEPTION(
4486 vectorIndexOutOfRange(col),
4488 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4489 <<
" of the multivector is invalid.");
4491 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4493 Kokkos::atomic_add (& (view_.h_view(lclRow, colInd)), value);
4496 view_.h_view (lclRow, colInd) += value;
4501 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4510 const LocalOrdinal MyRow = this->map_->getLocalElement (gblRow);
4511 #ifdef HAVE_TPETRA_DEBUG 4512 TEUCHOS_TEST_FOR_EXCEPTION(
4513 MyRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4515 "Tpetra::MultiVector::replaceGlobalValue: Global row index " << gblRow
4516 <<
"is not present on this process " 4517 << this->getMap ()->getComm ()->getRank () <<
".");
4518 TEUCHOS_TEST_FOR_EXCEPTION(
4519 vectorIndexOutOfRange (col), std::runtime_error,
4520 "Tpetra::MultiVector::replaceGlobalValue: Vector index " << col
4521 <<
" of the multivector is invalid.");
4522 #endif // HAVE_TPETRA_DEBUG 4523 this->replaceLocalValue (MyRow, col, ScalarValue);
4526 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4532 const bool atomic)
const 4536 const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow);
4537 #ifdef HAVE_TEUCHOS_DEBUG 4538 TEUCHOS_TEST_FOR_EXCEPTION(
4539 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4541 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4542 <<
"is not present on this process " 4543 << this->getMap ()->getComm ()->getRank () <<
".");
4544 TEUCHOS_TEST_FOR_EXCEPTION(
4545 vectorIndexOutOfRange(col),
4547 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4548 <<
" of the multivector is invalid.");
4550 this->sumIntoLocalValue (lclRow, col, value, atomic);
4553 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4555 Teuchos::ArrayRCP<T>
4561 typename dual_view_type::array_layout,
4563 const size_t col = isConstantStride () ? j : whichVectors_[j];
4564 col_dual_view_type X_col =
4565 Kokkos::subview (view_, Kokkos::ALL (), col);
4566 return Kokkos::Compat::persistingView (X_col.d_view);
4569 template <
class Scalar,
4571 class GlobalOrdinal,
4580 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4585 using Teuchos::TypeNameTraits;
4587 std::ostringstream out;
4588 out <<
"\"" << className <<
"\": {";
4589 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4590 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4591 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4592 <<
", Node" << Node::name ()
4594 if (this->getObjectLabel () !=
"") {
4595 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4597 out <<
", numRows: " << this->getGlobalLength ();
4598 if (className !=
"Tpetra::Vector") {
4599 out <<
", numCols: " << this->getNumVectors ()
4600 <<
", isConstantStride: " << this->isConstantStride ();
4602 if (this->isConstantStride ()) {
4603 out <<
", columnStride: " << this->getStride ();
4610 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4615 return this->descriptionImpl (
"Tpetra::MultiVector");
4618 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4623 typedef LocalOrdinal LO;
4626 if (vl <= Teuchos::VERB_LOW) {
4627 return std::string ();
4629 auto map = this->getMap ();
4630 if (map.is_null ()) {
4631 return std::string ();
4633 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4634 auto outp = Teuchos::getFancyOStream (outStringP);
4635 Teuchos::FancyOStream& out = *outp;
4636 auto comm = map->getComm ();
4637 const int myRank = comm->getRank ();
4638 const int numProcs = comm->getSize ();
4640 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4641 Teuchos::OSTab tab1 (out);
4644 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
4645 out <<
"Local number of rows: " << lclNumRows << endl;
4647 if (vl > Teuchos::VERB_MEDIUM) {
4650 if (this->getNumVectors () !=
static_cast<size_t> (1)) {
4651 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4653 if (this->isConstantStride ()) {
4654 out <<
"Column stride: " << this->getStride () << endl;
4657 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4661 auto X_dual = this->getDualView ();
4662 typename dual_view_type::t_host X_host;
4663 if (X_dual.template need_sync<Kokkos::HostSpace> ()) {
4669 auto X_dev = this->
template getLocalView<device_type> ();
4670 auto X_host_copy = Kokkos::create_mirror_view (X_dev);
4672 X_host = X_host_copy;
4678 X_host = this->
template getLocalView<Kokkos::HostSpace> ();
4682 out <<
"Values: " << endl
4684 const LO numCols =
static_cast<LO
> (this->getNumVectors ());
4686 for (LO i = 0; i < lclNumRows; ++i) {
4688 if (i + 1 < lclNumRows) {
4694 for (LO i = 0; i < lclNumRows; ++i) {
4695 for (LO j = 0; j < numCols; ++j) {
4697 if (j + 1 < numCols) {
4701 if (i + 1 < lclNumRows) {
4711 return outStringP->str ();
4714 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4718 const std::string& className,
4719 const Teuchos::EVerbosityLevel verbLevel)
const 4721 using Teuchos::TypeNameTraits;
4722 using Teuchos::VERB_DEFAULT;
4723 using Teuchos::VERB_NONE;
4724 using Teuchos::VERB_LOW;
4726 const Teuchos::EVerbosityLevel vl =
4727 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4729 if (vl == VERB_NONE) {
4736 auto map = this->getMap ();
4737 if (map.is_null ()) {
4740 auto comm = map->getComm ();
4741 if (comm.is_null ()) {
4745 const int myRank = comm->getRank ();
4754 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4758 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4759 out <<
"\"" << className <<
"\":" << endl;
4760 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4762 out <<
"Template parameters:" << endl;
4763 Teuchos::OSTab tab2 (out);
4764 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4765 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4766 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4767 <<
"Node: " << Node::name () << endl;
4769 if (this->getObjectLabel () !=
"") {
4770 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4772 out <<
"Global number of rows: " << this->getGlobalLength () << endl;
4773 if (className !=
"Tpetra::Vector") {
4774 out <<
"Number of columns: " << this->getNumVectors () << endl;
4781 if (vl > VERB_LOW) {
4782 const std::string lclStr = this->localDescribeToString (vl);
4787 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4791 const Teuchos::EVerbosityLevel verbLevel)
const 4793 this->describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4796 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4801 replaceMap (newMap);
4804 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node, const
bool classic>
4809 typedef LocalOrdinal LO;
4811 typedef typename dual_view_type::host_mirror_space::device_type HMDT;
4812 const char prefix[] =
"Tpetra::deep_copy (MultiVector): ";
4813 const bool debug =
false;
4815 TEUCHOS_TEST_FOR_EXCEPTION
4817 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4818 prefix <<
"Global dimensions of the two Tpetra::MultiVector " 4819 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4820 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions [" 4821 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4823 TEUCHOS_TEST_FOR_EXCEPTION
4824 (this->getLocalLength () != src.
getLocalLength (), std::invalid_argument,
4825 prefix <<
"The local row counts of the two Tpetra::MultiVector " 4826 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) " 4827 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4833 this->view_.modified_device () = 0;
4834 this->view_.modified_host () = 0;
4836 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4837 std::cout <<
"*** MultiVector::assign: ";
4841 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4842 std::cout <<
"Both *this and src have constant stride" << std::endl;
4846 const bool useHostVersion = src.template need_sync<device_type> ();
4848 if (useHostVersion) {
4849 this->
template modify<HMDT> ();
4851 Details::localDeepCopyConstStride (this->
template getLocalView<HMDT> (),
4852 src.template getLocalView<HMDT> ());
4853 this->
template sync<DT> ();
4856 this->
template modify<DT> ();
4858 Details::localDeepCopyConstStride (this->
template getLocalView<DT> (),
4859 src.template getLocalView<DT> ());
4860 this->
template sync<HMDT> ();
4864 if (this->isConstantStride ()) {
4865 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4866 std::cout <<
"Only *this has constant stride";
4869 const LO numWhichVecs =
static_cast<LO
> (src.
whichVectors_.size ());
4870 const std::string whichVecsLabel (
"MV::deep_copy::whichVecs");
4877 const bool useHostVersion = src.template need_sync<device_type> ();
4878 if (useHostVersion) {
4879 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4880 std::cout <<
"; Copy from host version of src" << std::endl;
4886 typedef Kokkos::View<LO*, HMDT> whichvecs_type;
4887 whichvecs_type srcWhichVecs (whichVecsLabel, numWhichVecs);
4888 for (LO i = 0; i < numWhichVecs; ++i) {
4894 Details::localDeepCopy (this->
template getLocalView<HMDT> (),
4895 src.template getLocalView<HMDT> (),
4896 true,
false, srcWhichVecs, srcWhichVecs);
4898 this->
template sync<DT> ();
4901 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4902 std::cout <<
"; Copy from device version of src" << std::endl;
4908 typedef Kokkos::DualView<LO*, DT> whichvecs_type;
4909 whichvecs_type srcWhichVecs (whichVecsLabel, numWhichVecs);
4910 srcWhichVecs.template modify<HMDT> ();
4911 for (LO i = 0; i < numWhichVecs; ++i) {
4912 srcWhichVecs.h_view(i) =
static_cast<LO
> (src.
whichVectors_[i]);
4915 srcWhichVecs.template sync<DT> ();
4918 this->
template modify<DT> ();
4923 Details::localDeepCopy (this->
template getLocalView<DT> (),
4924 src.template getLocalView<DT> (),
4925 true,
false, srcWhichVecs.d_view,
4926 srcWhichVecs.d_view);
4929 this->
template sync<HMDT> ();
4935 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4936 std::cout <<
"Only src has constant stride" << std::endl;
4940 const bool useHostVersion = src.template need_sync<device_type> ();
4941 if (useHostVersion) {
4946 typedef Kokkos::View<LO*, HMDT> whichvecs_type;
4947 const LO numWhichVecs =
static_cast<LO
> (this->whichVectors_.size ());
4948 whichvecs_type whichVecs (
"MV::deep_copy::whichVecs", numWhichVecs);
4949 for (LO i = 0; i < numWhichVecs; ++i) {
4950 whichVecs(i) =
static_cast<LO
> (this->whichVectors_[i]);
4954 Details::localDeepCopy (this->
template getLocalView<HMDT> (),
4955 src.template getLocalView<HMDT> (),
4958 whichVecs, whichVecs);
4963 this->
template sync<DT> ();
4968 typedef Kokkos::DualView<LO*, DT> whichvecs_type;
4969 const std::string whichVecsLabel (
"MV::deep_copy::whichVecs");
4970 const LO numWhichVecs =
static_cast<LO
> (this->whichVectors_.size ());
4971 whichvecs_type whichVecs (whichVecsLabel, numWhichVecs);
4972 whichVecs.template modify<HMDT> ();
4973 for (LO i = 0; i < numWhichVecs; ++i) {
4974 whichVecs.h_view(i) = this->whichVectors_[i];
4977 whichVecs.template sync<DT> ();
4980 Details::localDeepCopy (this->
template getLocalView<DT> (),
4981 src.template getLocalView<DT> (),
4984 whichVecs.d_view, whichVecs.d_view);
4990 this->
template sync<HMDT> ();
4994 if (debug && this->getMap ()->getComm ()->getRank () == 0) {
4995 std::cout <<
"Neither *this nor src has constant stride" << std::endl;
4999 const bool useHostVersion = src.template need_sync<device_type> ();
5000 if (useHostVersion) {
5001 const LO dstNumWhichVecs =
static_cast<LO
> (this->whichVectors_.size ());
5002 Kokkos::View<LO*, HMDT> whichVectorsDst (
"dstWhichVecs", dstNumWhichVecs);
5003 for (LO i = 0; i < dstNumWhichVecs; ++i) {
5004 whichVectorsDst(i) = this->whichVectors_[i];
5008 const LO srcNumWhichVecs =
static_cast<LO
> (src.
whichVectors_.size ());
5009 Kokkos::View<LO*, HMDT> whichVectorsSrc (
"srcWhichVecs", srcNumWhichVecs);
5010 for (LO i = 0; i < srcNumWhichVecs; ++i) {
5016 Details::localDeepCopy (this->
template getLocalView<HMDT> (),
5017 src.template getLocalView<HMDT> (),
5020 whichVectorsDst, whichVectorsSrc);
5027 this->
template sync<HMDT> ();
5032 const LO dstNumWhichVecs =
static_cast<LO
> (this->whichVectors_.size ());
5033 Kokkos::DualView<LO*, DT> whichVecsDst (
"MV::deep_copy::whichVecsDst",
5035 whichVecsDst.template modify<HMDT> ();
5036 for (LO i = 0; i < dstNumWhichVecs; ++i) {
5037 whichVecsDst.h_view(i) =
static_cast<LO
> (this->whichVectors_[i]);
5040 whichVecsDst.template sync<DT> ();
5046 const LO srcNumWhichVecs =
static_cast<LO
> (src.
whichVectors_.size ());
5047 Kokkos::DualView<LO*, DT> whichVecsSrc (
"MV::deep_copy::whichVecsSrc",
5049 whichVecsSrc.template modify<HMDT> ();
5050 for (LO i = 0; i < srcNumWhichVecs; ++i) {
5051 whichVecsSrc.h_view(i) =
static_cast<LO
> (src.
whichVectors_[i]);
5054 whichVecsSrc.template sync<DT> ();
5058 Details::localDeepCopy (this->
template getLocalView<DT> (),
5059 src.template getLocalView<DT> (),
5062 whichVecsDst.d_view, whichVecsSrc.d_view);
5069 template <
class Scalar,
class LO,
class GO,
class NT, const
bool classic>
5070 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT, classic> >
5075 return Teuchos::rcp (
new MV (map, numVectors));
5078 template <
class ST,
class LO,
class GO,
class NT, const
bool classic>
5096 #define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \ 5097 template class MultiVector< SCALAR , LO , GO , NODE >; \ 5098 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \ 5099 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); 5101 #endif // TPETRA_MULTIVECTOR_DEF_HPP Namespace Tpetra contains the class and methods constituting the Tpetra library.
size_t getStride() const
Stride between columns in the multivector.
device_type::execution_space execution_space
The Kokkos execution space.
MultiVector< ST, LO, GO, NT, classic > createCopy(const MultiVector< ST, LO, GO, NT, classic > &src)
Return a deep copy of the given MultiVector.
EWhichNormImpl
Input argument for localNormImpl() (which see).
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Teuchos::Array< size_t > whichVectors_
Indices of columns this multivector is viewing.
One or more distributed dense vectors.
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::RCP< const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > getVector(const size_t j) const
Return a Vector which is a const view of column j.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
dual_view_type view_
The Kokkos::DualView containing the MultiVector's data.
Kokkos::Details::InnerProductSpaceTraits< impl_scalar_type >::dot_type dot_type
Type of an inner ("dot") product result.
Node::device_type device_type
The Kokkos Device type.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > createMultiVector(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &map, const size_t numVectors)
Nonmember MultiVector "constructor": Create a MultiVector from a given Map.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
size_t global_size_t
Global size_t object.
Insert new values that don't currently exist.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host mem...
bool isConstantStride() const
Whether this multivector has constant stride between columns.
size_t getLocalLength() const
Local number of rows on the calling process.
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
Declaration of Tpetra::Details::isInterComm.
global_size_t getGlobalLength() const
Global number of rows in the multivector.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Replace old value with maximum of magnitudes of old and new values.
Abstract base class for objects that can be the source of an Import or Export operation.
Replace existing values with new values.
Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
Kokkos::Details::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
Kokkos::DualView< impl_scalar_type **, Kokkos::LayoutLeft, typename execution_space::execution_space > dual_view_type
Kokkos::DualView specialization used by this class.
A parallel distribution of indices over processes.
size_t getNumVectors() const
Number of columns in the multivector.
A distributed dense vector.
Stand-alone utility functions and macros.
void assign(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &src)
Copy the contents of src into *this (deep copy).
size_t getOrigNumLocalRows() const
"Original" number of rows in the (local) data.
EWhichNorm
Input argument for normImpl() (which see).
Teuchos::RCP< Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > getVectorNonConst(const size_t j)
Return a Vector which is a nonconst view of column j.
bool isDistributed() const
Whether this is a globally distributed object.
dual_view_type origView_
The "original view" of the MultiVector's data.