40#ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
41#define TPETRA_BLOCKCRSMATRIX_DEF_HPP
49#include "Tpetra_BlockMultiVector.hpp"
52#include "Teuchos_TimeMonitor.hpp"
53#ifdef HAVE_TPETRA_DEBUG
69#if defined(__CUDACC__)
71# if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
72# undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
75#elif defined(__GNUC__)
77# if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
78# undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
84# if ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
85# define TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 1
95 struct BlockCrsRowStruct {
96 T totalNumEntries, totalNumBytes, maxRowLength;
97 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct() =
default;
98 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct &b) =
default;
99 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
100 : totalNumEntries(numEnt), totalNumBytes(numBytes), maxRowLength(rowLength) {}
105 KOKKOS_INLINE_FUNCTION
106 void operator+=(
volatile BlockCrsRowStruct<T> &a,
107 volatile const BlockCrsRowStruct<T> &b) {
108 a.totalNumEntries += b.totalNumEntries;
109 a.totalNumBytes += b.totalNumBytes;
110 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
115 KOKKOS_INLINE_FUNCTION
116 void operator+=(BlockCrsRowStruct<T> &a,
const BlockCrsRowStruct<T> &b) {
117 a.totalNumEntries += b.totalNumEntries;
118 a.totalNumBytes += b.totalNumBytes;
119 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
122 template<
typename T,
typename ExecSpace>
123 struct BlockCrsReducer {
124 typedef BlockCrsReducer reducer;
125 typedef T value_type;
126 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
129 KOKKOS_INLINE_FUNCTION
130 BlockCrsReducer(value_type &val) : value(&val) {}
132 KOKKOS_INLINE_FUNCTION
void join(value_type &dst, value_type &src)
const { dst += src; }
133 KOKKOS_INLINE_FUNCTION
void join(
volatile value_type &dst,
const volatile value_type &src)
const { dst += src; }
134 KOKKOS_INLINE_FUNCTION
void init(value_type &val)
const { val = value_type(); }
135 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
136 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
139 template<
class AlphaCoeffType,
141 class MatrixValuesType,
146 class BcrsApplyNoTransFunctor {
148 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
149 "MatrixValuesType must be a Kokkos::View.");
150 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
151 "OutVecType must be a Kokkos::View.");
152 static_assert (Kokkos::Impl::is_view<InVecType>::value,
153 "InVecType must be a Kokkos::View.");
154 static_assert (std::is_same<MatrixValuesType,
155 typename MatrixValuesType::const_type>::value,
156 "MatrixValuesType must be a const Kokkos::View.");
157 static_assert (std::is_same<OutVecType,
158 typename OutVecType::non_const_type>::value,
159 "OutVecType must be a nonconst Kokkos::View.");
160 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
161 "InVecType must be a const Kokkos::View.");
162 static_assert (
static_cast<int> (MatrixValuesType::rank) == 1,
163 "MatrixValuesType must be a rank-1 Kokkos::View.");
164 static_assert (
static_cast<int> (InVecType::rank) == 1,
165 "InVecType must be a rank-1 Kokkos::View.");
166 static_assert (
static_cast<int> (OutVecType::rank) == 1,
167 "OutVecType must be a rank-1 Kokkos::View.");
168 typedef typename MatrixValuesType::non_const_value_type scalar_type;
171 typedef typename GraphType::device_type device_type;
174 typedef typename std::remove_const<typename GraphType::data_type>::type
183 void setX (
const InVecType& X) { X_ = X; }
191 void setY (
const OutVecType& Y) { Y_ = Y; }
193 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
194 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
197 BcrsApplyNoTransFunctor (
const alpha_coeff_type& alpha,
198 const GraphType& graph,
199 const MatrixValuesType& val,
200 const local_ordinal_type blockSize,
202 const beta_coeff_type& beta,
203 const OutVecType& Y) :
205 ptr_ (graph.row_map),
206 ind_ (graph.entries),
208 blockSize_ (blockSize),
215 KOKKOS_INLINE_FUNCTION
void
216 operator () (
const typename Kokkos::TeamPolicy<typename device_type::execution_space>::member_type & member)
const
218 Kokkos::abort(
"Tpetra::BcrsApplyNoTransFunctor:: this should not be called");
222 KOKKOS_INLINE_FUNCTION
void
223 operator () (
const local_ordinal_type& lclRow)
const
225 using ::Tpetra::COPY;
226 using ::Tpetra::FILL;
227 using ::Tpetra::SCAL;
228 using ::Tpetra::GEMV;
229 using Kokkos::Details::ArithTraits;
235 using Kokkos::parallel_for;
236 using Kokkos::subview;
237 typedef typename decltype (ptr_)::non_const_value_type offset_type;
238 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
241 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
244 const offset_type Y_ptBeg = lclRow * blockSize_;
245 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
246 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
250 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
251 FILL (Y_cur, ArithTraits<beta_coeff_type>::zero ());
253 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
257 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
258 const offset_type blkBeg = ptr_[lclRow];
259 const offset_type blkEnd = ptr_[lclRow+1];
261 const offset_type bs2 = blockSize_ * blockSize_;
262 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
264 little_block_type A_cur (val_.data () + absBlkOff * bs2,
265 blockSize_, blockSize_);
266 const offset_type X_blkCol = ind_[absBlkOff];
267 const offset_type X_ptBeg = X_blkCol * blockSize_;
268 const offset_type X_ptEnd = X_ptBeg + blockSize_;
269 auto X_cur = subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
271 GEMV (alpha_, A_cur, X_cur, Y_cur);
277 alpha_coeff_type alpha_;
278 typename GraphType::row_map_type::const_type ptr_;
279 typename GraphType::entries_type::const_type ind_;
280 MatrixValuesType val_;
283 beta_coeff_type beta_;
287 template<
class AlphaCoeffType,
289 class MatrixValuesType,
293 class BcrsApplyNoTransFunctor<AlphaCoeffType,
301 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
302 "MatrixValuesType must be a Kokkos::View.");
303 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
304 "OutVecType must be a Kokkos::View.");
305 static_assert (Kokkos::Impl::is_view<InVecType>::value,
306 "InVecType must be a Kokkos::View.");
307 static_assert (std::is_same<MatrixValuesType,
308 typename MatrixValuesType::const_type>::value,
309 "MatrixValuesType must be a const Kokkos::View.");
310 static_assert (std::is_same<OutVecType,
311 typename OutVecType::non_const_type>::value,
312 "OutVecType must be a nonconst Kokkos::View.");
313 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
314 "InVecType must be a const Kokkos::View.");
315 static_assert (
static_cast<int> (MatrixValuesType::rank) == 1,
316 "MatrixValuesType must be a rank-1 Kokkos::View.");
317 static_assert (
static_cast<int> (InVecType::rank) == 1,
318 "InVecType must be a rank-1 Kokkos::View.");
319 static_assert (
static_cast<int> (OutVecType::rank) == 1,
320 "OutVecType must be a rank-1 Kokkos::View.");
321 typedef typename MatrixValuesType::non_const_value_type scalar_type;
324 typedef typename GraphType::device_type device_type;
327 typedef typename std::remove_const<typename GraphType::data_type>::type
336 void setX (
const InVecType& X) { X_ = X; }
344 void setY (
const OutVecType& Y) { Y_ = Y; }
346 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
347 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
350 BcrsApplyNoTransFunctor (
const alpha_coeff_type& alpha,
351 const GraphType& graph,
352 const MatrixValuesType& val,
353 const local_ordinal_type blockSize,
355 const beta_coeff_type& beta,
356 const OutVecType& Y) :
358 ptr_ (graph.row_map),
359 ind_ (graph.entries),
361 blockSize_ (blockSize),
368 KOKKOS_INLINE_FUNCTION
void
369 operator () (
const local_ordinal_type& lclRow)
const
371 Kokkos::abort(
"Tpetra::BcrsApplyNoTransFunctor:: this should not be called");
375 KOKKOS_INLINE_FUNCTION
void
376 operator () (
const typename Kokkos::TeamPolicy<typename device_type::execution_space>::member_type & member)
const
380 using Kokkos::Details::ArithTraits;
386 using Kokkos::parallel_for;
387 using Kokkos::subview;
388 typedef typename decltype (ptr_)::non_const_value_type offset_type;
389 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
392 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
395 const offset_type Y_ptBeg = lclRow * blockSize_;
396 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
397 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
401 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
402 Kokkos::parallel_for(Kokkos::TeamVectorRange(member, blockSize_),
403 [&](
const local_ordinal_type &i) {
404 Y_cur(i) = ArithTraits<beta_coeff_type>::zero ();
407 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
408 Kokkos::parallel_for(Kokkos::TeamVectorRange(member, blockSize_),
409 [&](
const local_ordinal_type &i) {
413 member.team_barrier();
415 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
416 const offset_type blkBeg = ptr_[lclRow];
417 const offset_type blkEnd = ptr_[lclRow+1];
419 const offset_type bs2 = blockSize_ * blockSize_;
420 little_block_type A_cur (val_.data (), blockSize_, blockSize_);
421 auto X_cur = subview (X_, ::Kokkos::make_pair (0, blockSize_));
423 (Kokkos::TeamThreadRange(member, blkBeg, blkEnd),
424 [&](
const local_ordinal_type &absBlkOff) {
425 A_cur.assign_data(val_.data () + absBlkOff * bs2);
426 const offset_type X_blkCol = ind_[absBlkOff];
427 const offset_type X_ptBeg = X_blkCol * blockSize_;
428 X_cur.assign_data(&X_(X_ptBeg));
431 (Kokkos::ThreadVectorRange(member, blockSize_),
432 [&](
const local_ordinal_type &k0) {
434 for (local_ordinal_type k1=0;k1<blockSize_;++k1)
435 val += A_cur(k0,k1)*X_cur(k1);
436#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
438 Y_cur(k0) += alpha_*val;
444 Kokkos::atomic_add(&Y_cur(k0), alpha_*val);
452 alpha_coeff_type alpha_;
453 typename GraphType::row_map_type::const_type ptr_;
454 typename GraphType::entries_type::const_type ind_;
455 MatrixValuesType val_;
458 beta_coeff_type beta_;
463 template<
class AlphaCoeffType,
465 class MatrixValuesType,
466 class InMultiVecType,
468 class OutMultiVecType>
470 bcrsLocalApplyNoTrans (
const AlphaCoeffType& alpha,
471 const GraphType& graph,
472 const MatrixValuesType& val,
473 const typename std::remove_const<typename GraphType::data_type>::type blockSize,
474 const InMultiVecType& X,
475 const BetaCoeffType& beta,
476 const OutMultiVecType& Y
479 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
480 "MatrixValuesType must be a Kokkos::View.");
481 static_assert (Kokkos::Impl::is_view<OutMultiVecType>::value,
482 "OutMultiVecType must be a Kokkos::View.");
483 static_assert (Kokkos::Impl::is_view<InMultiVecType>::value,
484 "InMultiVecType must be a Kokkos::View.");
485 static_assert (
static_cast<int> (MatrixValuesType::rank) == 1,
486 "MatrixValuesType must be a rank-1 Kokkos::View.");
487 static_assert (
static_cast<int> (OutMultiVecType::rank) == 2,
488 "OutMultiVecType must be a rank-2 Kokkos::View.");
489 static_assert (
static_cast<int> (InMultiVecType::rank) == 2,
490 "InMultiVecType must be a rank-2 Kokkos::View.");
492 typedef typename GraphType::device_type::execution_space execution_space;
493 typedef typename MatrixValuesType::const_type matrix_values_type;
494 typedef typename OutMultiVecType::non_const_type out_multivec_type;
495 typedef typename InMultiVecType::const_type in_multivec_type;
496 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_type;
497 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_type;
498 typedef typename std::remove_const<typename GraphType::data_type>::type LO;
500 constexpr bool is_builtin_type_enabled = std::is_arithmetic<typename InMultiVecType::non_const_value_type>::value;
502 const LO numLocalMeshRows = graph.row_map.extent (0) == 0 ?
503 static_cast<LO
> (0) :
504 static_cast<LO> (graph.row_map.extent (0) - 1);
505 const LO numVecs = Y.extent (1);
506 if (numLocalMeshRows == 0 || numVecs == 0) {
513 in_multivec_type X_in = X;
514 out_multivec_type Y_out = Y;
519 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0)) in_vec_type;
520 typedef decltype (Kokkos::subview (Y_out, Kokkos::ALL (), 0)) out_vec_type;
521 typedef BcrsApplyNoTransFunctor<alpha_type, GraphType,
522 matrix_values_type, in_vec_type, beta_type, out_vec_type,
523 is_builtin_type_enabled> functor_type;
525 auto X_0 = Kokkos::subview (X_in, Kokkos::ALL (), 0);
526 auto Y_0 = Kokkos::subview (Y_out, Kokkos::ALL (), 0);
529 if (is_builtin_type_enabled) {
530 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
532 typedef Kokkos::TeamPolicy<execution_space> policy_type;
533 policy_type policy(1,1);
534#if defined(KOKKOS_ENABLE_CUDA)
535 constexpr bool is_cuda = std::is_same<execution_space, Kokkos::Cuda>::value;
537 constexpr bool is_cuda =
false;
540 LO team_size = 8, vector_size = 1;
541 if (blockSize <= 5) vector_size = 4;
542 else if (blockSize <= 9) vector_size = 8;
543 else if (blockSize <= 12) vector_size = 12;
544 else if (blockSize <= 20) vector_size = 20;
545 else vector_size = 20;
546 policy = policy_type(numLocalMeshRows, team_size, vector_size);
548 policy = policy_type(numLocalMeshRows, 1, 1);
550 Kokkos::parallel_for (policy, functor);
553 for (LO j = 1; j < numVecs; ++j) {
554 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
555 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
558 Kokkos::parallel_for (policy, functor);
561 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
562 Kokkos::RangePolicy<execution_space,LO> policy(0, numLocalMeshRows);
563 Kokkos::parallel_for (policy, functor);
564 for (LO j = 1; j < numVecs; ++j) {
565 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
566 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
569 Kokkos::parallel_for (policy, functor);
579template<
class Scalar,
class LO,
class GO,
class Node>
580class GetLocalDiagCopy {
582 typedef typename Node::device_type device_type;
583 typedef size_t diag_offset_type;
584 typedef Kokkos::View<
const size_t*, device_type,
585 Kokkos::MemoryUnmanaged> diag_offsets_type;
586 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
587 typedef typename global_graph_type::local_graph_device_type local_graph_device_type;
588 typedef typename local_graph_device_type::row_map_type row_offsets_type;
589 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
590 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
591 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
594 GetLocalDiagCopy (
const diag_type& diag,
595 const values_type& val,
596 const diag_offsets_type& diagOffsets,
597 const row_offsets_type& ptr,
598 const LO blockSize) :
600 diagOffsets_ (diagOffsets),
602 blockSize_ (blockSize),
603 offsetPerBlock_ (blockSize_*blockSize_),
607 KOKKOS_INLINE_FUNCTION
void
608 operator() (
const LO& lclRowInd)
const
613 const size_t absOffset = ptr_[lclRowInd];
616 const size_t relOffset = diagOffsets_[lclRowInd];
619 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
623 typedef Kokkos::View<
const IST**, Kokkos::LayoutRight,
624 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
625 const_little_block_type;
626 const_little_block_type D_in (val_.data () + pointOffset,
627 blockSize_, blockSize_);
628 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
634 diag_offsets_type diagOffsets_;
635 row_offsets_type ptr_;
642 template<
class Scalar,
class LO,
class GO,
class Node>
644 BlockCrsMatrix<Scalar, LO, GO, Node>::
645 markLocalErrorAndGetStream ()
647 * (this->localError_) =
true;
648 if ((*errs_).is_null ()) {
649 *errs_ = Teuchos::rcp (
new std::ostringstream ());
654 template<
class Scalar,
class LO,
class GO,
class Node>
669 template<
class Scalar,
class LO,
class GO,
class Node>
675 rowMeshMap_ (* (
graph.getRowMap ())),
686 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
687 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
688 "rows (isSorted() is false). This class assumes sorted rows.");
690 graphRCP_ = Teuchos::rcpFromRef(graph_);
699 "BlockCrsMatrix constructor: The input blockSize = " <<
blockSize <<
700 " <= 0. The block size must be positive.");
710 *pointImporter_ = Teuchos::rcp
716 ptrHost_ =
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"),
ptr_h.extent(0));
717 Kokkos::deep_copy(ptrHost_,
ptr_h);
720 indHost_ =
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"),
ind_h.extent(0));
721 Kokkos::deep_copy (indHost_,
ind_h);
724 val_ =
decltype (val_) (impl_scalar_type_dualview(
"val",
numValEnt));
728 template<
class Scalar,
class LO,
class GO,
class Node>
736 rowMeshMap_ (* (
graph.getRowMap ())),
748 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
749 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
750 "rows (isSorted() is false). This class assumes sorted rows.");
752 graphRCP_ = Teuchos::rcpFromRef(graph_);
761 "BlockCrsMatrix constructor: The input blockSize = " <<
blockSize <<
762 " <= 0. The block size must be positive.");
768 *pointImporter_ = Teuchos::rcp
774 ptrHost_ =
decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"),
ptr_h.extent(0));
775 Kokkos::deep_copy(ptrHost_,
ptr_h);
778 indHost_ =
decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"),
ind_h.extent(0));
779 Kokkos::deep_copy (indHost_,
ind_h);
782 val_ =
decltype (val_) (impl_scalar_type_dualview(
"val",
numValEnt));
786 template<
class Scalar,
class LO,
class GO,
class Node>
787 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
792 return Teuchos::rcp (
new map_type (domainPointMap_));
795 template<
class Scalar,
class LO,
class GO,
class Node>
796 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
801 return Teuchos::rcp (
new map_type (rangePointMap_));
804 template<
class Scalar,
class LO,
class GO,
class Node>
805 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
809 return graph_.getRowMap();
812 template<
class Scalar,
class LO,
class GO,
class Node>
813 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
817 return graph_.getColMap();
820 template<
class Scalar,
class LO,
class GO,
class Node>
825 return graph_.getGlobalNumRows();
828 template<
class Scalar,
class LO,
class GO,
class Node>
833 return graph_.getNodeNumRows();
836 template<
class Scalar,
class LO,
class GO,
class Node>
841 return graph_.getNodeMaxNumRowEntries();
844 template<
class Scalar,
class LO,
class GO,
class Node>
849 Teuchos::ETransp
mode,
855 mode != Teuchos::NO_TRANS &&
mode != Teuchos::TRANS &&
mode != Teuchos::CONJ_TRANS,
856 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
857 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
858 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
867 catch (std::invalid_argument&
e) {
869 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
870 "apply: Either the input MultiVector X or the output MultiVector Y "
871 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
872 "graph. BlockMultiVector's constructor threw the following exception: "
882 }
catch (std::invalid_argument&
e) {
884 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
885 "apply: The implementation method applyBlock complained about having "
886 "an invalid argument. It reported the following: " <<
e.what ());
887 }
catch (std::logic_error&
e) {
889 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
890 "apply: The implementation method applyBlock complained about a "
891 "possible bug in its implementation. It reported the following: "
893 }
catch (std::exception&
e) {
895 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
896 "apply: The implementation method applyBlock threw an exception which "
897 "is neither std::invalid_argument nor std::logic_error, but is a "
898 "subclass of std::exception. It reported the following: "
902 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
903 "apply: The implementation method applyBlock threw an exception which "
904 "is not an instance of a subclass of std::exception. This probably "
905 "indicates a bug. Please report this to the Tpetra developers.");
909 template<
class Scalar,
class LO,
class GO,
class Node>
914 Teuchos::ETransp
mode,
919 X.getBlockSize () !=
Y.getBlockSize (), std::invalid_argument,
920 "Tpetra::BlockCrsMatrix::applyBlock: "
921 "X and Y have different block sizes. X.getBlockSize() = "
922 <<
X.getBlockSize () <<
" != Y.getBlockSize() = "
923 <<
Y.getBlockSize () <<
".");
925 if (
mode == Teuchos::NO_TRANS) {
927 }
else if (
mode == Teuchos::TRANS ||
mode == Teuchos::CONJ_TRANS) {
931 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
932 "applyBlock: Invalid 'mode' argument. Valid values are "
933 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
937 template<
class Scalar,
class LO,
class GO,
class Node>
942 auto val_d = val_.getDeviceView(Access::OverwriteAll);
946 template<
class Scalar,
class LO,
class GO,
class Node>
954 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
962 template <
class Scalar,
class LO,
class GO,
class Node>
966 Kokkos::MemoryUnmanaged>& offsets)
const
968 graph_.getLocalDiagOffsets (offsets);
971 template <
class Scalar,
class LO,
class GO,
class Node>
975 Kokkos::MemoryUnmanaged>& diag,
977 Kokkos::MemoryUnmanaged>& offsets)
const
979 using Kokkos::parallel_for;
980 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
982 const LO
lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
983 const LO
blockSize = this->getBlockSize ();
986 static_cast<LO
> (diag.extent (1)) <
blockSize ||
987 static_cast<LO
> (diag.extent (2)) <
blockSize,
988 std::invalid_argument,
prefix <<
989 "The input Kokkos::View is not big enough to hold all the data.");
991 (
static_cast<LO
> (offsets.size ()) <
lclNumMeshRows, std::invalid_argument,
992 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
995 typedef Kokkos::RangePolicy<execution_space, LO>
policy_type;
1002 auto val_d = val_.getDeviceView(Access::ReadOnly);
1005 graph_.getLocalGraphDevice ().row_map, blockSize_));
1008#ifdef TPETRA_ENABLE_DEPRECATED_CODE
1009 template <
class Scalar,
class LO,
class GO,
class Node>
1013 Kokkos::MemoryUnmanaged>& diag,
1014 const Teuchos::ArrayView<const size_t>& offsets)
const
1016 auto offsets_view_host = Kokkos::View<size_t*,Kokkos::HostSpace>(
const_cast<size_t*
>(offsets.getRawPtr()), offsets.size());
1023 template<
class Scalar,
class LO,
class GO,
class Node>
1031 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
1040 template<
class Scalar,
class LO,
class GO,
class Node>
1048 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
1055#ifdef TPETRA_ENABLE_DEPRECATED_CODE
1056 template<
class Scalar,
class LO,
class GO,
class Node>
1065 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
1069 return Teuchos::OrdinalTraits<LO>::invalid ();
1072 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
1073 colInds = indHost_.data () + absBlockOffsetStart;
1075 auto vals_host_out = getValuesHost (localRowInd);
1076 impl_scalar_type* vals_host_out_raw =
const_cast<impl_scalar_type*
>(vals_host_out.data ());
1077 vals =
reinterpret_cast<Scalar*
> (vals_host_out_raw);
1078 numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
1084 template<
class Scalar,
class LO,
class GO,
class Node>
1088 nonconst_local_inds_host_view_type &
Indices,
1089 nonconst_values_host_view_type &
Values,
1096 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
1097 <<
numInds <<
" row entries");
1103 for (LO
i=0;
i<
numInds*blockSize_*blockSize_; ++
i) {
1109#ifdef TPETRA_ENABLE_DEPRECATED_CODE
1110 template<
class Scalar,
class LO,
class GO,
class Node>
1114 const Teuchos::ArrayView<LO>&
Indices,
1115 const Teuchos::ArrayView<Scalar>&
Values,
1122 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
1123 <<
numInds <<
" row entries");
1125 const LO * colInds = indHost_.data() + ptrHost_(LocalRow);
1126 for (LO i=0; i<numInds; ++i) {
1127 Indices[i] = colInds[i];
1129 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
1130 Values[i] = vals[i];
1132 NumEntries = numInds;
1136 template<
class Scalar,
class LO,
class GO,
class Node>
1144 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
1149 return static_cast<LO
> (0);
1152 const LO
LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1169 template<
class Scalar,
class LO,
class GO,
class Node>
1177 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
1182 return static_cast<LO
> (0);
1189 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1190 const ptrdiff_t STINV = Teuchos::OrdinalTraits<ptrdiff_t>::invalid ();
1196 if (offsets[
k] !=
STINV) {
1207 template<
class Scalar,
class LO,
class GO,
class Node>
1215 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
1220 return static_cast<LO
> (0);
1222 const impl_scalar_type*
const vIn =
reinterpret_cast<const impl_scalar_type*
> (vals);
1223 auto val_out = getValuesHost(localRowInd);
1224 impl_scalar_type* vOut =
const_cast<impl_scalar_type*
>(val_out.data());
1226 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1227 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1228 size_t pointOffset = 0;
1231 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1232 const size_t blockOffset = offsets[k]*perBlockSize;
1233 if (blockOffset != STINV) {
1234 little_block_type A_old = getNonConstLocalBlockFromInput (vOut, blockOffset);
1235 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
1244 template<
class Scalar,
class LO,
class GO,
class Node>
1252 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
1257 return static_cast<LO
> (0);
1265 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1266 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1282 template<
class Scalar,
class LO,
class GO,
class Node>
1284 impl_scalar_type_dualview::t_host::const_type
1288 return val_.getHostView(Access::ReadOnly);
1291 template<
class Scalar,
class LO,
class GO,
class Node>
1292 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
1293 impl_scalar_type_dualview::t_dev::const_type
1294 BlockCrsMatrix<Scalar, LO, GO, Node>::
1295 getValuesDevice ()
const
1297 return val_.getDeviceView(Access::ReadOnly);
1300 template<
class Scalar,
class LO,
class GO,
class Node>
1301 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
1302 impl_scalar_type_dualview::t_host
1306 return val_.getHostView(Access::ReadWrite);
1309 template<
class Scalar,
class LO,
class GO,
class Node>
1311 impl_scalar_type_dualview::t_dev
1315 return val_.getDeviceView(Access::ReadWrite);
1318 template<
class Scalar,
class LO,
class GO,
class Node>
1319 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
1320 impl_scalar_type_dualview::t_host::const_type
1324 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1325 auto val = val_.getHostView(Access::ReadOnly);
1330 template<
class Scalar,
class LO,
class GO,
class Node>
1332 impl_scalar_type_dualview::t_dev::const_type
1336 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1337 auto val = val_.getDeviceView(Access::ReadOnly);
1342 template<
class Scalar,
class LO,
class GO,
class Node>
1347 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1348 auto val = val_.getHostView(Access::ReadWrite);
1353 template<
class Scalar,
class LO,
class GO,
class Node>
1358 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1359 auto val = val_.getDeviceView(Access::ReadWrite);
1364 template<
class Scalar,
class LO,
class GO,
class Node>
1370 if (
numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1371 return static_cast<LO
> (0);
1377 template<
class Scalar,
class LO,
class GO,
class Node>
1382 const Teuchos::ETransp
mode,
1393 true, std::logic_error,
"Tpetra::BlockCrsMatrix::apply: "
1394 "transpose and conjugate transpose modes are not yet implemented.");
1397 template<
class Scalar,
class LO,
class GO,
class Node>
1399 BlockCrsMatrix<Scalar, LO, GO, Node>::
1400 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1401 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1407 typedef ::Tpetra::Import<LO, GO, Node> import_type;
1408 typedef ::Tpetra::Export<LO, GO, Node> export_type;
1409 const Scalar zero = STS::zero ();
1410 const Scalar one = STS::one ();
1411 RCP<const import_type>
import = graph_.getImporter ();
1413 RCP<const export_type> theExport = graph_.getExporter ();
1414 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
1416 if (alpha == zero) {
1420 else if (beta != one) {
1425 const BMV* X_colMap = NULL;
1426 if (
import.is_null ()) {
1430 catch (std::exception& e) {
1431 TEUCHOS_TEST_FOR_EXCEPTION
1432 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1433 "operator= threw an exception: " << std::endl << e.what ());
1443 if ((*X_colMap_).is_null () ||
1444 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1445 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1446 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1447 static_cast<LO
> (X.getNumVectors ())));
1449 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1453 X_colMap = &(**X_colMap_);
1455 catch (std::exception& e) {
1456 TEUCHOS_TEST_FOR_EXCEPTION
1457 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1458 "operator= threw an exception: " << std::endl << e.what ());
1462 BMV* Y_rowMap = NULL;
1463 if (theExport.is_null ()) {
1466 else if ((*Y_rowMap_).is_null () ||
1467 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1468 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1469 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1470 static_cast<LO
> (X.getNumVectors ())));
1472 Y_rowMap = &(**Y_rowMap_);
1474 catch (std::exception& e) {
1475 TEUCHOS_TEST_FOR_EXCEPTION(
1476 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1477 "operator= threw an exception: " << std::endl << e.what ());
1482 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1484 catch (std::exception& e) {
1485 TEUCHOS_TEST_FOR_EXCEPTION
1486 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1487 "an exception: " << e.what ());
1490 TEUCHOS_TEST_FOR_EXCEPTION
1491 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1492 "an exception not a subclass of std::exception.");
1495 if (! theExport.is_null ()) {
1501 template<
class Scalar,
class LO,
class GO,
class Node>
1503 BlockCrsMatrix<Scalar, LO, GO, Node>::
1504 localApplyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1505 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1509 using ::Tpetra::Impl::bcrsLocalApplyNoTrans;
1511 const impl_scalar_type alpha_impl = alpha;
1512 const auto graph = this->graph_.getLocalGraphDevice ();
1513 const impl_scalar_type beta_impl = beta;
1514 const LO blockSize = this->getBlockSize ();
1516 auto X_mv = X.getMultiVectorView ();
1517 auto Y_mv = Y.getMultiVectorView ();
1521 auto X_lcl = X_mv.getLocalViewDevice (Access::ReadOnly);
1522 auto Y_lcl = Y_mv.getLocalViewDevice (Access::ReadWrite);
1523 auto val = val_.getDeviceView(Access::ReadWrite);
1525 bcrsLocalApplyNoTrans (alpha_impl, graph, val, blockSize, X_lcl,
1529 template<
class Scalar,
class LO,
class GO,
class Node>
1531 BlockCrsMatrix<Scalar, LO, GO, Node>::
1532 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1533 const LO colIndexToFind,
1534 const LO hint)
const
1536 const size_t absStartOffset = ptrHost_[localRowIndex];
1537 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1538 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1540 const LO*
const curInd = indHost_.data () + absStartOffset;
1543 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1550 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1555 const LO maxNumEntriesForLinearSearch = 32;
1556 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1561 const LO* beg = curInd;
1562 const LO* end = curInd + numEntriesInRow;
1563 std::pair<const LO*, const LO*> p =
1564 std::equal_range (beg, end, colIndexToFind);
1565 if (p.first != p.second) {
1567 relOffset =
static_cast<LO
> (p.first - beg);
1571 for (LO k = 0; k < numEntriesInRow; ++k) {
1572 if (colIndexToFind == curInd[k]) {
1582 template<
class Scalar,
class LO,
class GO,
class Node>
1584 BlockCrsMatrix<Scalar, LO, GO, Node>::
1585 offsetPerBlock ()
const
1587 return offsetPerBlock_;
1590 template<
class Scalar,
class LO,
class GO,
class Node>
1592 BlockCrsMatrix<Scalar, LO, GO, Node>::
1593 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1594 const size_t pointOffset)
const
1597 const LO rowStride = blockSize_;
1598 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1601 template<
class Scalar,
class LO,
class GO,
class Node>
1603 BlockCrsMatrix<Scalar, LO, GO, Node>::
1604 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1605 const size_t pointOffset)
const
1608 const LO rowStride = blockSize_;
1609 return little_block_type (val + pointOffset, blockSize_, rowStride);
1612 template<
class Scalar,
class LO,
class GO,
class Node>
1613 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1614 BlockCrsMatrix<Scalar, LO, GO, Node>::
1615 getNonConstLocalBlockFromInputHost (impl_scalar_type* val,
1616 const size_t pointOffset)
const
1619 const LO rowStride = blockSize_;
1620 const size_t bs2 = blockSize_ * blockSize_;
1621 return little_block_host_type (val + bs2 * pointOffset, blockSize_, rowStride);
1624#ifdef TPETRA_ENABLE_DEPRECATED_CODE
1625 template<
class Scalar,
class LO,
class GO,
class Node>
1627 BlockCrsMatrix<Scalar, LO, GO, Node>::
1628 getLocalBlock (
const LO localRowInd,
const LO localColInd)
const
1630 using this_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1632 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1633 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1635 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1636 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1637 auto vals =
const_cast<this_type&
>(*this).getValuesDeviceNonConst();
1638 return getNonConstLocalBlockFromInput (vals.data(), absBlockOffset);
1641 return little_block_type ();
1646 template<
class Scalar,
class LO,
class GO,
class Node>
1648 BlockCrsMatrix<Scalar, LO, GO, Node>::
1649 getLocalBlockDeviceNonConst (
const LO localRowInd,
const LO localColInd)
const
1651 using this_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1653 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1654 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1655 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1656 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1657 auto vals =
const_cast<this_type&
>(*this).getValuesDeviceNonConst();
1658 auto r_val = getNonConstLocalBlockFromInput (vals.data(), absBlockOffset);
1662 return little_block_type ();
1666 template<
class Scalar,
class LO,
class GO,
class Node>
1667 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1668 BlockCrsMatrix<Scalar, LO, GO, Node>::
1669 getLocalBlockHostNonConst (
const LO localRowInd,
const LO localColInd)
const
1671 using this_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1673 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1674 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1675 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1676 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1677 auto vals =
const_cast<this_type&
>(*this).getValuesHostNonConst();
1678 auto r_val = getNonConstLocalBlockFromInputHost (vals.data(), absBlockOffset);
1682 return little_block_host_type ();
1687 template<
class Scalar,
class LO,
class GO,
class Node>
1689 BlockCrsMatrix<Scalar, LO, GO, Node>::
1690 checkSizes (const ::Tpetra::SrcDistObject& source)
1693 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
1694 const this_type* src =
dynamic_cast<const this_type*
> (&source);
1697 std::ostream& err = markLocalErrorAndGetStream ();
1698 err <<
"checkSizes: The source object of the Import or Export "
1699 "must be a BlockCrsMatrix with the same template parameters as the "
1700 "target object." << endl;
1705 if (src->getBlockSize () != this->getBlockSize ()) {
1706 std::ostream& err = markLocalErrorAndGetStream ();
1707 err <<
"checkSizes: The source and target objects of the Import or "
1708 <<
"Export must have the same block sizes. The source's block "
1709 <<
"size = " << src->getBlockSize () <<
" != the target's block "
1710 <<
"size = " << this->getBlockSize () <<
"." << endl;
1712 if (! src->graph_.isFillComplete ()) {
1713 std::ostream& err = markLocalErrorAndGetStream ();
1714 err <<
"checkSizes: The source object of the Import or Export is "
1715 "not fill complete. Both source and target objects must be fill "
1716 "complete." << endl;
1718 if (! this->graph_.isFillComplete ()) {
1719 std::ostream& err = markLocalErrorAndGetStream ();
1720 err <<
"checkSizes: The target object of the Import or Export is "
1721 "not fill complete. Both source and target objects must be fill "
1722 "complete." << endl;
1724 if (src->graph_.getColMap ().is_null ()) {
1725 std::ostream& err = markLocalErrorAndGetStream ();
1726 err <<
"checkSizes: The source object of the Import or Export does "
1727 "not have a column Map. Both source and target objects must have "
1728 "column Maps." << endl;
1730 if (this->graph_.getColMap ().is_null ()) {
1731 std::ostream& err = markLocalErrorAndGetStream ();
1732 err <<
"checkSizes: The target object of the Import or Export does "
1733 "not have a column Map. Both source and target objects must have "
1734 "column Maps." << endl;
1738 return ! (* (this->localError_));
1741 template<
class Scalar,
class LO,
class GO,
class Node>
1745 (const ::Tpetra::SrcDistObject&
source,
1746 const size_t numSameIDs,
1753 using ::Tpetra::Details::Behavior;
1754 using ::Tpetra::Details::dualViewStatusToString;
1755 using ::Tpetra::Details::ProfilingRegion;
1759 ProfilingRegion
profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
1760 const bool debug = Behavior::debug();
1761 const bool verbose = Behavior::verbose();
1766 std::ostringstream
os;
1767 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1768 os <<
"Proc " <<
myRank <<
": BlockCrsMatrix::copyAndPermute : " <<
endl;
1773 if (* (this->localError_)) {
1774 std::ostream&
err = this->markLocalErrorAndGetStream ();
1776 <<
"The target object of the Import or Export is already in an error state."
1785 std::ostringstream
os;
1789 std::cerr <<
os.str ();
1796 std::ostream&
err = this->markLocalErrorAndGetStream ();
1804 std::ostream&
err = this->markLocalErrorAndGetStream ();
1806 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
1813 std::ostream&
err = this->markLocalErrorAndGetStream ();
1815 <<
"The source (input) object of the Import or "
1816 "Export is either not a BlockCrsMatrix, or does not have the right "
1817 "template parameters. checkSizes() should have caught this. "
1818 "Please report this bug to the Tpetra developers." <<
endl;
1823#ifdef HAVE_TPETRA_DEBUG
1838#ifdef HAVE_TPETRA_DEBUG
1848 std::ostringstream
os;
1850 <<
"canUseLocalColumnIndices: "
1854 std::cerr <<
os.str ();
1863#ifdef HAVE_TPETRA_DEBUG
1864 if (!
srcRowMap.isNodeLocalElement (localRow)) {
1872 values_host_view_type
vals;
1877 if (numEntries > 0) {
1879 if (
err != numEntries) {
1881 if (!
dstRowMap.isNodeLocalElement (localRow)) {
1882#ifdef HAVE_TPETRA_DEBUG
1892 for (LO
k = 0;
k < numEntries; ++
k) {
1895#ifdef HAVE_TPETRA_DEBUG
1911 values_host_view_type
vals;
1914 if (numEntries > 0) {
1916 if (
err != numEntries) {
1918#ifdef HAVE_TPETRA_DEBUG
1919 for (LO
c = 0;
c < numEntries; ++
c) {
1931 const size_t maxNumEnt = src->graph_.getNodeMaxNumRowEntries ();
1937 values_host_view_type
vals;
1944 }
catch (std::exception&
e) {
1946 std::ostringstream
os;
1947 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1948 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" localRow "
1949 << localRow <<
", src->getLocalRowView() threw an exception: "
1951 std::cerr <<
os.str ();
1956 if (numEntries > 0) {
1957 if (
static_cast<size_t> (numEntries) >
static_cast<size_t> (
lclDstCols.size ())) {
1960 std::ostringstream
os;
1961 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1962 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" localRow "
1963 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
1964 << maxNumEnt <<
endl;
1965 std::cerr <<
os.str ();
1972 for (LO
j = 0;
j < numEntries; ++
j) {
1976#ifdef HAVE_TPETRA_DEBUG
1984 }
catch (std::exception&
e) {
1986 std::ostringstream
os;
1987 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1988 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" localRow "
1989 << localRow <<
", this->replaceLocalValues() threw an exception: "
1991 std::cerr <<
os.str ();
1995 if (
err != numEntries) {
1998 std::ostringstream
os;
1999 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2000 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"same\" "
2001 "localRow " << localRow <<
", this->replaceLocalValues "
2002 "returned " <<
err <<
" instead of numEntries = "
2003 << numEntries <<
endl;
2004 std::cerr <<
os.str ();
2017 values_host_view_type
vals;
2022 }
catch (std::exception&
e) {
2024 std::ostringstream
os;
2025 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2026 os <<
"Proc " <<
myRank <<
": copyAndPermute: At \"permute\" "
2028 <<
", src->getLocalRowView() threw an exception: " <<
e.what ();
2029 std::cerr <<
os.str ();
2034 if (numEntries > 0) {
2035 if (
static_cast<size_t> (numEntries) >
static_cast<size_t> (
lclDstCols.size ())) {
2042 for (LO
j = 0;
j < numEntries; ++
j) {
2046#ifdef HAVE_TPETRA_DEBUG
2052 if (
err != numEntries) {
2061 std::ostream&
err = this->markLocalErrorAndGetStream ();
2062#ifdef HAVE_TPETRA_DEBUG
2063 err <<
"copyAndPermute: The graph structure of the source of the "
2064 "Import or Export must be a subset of the graph structure of the "
2066 err <<
"invalidSrcCopyRows = [";
2070 typename std::set<LO>::const_iterator
itp1 =
it;
2076 err <<
"], invalidDstCopyRows = [";
2080 typename std::set<LO>::const_iterator
itp1 =
it;
2086 err <<
"], invalidDstCopyCols = [";
2090 typename std::set<LO>::const_iterator
itp1 =
it;
2096 err <<
"], invalidDstPermuteCols = [";
2100 typename std::set<LO>::const_iterator
itp1 =
it;
2106 err <<
"], invalidPermuteFromRows = [";
2110 typename std::set<LO>::const_iterator
itp1 =
it;
2118 err <<
"copyAndPermute: The graph structure of the source of the "
2119 "Import or Export must be a subset of the graph structure of the "
2125 std::ostringstream
os;
2126 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2127 const bool lclSuccess = ! (* (this->localError_));
2128 os <<
"*** Proc " <<
myRank <<
": copyAndPermute "
2133 os <<
": error messages: " << this->errorMessages ();
2135 std::cerr <<
os.str ();
2159 template<
class LO,
class GO>
2165 using ::Tpetra::Details::PackTraits;
2175 const size_t numEntLen = PackTraits<LO>::packValueCount (numEntLO);
2176 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
2177 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
2178 return numEntLen + gidsLen + valsLen;
2192 template<
class ST,
class LO,
class GO>
2194 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO>::input_buffer_type& imports,
2195 const size_t offset,
2196 const size_t numBytes,
2199 using Kokkos::subview;
2200 using ::Tpetra::Details::PackTraits;
2202 if (numBytes == 0) {
2204 return static_cast<size_t> (0);
2208 const size_t theNumBytes = PackTraits<LO>::packValueCount (numEntLO);
2209 TEUCHOS_TEST_FOR_EXCEPTION
2210 (theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2211 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
2213 const char*
const inBuf = imports.data () + offset;
2214 const size_t actualNumBytes = PackTraits<LO>::unpackValue (numEntLO, inBuf);
2215 TEUCHOS_TEST_FOR_EXCEPTION
2216 (actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2217 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
2219 return static_cast<size_t> (numEntLO);
2226 template<
class ST,
class LO,
class GO>
2228 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO>::output_buffer_type exports,
2229 const size_t offset,
2230 const size_t numEnt,
2231 const typename ::Tpetra::Details::PackTraits<GO>::input_array_type& gidsIn,
2232 const typename ::Tpetra::Details::PackTraits<ST>::input_array_type& valsIn,
2233 const size_t numBytesPerValue,
2234 const size_t blockSize)
2236 using Kokkos::subview;
2237 using ::Tpetra::Details::PackTraits;
2243 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2246 const LO numEntLO =
static_cast<size_t> (numEnt);
2248 const size_t numEntBeg = offset;
2249 const size_t numEntLen = PackTraits<LO>::packValueCount (numEntLO);
2250 const size_t gidsBeg = numEntBeg + numEntLen;
2251 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
2252 const size_t valsBeg = gidsBeg + gidsLen;
2253 const size_t valsLen = numScalarEnt * numBytesPerValue;
2255 char*
const numEntOut = exports.data () + numEntBeg;
2256 char*
const gidsOut = exports.data () + gidsBeg;
2257 char*
const valsOut = exports.data () + valsBeg;
2259 size_t numBytesOut = 0;
2261 numBytesOut += PackTraits<LO>::packValue (numEntOut, numEntLO);
2264 Kokkos::pair<int, size_t> p;
2265 p = PackTraits<GO>::packArray (gidsOut, gidsIn.data (), numEnt);
2266 errorCode += p.first;
2267 numBytesOut += p.second;
2269 p = PackTraits<ST>::packArray (valsOut, valsIn.data (), numScalarEnt);
2270 errorCode += p.first;
2271 numBytesOut += p.second;
2274 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2275 TEUCHOS_TEST_FOR_EXCEPTION
2276 (numBytesOut != expectedNumBytes, std::logic_error,
2277 "packRowForBlockCrs: numBytesOut = " << numBytesOut
2278 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2280 TEUCHOS_TEST_FOR_EXCEPTION
2281 (errorCode != 0, std::runtime_error,
"packRowForBlockCrs: "
2282 "PackTraits::packArray returned a nonzero error code");
2288 template<
class ST,
class LO,
class GO>
2290 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO>::output_array_type& gidsOut,
2291 const typename ::Tpetra::Details::PackTraits<ST>::output_array_type& valsOut,
2292 const typename ::Tpetra::Details::PackTraits<int>::input_buffer_type& imports,
2293 const size_t offset,
2294 const size_t numBytes,
2295 const size_t numEnt,
2296 const size_t numBytesPerValue,
2297 const size_t blockSize)
2299 using ::Tpetra::Details::PackTraits;
2301 if (numBytes == 0) {
2305 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2306 TEUCHOS_TEST_FOR_EXCEPTION
2307 (
static_cast<size_t> (imports.extent (0)) <= offset,
2308 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2309 << imports.extent (0) <<
" <= offset = " << offset <<
".");
2310 TEUCHOS_TEST_FOR_EXCEPTION
2311 (
static_cast<size_t> (imports.extent (0)) < offset + numBytes,
2312 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2313 << imports.extent (0) <<
" < offset + numBytes = "
2314 << (offset + numBytes) <<
".");
2319 const size_t numEntBeg = offset;
2320 const size_t numEntLen = PackTraits<LO>::packValueCount (lid);
2321 const size_t gidsBeg = numEntBeg + numEntLen;
2322 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
2323 const size_t valsBeg = gidsBeg + gidsLen;
2324 const size_t valsLen = numScalarEnt * numBytesPerValue;
2326 const char*
const numEntIn = imports.data () + numEntBeg;
2327 const char*
const gidsIn = imports.data () + gidsBeg;
2328 const char*
const valsIn = imports.data () + valsBeg;
2330 size_t numBytesOut = 0;
2333 numBytesOut += PackTraits<LO>::unpackValue (numEntOut, numEntIn);
2334 TEUCHOS_TEST_FOR_EXCEPTION
2335 (
static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
2336 "unpackRowForBlockCrs: Expected number of entries " << numEnt
2337 <<
" != actual number of entries " << numEntOut <<
".");
2340 Kokkos::pair<int, size_t> p;
2341 p = PackTraits<GO>::unpackArray (gidsOut.data (), gidsIn, numEnt);
2342 errorCode += p.first;
2343 numBytesOut += p.second;
2345 p = PackTraits<ST>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
2346 errorCode += p.first;
2347 numBytesOut += p.second;
2350 TEUCHOS_TEST_FOR_EXCEPTION
2351 (numBytesOut != numBytes, std::logic_error,
2352 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2353 <<
" != numBytes = " << numBytes <<
".");
2355 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2356 TEUCHOS_TEST_FOR_EXCEPTION
2357 (numBytesOut != expectedNumBytes, std::logic_error,
2358 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2359 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2361 TEUCHOS_TEST_FOR_EXCEPTION
2362 (errorCode != 0, std::runtime_error,
"unpackRowForBlockCrs: "
2363 "PackTraits::unpackArray returned a nonzero error code");
2369 template<
class Scalar,
class LO,
class GO,
class Node>
2373 (const ::Tpetra::SrcDistObject&
source,
2378 Kokkos::DualView<
size_t*,
2382 using ::Tpetra::Details::Behavior;
2383 using ::Tpetra::Details::dualViewStatusToString;
2384 using ::Tpetra::Details::ProfilingRegion;
2385 using ::Tpetra::Details::PackTraits;
2387 typedef typename Kokkos::View<int*, device_type>::HostMirror::execution_space
host_exec;
2391 ProfilingRegion
profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
2393 const bool debug = Behavior::debug();
2394 const bool verbose = Behavior::verbose();
2399 std::ostringstream
os;
2400 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2401 os <<
"Proc " <<
myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
2406 if (* (this->localError_)) {
2407 std::ostream&
err = this->markLocalErrorAndGetStream ();
2409 <<
"The target object of the Import or Export is already in an error state."
2418 std::ostringstream
os;
2420 <<
prefix <<
" " << dualViewStatusToString (
exportLIDs,
"exportLIDs") << std::endl
2421 <<
prefix <<
" " << dualViewStatusToString (exports,
"exports") << std::endl
2423 std::cerr <<
os.str ();
2430 std::ostream&
err = this->markLocalErrorAndGetStream ();
2432 <<
"exportLIDs.extent(0) = " <<
exportLIDs.extent (0)
2434 <<
"." << std::endl;
2438 std::ostream&
err = this->markLocalErrorAndGetStream ();
2439 err <<
prefix <<
"exportLIDs be sync'd to host." << std::endl;
2445 std::ostream&
err = this->markLocalErrorAndGetStream ();
2447 <<
"The source (input) object of the Import or "
2448 "Export is either not a BlockCrsMatrix, or does not have the right "
2449 "template parameters. checkSizes() should have caught this. "
2450 "Please report this bug to the Tpetra developers." << std::endl;
2466 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2470 auto val_host = val_.getHostView(Access::ReadOnly);
2472 PackTraits<impl_scalar_type>
2489 Kokkos::parallel_reduce
2491 [=](
const size_t &
i,
typename reducer_type::value_type &update) {
2511 std::ostringstream
os;
2513 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
2515 std::cerr <<
os.str ();
2521 if(exports.extent(0) != totalNumBytes)
2523 const std::string
oldLabel = exports.d_view.label ();
2525 exports = Kokkos::DualView<packet_type*, buffer_device_type>(
newLabel, totalNumBytes);
2527 if (totalNumEntries > 0) {
2532 Kokkos::parallel_scan
2534 [=](
const size_t &
i,
size_t &update,
const bool &
final) {
2535 if (
final)
offset(
i) = update;
2540 std::ostream&
err = this->markLocalErrorAndGetStream ();
2542 <<
"At end of method, the final offset (in bytes) "
2544 << totalNumBytes <<
". "
2545 <<
"Please report this bug to the Tpetra developers." << std::endl;
2559 exports.modify_host();
2561 typedef Kokkos::TeamPolicy<host_exec>
policy_type;
2564 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO)*maxRowLength));
2565 Kokkos::parallel_for
2567 [=](
const typename policy_type::member_type &
member) {
2568 const size_t i =
member.league_rank();
2569 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2574 values_host_view_type
vals;
2592 (exports.view_host(),
2595 Kokkos::View<const GO*, host_exec>(
gblColInds.data(), maxRowLength),
2604 std::ostringstream
os;
2606 <<
"numBytes computed from packRowForBlockCrs is different from "
2607 <<
"precomputed offset values, LID = " <<
i << std::endl;
2608 std::cerr <<
os.str ();
2616 std::ostringstream
os;
2617 const bool lclSuccess = ! (* (this->localError_));
2621 std::cerr <<
os.str ();
2625 template<
class Scalar,
class LO,
class GO,
class Node>
2633 Kokkos::DualView<
size_t*,
2638 using ::Tpetra::Details::Behavior;
2639 using ::Tpetra::Details::dualViewStatusToString;
2640 using ::Tpetra::Details::ProfilingRegion;
2641 using ::Tpetra::Details::PackTraits;
2644 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
2646 ProfilingRegion
profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
2647 const bool verbose = Behavior::verbose ();
2652 std::ostringstream
os;
2653 auto map = this->graph_.getRowMap ();
2654 auto comm =
map.is_null () ? Teuchos::null :
map->getComm ();
2655 const int myRank = comm.is_null () ? -1 : comm->getRank ();
2656 os <<
"Proc " <<
myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
2660 std::cerr <<
os.str ();
2665 if (* (this->localError_)) {
2666 std::ostream&
err = this->markLocalErrorAndGetStream ();
2667 std::ostringstream
os;
2668 os <<
prefix <<
"{Im/Ex}port target is already in error." <<
endl;
2670 std::cerr <<
os.str ();
2680 std::ostream&
err = this->markLocalErrorAndGetStream ();
2681 std::ostringstream
os;
2686 std::cerr <<
os.str ();
2695 std::ostream&
err = this->markLocalErrorAndGetStream ();
2696 std::ostringstream
os;
2698 <<
"Invalid CombineMode value " <<
combineMode <<
". Valid "
2699 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
2702 std::cerr <<
os.str ();
2708 if (this->graph_.getColMap ().is_null ()) {
2709 std::ostream&
err = this->markLocalErrorAndGetStream ();
2710 std::ostringstream
os;
2711 os <<
prefix <<
"Target matrix's column Map is null." <<
endl;
2713 std::cerr <<
os.str ();
2725 const size_t blockSize = this->getBlockSize ();
2735 auto val_host = val_.getHostView(Access::ReadOnly);
2737 PackTraits<impl_scalar_type>::packValueCount
2740 const size_t maxRowNumEnt = graph_.getNodeMaxNumRowEntries ();
2744 std::ostringstream
os;
2752 std::cerr <<
os.str ();
2757 std::ostringstream
os;
2758 os <<
prefix <<
"Nothing to unpack. Done!" << std::endl;
2759 std::cerr <<
os.str ();
2766 if (imports.need_sync_host ()) {
2767 imports.sync_host ();
2779 std::ostream&
err = this->markLocalErrorAndGetStream ();
2780 std::ostringstream
os;
2781 os <<
prefix <<
"All input DualViews must be sync'd to host by now. "
2782 <<
"imports_nc.need_sync_host()="
2783 << (imports.need_sync_host () ?
"true" :
"false")
2784 <<
", numPacketsPerLID.need_sync_host()="
2786 <<
", importLIDs.need_sync_host()="
2787 << (
importLIDs.need_sync_host () ?
"true" :
"false")
2790 std::cerr <<
os.str ();
2806 Kokkos::parallel_scan
2807 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets",
policy,
2808 [=] (
const size_t &
i,
size_t &update,
const bool &
final) {
2809 if (
final)
offset(
i) = update;
2820 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
2824 using policy_type = Kokkos::TeamPolicy<host_exec>;
2826 .set_scratch_size (0, Kokkos::PerTeam (
sizeof (GO) *
maxRowNumEnt +
2830 using pair_type = Kokkos::pair<size_t, size_t>;
2831 Kokkos::parallel_for
2832 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack",
policy,
2833 [=] (
const typename policy_type::member_type&
member) {
2834 const size_t i =
member.league_rank();
2836 Kokkos::View<GO*, host_scratch_space>
gblColInds
2838 Kokkos::View<LO*, host_scratch_space>
lclColInds
2840 Kokkos::View<impl_scalar_type*, host_scratch_space>
vals
2854 std::ostringstream
os;
2856 <<
"At i = " <<
i <<
", numEnt = " <<
numEnt
2859 std::cerr <<
os.str ();
2878 imports.view_host(),
2882 catch (std::exception&
e) {
2885 std::ostringstream
os;
2886 os <<
prefix <<
"At i = " <<
i <<
", unpackRowForBlockCrs threw: "
2887 <<
e.what () <<
endl;
2888 std::cerr <<
os.str ();
2896 std::ostringstream
os;
2898 <<
"At i = " <<
i <<
", numBytes = " <<
numBytes
2901 std::cerr <<
os.str();
2909 if (
lidsOut(
k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
2911 std::ostringstream
os;
2913 <<
"At i = " <<
i <<
", GID " <<
gidsOut(
k)
2914 <<
" is not owned by the calling process."
2916 std::cerr <<
os.str();
2924 const LO*
const lidsRaw =
const_cast<const LO*
> (
lidsOut.data ());
2938 std::ostringstream
os;
2940 <<
" != numCombd = " <<
numCombd <<
"."
2942 std::cerr <<
os.str ();
2950 std::ostream&
err = this->markLocalErrorAndGetStream ();
2953 err <<
" Please run again with the environment variable setting "
2954 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
2960 std::ostringstream
os;
2961 const bool lclSuccess = ! (* (this->localError_));
2965 std::cerr <<
os.str ();
2969 template<
class Scalar,
class LO,
class GO,
class Node>
2973 using Teuchos::TypeNameTraits;
2974 std::ostringstream
os;
2975 os <<
"\"Tpetra::BlockCrsMatrix\": { "
2976 <<
"Template parameters: { "
2983 <<
", Global dimensions: ["
2984 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2985 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
2986 <<
", Block size: " << getBlockSize ()
2992 template<
class Scalar,
class LO,
class GO,
class Node>
2996 const Teuchos::EVerbosityLevel
verbLevel)
const
2998 using Teuchos::ArrayRCP;
2999 using Teuchos::CommRequest;
3000 using Teuchos::FancyOStream;
3001 using Teuchos::getFancyOStream;
3002 using Teuchos::ireceive;
3003 using Teuchos::isend;
3004 using Teuchos::outArg;
3005 using Teuchos::TypeNameTraits;
3006 using Teuchos::VERB_DEFAULT;
3007 using Teuchos::VERB_NONE;
3008 using Teuchos::VERB_LOW;
3009 using Teuchos::VERB_MEDIUM;
3011 using Teuchos::VERB_EXTREME;
3013 using Teuchos::wait;
3017 const Teuchos::EVerbosityLevel
vl =
3027 out <<
"\"Tpetra::BlockCrsMatrix\":" <<
endl;
3030 out <<
"Template parameters:" <<
endl;
3039 <<
"Global dimensions: ["
3040 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3041 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" <<
endl;
3048 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3049 const int myRank = comm.getRank ();
3053 getRowMap()->describe (
out,
vl);
3055 if (! getColMap ().
is_null ()) {
3056 if (getColMap() == getRowMap()) {
3058 out <<
"Column Map: same as row Map" <<
endl;
3065 getColMap ()->describe (
out,
vl);
3068 if (! getDomainMap ().
is_null ()) {
3069 if (getDomainMap () == getRowMap ()) {
3071 out <<
"Domain Map: same as row Map" <<
endl;
3074 else if (getDomainMap () == getColMap ()) {
3076 out <<
"Domain Map: same as column Map" <<
endl;
3083 getDomainMap ()->describe (
out,
vl);
3086 if (! getRangeMap ().
is_null ()) {
3087 if (getRangeMap () == getDomainMap ()) {
3089 out <<
"Range Map: same as domain Map" <<
endl;
3092 else if (getRangeMap () == getRowMap ()) {
3094 out <<
"Range Map: same as row Map" <<
endl;
3101 getRangeMap ()->describe (
out,
vl);
3107 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3108 const int myRank = comm.getRank ();
3109 const int numProcs = comm.getSize ();
3116 Teuchos::OSTab
tab2 (
os);
3127 values_host_view_type
vals;
3228 template<
class Scalar,
class LO,
class GO,
class Node>
3229 Teuchos::RCP<const Teuchos::Comm<int> >
3233 return graph_.getComm();
3237 template<
class Scalar,
class LO,
class GO,
class Node>
3242 return graph_.getGlobalNumCols();
3245 template<
class Scalar,
class LO,
class GO,
class Node>
3250 return graph_.getNodeNumCols();
3253 template<
class Scalar,
class LO,
class GO,
class Node>
3258 return graph_.getIndexBase();
3261 template<
class Scalar,
class LO,
class GO,
class Node>
3266 return graph_.getGlobalNumEntries();
3269 template<
class Scalar,
class LO,
class GO,
class Node>
3274 return graph_.getNodeNumEntries();
3277 template<
class Scalar,
class LO,
class GO,
class Node>
3282 return graph_.getNumEntriesInGlobalRow(
globalRow);
3286 template<
class Scalar,
class LO,
class GO,
class Node>
3291 return graph_.getGlobalMaxNumRowEntries();
3294 template<
class Scalar,
class LO,
class GO,
class Node>
3299 return graph_.hasColMap();
3303 template<
class Scalar,
class LO,
class GO,
class Node>
3308 return graph_.isLocallyIndexed();
3311 template<
class Scalar,
class LO,
class GO,
class Node>
3316 return graph_.isGloballyIndexed();
3319 template<
class Scalar,
class LO,
class GO,
class Node>
3324 return graph_.isFillComplete ();
3327 template<
class Scalar,
class LO,
class GO,
class Node>
3335 template<
class Scalar,
class LO,
class GO,
class Node>
3339 nonconst_global_inds_host_view_type &,
3340 nonconst_values_host_view_type &,
3344 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
3345 "This class doesn't support global matrix indexing.");
3349#ifdef TPETRA_ENABLE_DEPRECATED_CODE
3350 template<
class Scalar,
class LO,
class GO,
class Node>
3354 const Teuchos::ArrayView<GO> &,
3355 const Teuchos::ArrayView<Scalar> &,
3359 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
3360 "This class doesn't support global matrix indexing.");
3365#ifdef TPETRA_ENABLE_DEPRECATED_CODE
3366 template<
class Scalar,
class LO,
class GO,
class Node>
3370 Teuchos::ArrayView<const GO> &,
3371 Teuchos::ArrayView<const Scalar> &)
const
3373 TEUCHOS_TEST_FOR_EXCEPTION(
3374 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
3375 "This class doesn't support global matrix indexing.");
3379 template<
class Scalar,
class LO,
class GO,
class Node>
3383 Teuchos::ArrayView<const LO>& ,
3384 Teuchos::ArrayView<const Scalar>& )
const
3386 TEUCHOS_TEST_FOR_EXCEPTION(
3387 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getLocalRowView: "
3388 "This class doesn't support local matrix indexing.");
3392 template<
class Scalar,
class LO,
class GO,
class Node>
3396 global_inds_host_view_type &,
3397 values_host_view_type &)
const
3400 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
3401 "This class doesn't support global matrix indexing.");
3405 template<
class Scalar,
class LO,
class GO,
class Node>
3409 local_inds_host_view_type &
colInds,
3410 values_host_view_type &
vals)
const
3412 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
3413 colInds = local_inds_host_view_type();
3414 vals = values_host_view_type();
3425 template<
class Scalar,
class LO,
class GO,
class Node>
3429 local_inds_host_view_type &
colInds,
3430 nonconst_values_host_view_type &
vals)
const
3432 if (! rowMeshMap_.isNodeLocalElement (
localRowInd)) {
3433 colInds = nonconst_local_inds_host_view_type();
3434 vals = nonconst_values_host_view_type();
3446 template<
class Scalar,
class LO,
class GO,
class Node>
3466 LO
bs = getBlockSize();
3467 for(
size_t r=0;
r<getNodeNumRows();
r++)
3471 for(
int b=0;
b<
bs;
b++)
3482 template<
class Scalar,
class LO,
class GO,
class Node>
3485 leftScale (const ::Tpetra::Vector<Scalar, LO, GO, Node>& )
3488 true, std::logic_error,
"Tpetra::BlockCrsMatrix::leftScale: "
3489 "not implemented.");
3493 template<
class Scalar,
class LO,
class GO,
class Node>
3496 rightScale (const ::Tpetra::Vector<Scalar, LO, GO, Node>& )
3499 true, std::logic_error,
"Tpetra::BlockCrsMatrix::rightScale: "
3500 "not implemented.");
3504 template<
class Scalar,
class LO,
class GO,
class Node>
3505 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3512 template<
class Scalar,
class LO,
class GO,
class Node>
3513 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3518 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
3519 "not implemented.");
3529#define TPETRA_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
3530 template class BlockCrsMatrix< S, LO, GO, NODE >;
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh,...
virtual bool isLocallyIndexed() const override
Whether matrix indices are locally indexed.
virtual bool isFillComplete() const override
Whether fillComplete() has been called.
virtual size_t getGlobalMaxNumRowEntries() const override
The maximum number of entries in any row over all processes in the matrix's communicator.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
virtual size_t getNodeNumEntries() const override
The local number of stored (structurally nonzero) entries.
virtual void getLocalRowCopy(LO LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Not implemented.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
virtual void getGlobalRowView(GO GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const override
Print a description of this object to the given output stream.
Kokkos::View< impl_scalar_type **, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
LO local_ordinal_type
The type of local indices.
virtual void getGlobalRowCopy(GO GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Get a copy of the given global row's entries.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
virtual bool hasColMap() const override
Whether this matrix has a well-defined column Map.
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
impl_scalar_type_dualview::t_host getValuesHostNonConst() const
Get the host or device View of the matrix's values (val_).
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual typename::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const override
The Frobenius norm of the matrix.
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const override
Get the (mesh) graph.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
std::string description() const override
One-line description of this object.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
Kokkos::View< const impl_scalar_type **, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > const_little_block_type
The type used to access const matrix blocks.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
size_t getNumEntriesInLocalRow(const LO localRowInd) const override
Return the number of entries in the given row on the calling process.
virtual bool supportsRowViews() const override
Whether this object implements getLocalRowView() and getGlobalRowView().
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh,...
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const override
The current number of entries on the calling process in the specified global row.
size_t getNodeNumRows() const override
get the local number of block rows
Node::device_type device_type
The Kokkos::Device specialization that this class uses.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
void getLocalRowViewNonConst(LO LocalRow, local_inds_host_view_type &indices, nonconst_values_host_view_type &values) const
char packet_type
Implementation detail; tells.
typename DistObject< Scalar, LO, GO, Node >::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
virtual void packAndPrepare(const SrcDistObject &sourceObj, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the right with the given Vector x.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which this matrix is distributed.
global_size_t getGlobalNumRows() const override
get the global number of block rows
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the left with the given Vector x.
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
virtual bool isGloballyIndexed() const override
Whether matrix indices are globally indexed.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
virtual size_t getNodeNumCols() const override
The number of columns needed to apply the forward operator on this node.
MultiVector for multiple degrees of freedom per mesh point.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Struct that holds views of the contents of a CrsMatrix.
One or more distributed dense vectors.
int local_ordinal_type
Default value of Scalar template parameter.
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
size_t global_size_t
Global size_t object.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ INSERT
Insert new values that don't currently exist.
@ ZERO
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...