40 #ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
41 #define TPETRA_BLOCKCRSMATRIX_DEF_HPP
49 #include "Tpetra_BlockMultiVector.hpp"
52 #include "Teuchos_TimeMonitor.hpp"
53 #ifdef HAVE_TPETRA_DEBUG
69 #if defined(__CUDACC__)
71 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
72 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
75 #elif defined(__GNUC__)
77 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
78 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
84 # if ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
85 # define TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 1
95 struct BlockCrsRowStruct {
96 T totalNumEntries, totalNumBytes, maxRowLength;
97 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct() =
default;
98 KOKKOS_DEFAULTED_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct &b) =
default;
99 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
100 : totalNumEntries(numEnt), totalNumBytes(numBytes), maxRowLength(rowLength) {}
105 KOKKOS_INLINE_FUNCTION
106 void operator+=(
volatile BlockCrsRowStruct<T> &a,
107 volatile const BlockCrsRowStruct<T> &b) {
108 a.totalNumEntries += b.totalNumEntries;
109 a.totalNumBytes += b.totalNumBytes;
110 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
115 KOKKOS_INLINE_FUNCTION
116 void operator+=(BlockCrsRowStruct<T> &a,
const BlockCrsRowStruct<T> &b) {
117 a.totalNumEntries += b.totalNumEntries;
118 a.totalNumBytes += b.totalNumBytes;
119 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
122 template<
typename T,
typename ExecSpace>
123 struct BlockCrsReducer {
124 typedef BlockCrsReducer reducer;
125 typedef T value_type;
126 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
129 KOKKOS_INLINE_FUNCTION
130 BlockCrsReducer(value_type &val) : value(&val) {}
132 KOKKOS_INLINE_FUNCTION
void join(value_type &dst, value_type &src)
const { dst += src; }
133 KOKKOS_INLINE_FUNCTION
void join(
volatile value_type &dst,
const volatile value_type &src)
const { dst += src; }
134 KOKKOS_INLINE_FUNCTION
void init(value_type &val)
const { val = value_type(); }
135 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
136 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
139 template<
class AlphaCoeffType,
141 class MatrixValuesType,
146 class BcrsApplyNoTransFunctor {
148 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
149 "MatrixValuesType must be a Kokkos::View.");
150 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
151 "OutVecType must be a Kokkos::View.");
152 static_assert (Kokkos::Impl::is_view<InVecType>::value,
153 "InVecType must be a Kokkos::View.");
154 static_assert (std::is_same<MatrixValuesType,
155 typename MatrixValuesType::const_type>::value,
156 "MatrixValuesType must be a const Kokkos::View.");
157 static_assert (std::is_same<OutVecType,
158 typename OutVecType::non_const_type>::value,
159 "OutVecType must be a nonconst Kokkos::View.");
160 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
161 "InVecType must be a const Kokkos::View.");
162 static_assert (
static_cast<int> (MatrixValuesType::rank) == 1,
163 "MatrixValuesType must be a rank-1 Kokkos::View.");
164 static_assert (
static_cast<int> (InVecType::rank) == 1,
165 "InVecType must be a rank-1 Kokkos::View.");
166 static_assert (
static_cast<int> (OutVecType::rank) == 1,
167 "OutVecType must be a rank-1 Kokkos::View.");
168 typedef typename MatrixValuesType::non_const_value_type scalar_type;
171 typedef typename GraphType::device_type device_type;
174 typedef typename std::remove_const<typename GraphType::data_type>::type
183 void setX (
const InVecType& X) { X_ = X; }
191 void setY (
const OutVecType& Y) { Y_ = Y; }
193 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
194 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
197 BcrsApplyNoTransFunctor (
const alpha_coeff_type& alpha,
198 const GraphType& graph,
199 const MatrixValuesType& val,
200 const local_ordinal_type blockSize,
202 const beta_coeff_type& beta,
203 const OutVecType& Y) :
205 ptr_ (graph.row_map),
206 ind_ (graph.entries),
208 blockSize_ (blockSize),
215 KOKKOS_INLINE_FUNCTION
void
216 operator () (
const typename Kokkos::TeamPolicy<typename device_type::execution_space>::member_type & member)
const
218 Kokkos::abort(
"Tpetra::BcrsApplyNoTransFunctor:: this should not be called");
222 KOKKOS_INLINE_FUNCTION
void
223 operator () (
const local_ordinal_type& lclRow)
const
229 using Kokkos::Details::ArithTraits;
235 using Kokkos::parallel_for;
236 using Kokkos::subview;
237 typedef typename decltype (ptr_)::non_const_value_type offset_type;
238 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
241 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
244 const offset_type Y_ptBeg = lclRow * blockSize_;
245 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
246 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
250 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
251 FILL (Y_cur, ArithTraits<beta_coeff_type>::zero ());
253 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
257 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
258 const offset_type blkBeg = ptr_[lclRow];
259 const offset_type blkEnd = ptr_[lclRow+1];
261 const offset_type bs2 = blockSize_ * blockSize_;
262 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
264 little_block_type A_cur (val_.data () + absBlkOff * bs2,
265 blockSize_, blockSize_);
266 const offset_type X_blkCol = ind_[absBlkOff];
267 const offset_type X_ptBeg = X_blkCol * blockSize_;
268 const offset_type X_ptEnd = X_ptBeg + blockSize_;
269 auto X_cur = subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
271 GEMV (alpha_, A_cur, X_cur, Y_cur);
277 alpha_coeff_type alpha_;
278 typename GraphType::row_map_type::const_type ptr_;
279 typename GraphType::entries_type::const_type ind_;
280 MatrixValuesType val_;
283 beta_coeff_type beta_;
287 template<
class AlphaCoeffType,
289 class MatrixValuesType,
293 class BcrsApplyNoTransFunctor<AlphaCoeffType,
301 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
302 "MatrixValuesType must be a Kokkos::View.");
303 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
304 "OutVecType must be a Kokkos::View.");
305 static_assert (Kokkos::Impl::is_view<InVecType>::value,
306 "InVecType must be a Kokkos::View.");
307 static_assert (std::is_same<MatrixValuesType,
308 typename MatrixValuesType::const_type>::value,
309 "MatrixValuesType must be a const Kokkos::View.");
310 static_assert (std::is_same<OutVecType,
311 typename OutVecType::non_const_type>::value,
312 "OutVecType must be a nonconst Kokkos::View.");
313 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
314 "InVecType must be a const Kokkos::View.");
315 static_assert (
static_cast<int> (MatrixValuesType::rank) == 1,
316 "MatrixValuesType must be a rank-1 Kokkos::View.");
317 static_assert (
static_cast<int> (InVecType::rank) == 1,
318 "InVecType must be a rank-1 Kokkos::View.");
319 static_assert (
static_cast<int> (OutVecType::rank) == 1,
320 "OutVecType must be a rank-1 Kokkos::View.");
321 typedef typename MatrixValuesType::non_const_value_type scalar_type;
324 typedef typename GraphType::device_type device_type;
327 typedef typename std::remove_const<typename GraphType::data_type>::type
336 void setX (
const InVecType& X) { X_ = X; }
344 void setY (
const OutVecType& Y) { Y_ = Y; }
346 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
347 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
350 BcrsApplyNoTransFunctor (
const alpha_coeff_type& alpha,
351 const GraphType& graph,
352 const MatrixValuesType& val,
353 const local_ordinal_type blockSize,
355 const beta_coeff_type& beta,
356 const OutVecType& Y) :
358 ptr_ (graph.row_map),
359 ind_ (graph.entries),
361 blockSize_ (blockSize),
368 KOKKOS_INLINE_FUNCTION
void
369 operator () (
const local_ordinal_type& lclRow)
const
371 Kokkos::abort(
"Tpetra::BcrsApplyNoTransFunctor:: this should not be called");
375 KOKKOS_INLINE_FUNCTION
void
376 operator () (
const typename Kokkos::TeamPolicy<typename device_type::execution_space>::member_type & member)
const
380 using Kokkos::Details::ArithTraits;
386 using Kokkos::parallel_for;
387 using Kokkos::subview;
388 typedef typename decltype (ptr_)::non_const_value_type offset_type;
389 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
392 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
395 const offset_type Y_ptBeg = lclRow * blockSize_;
396 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
397 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
401 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
402 Kokkos::parallel_for(Kokkos::TeamVectorRange(member, blockSize_),
403 [&](
const local_ordinal_type &i) {
404 Y_cur(i) = ArithTraits<beta_coeff_type>::zero ();
407 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
408 Kokkos::parallel_for(Kokkos::TeamVectorRange(member, blockSize_),
409 [&](
const local_ordinal_type &i) {
413 member.team_barrier();
415 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
416 const offset_type blkBeg = ptr_[lclRow];
417 const offset_type blkEnd = ptr_[lclRow+1];
419 const offset_type bs2 = blockSize_ * blockSize_;
420 little_block_type A_cur (val_.data (), blockSize_, blockSize_);
421 auto X_cur = subview (X_, ::Kokkos::make_pair (0, blockSize_));
423 (Kokkos::TeamThreadRange(member, blkBeg, blkEnd),
424 [&](
const local_ordinal_type &absBlkOff) {
425 A_cur.assign_data(val_.data () + absBlkOff * bs2);
426 const offset_type X_blkCol = ind_[absBlkOff];
427 const offset_type X_ptBeg = X_blkCol * blockSize_;
428 X_cur.assign_data(&X_(X_ptBeg));
431 (Kokkos::ThreadVectorRange(member, blockSize_),
432 [&](
const local_ordinal_type &k0) {
434 for (local_ordinal_type k1=0;k1<blockSize_;++k1)
435 val += A_cur(k0,k1)*X_cur(k1);
436 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
438 Y_cur(k0) += alpha_*val;
444 Kokkos::atomic_add(&Y_cur(k0), alpha_*val);
452 alpha_coeff_type alpha_;
453 typename GraphType::row_map_type::const_type ptr_;
454 typename GraphType::entries_type::const_type ind_;
455 MatrixValuesType val_;
458 beta_coeff_type beta_;
463 template<
class AlphaCoeffType,
465 class MatrixValuesType,
466 class InMultiVecType,
468 class OutMultiVecType>
470 bcrsLocalApplyNoTrans (
const AlphaCoeffType& alpha,
471 const GraphType& graph,
472 const MatrixValuesType& val,
473 const typename std::remove_const<typename GraphType::data_type>::type blockSize,
474 const InMultiVecType& X,
475 const BetaCoeffType& beta,
476 const OutMultiVecType& Y
479 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
480 "MatrixValuesType must be a Kokkos::View.");
481 static_assert (Kokkos::Impl::is_view<OutMultiVecType>::value,
482 "OutMultiVecType must be a Kokkos::View.");
483 static_assert (Kokkos::Impl::is_view<InMultiVecType>::value,
484 "InMultiVecType must be a Kokkos::View.");
485 static_assert (
static_cast<int> (MatrixValuesType::rank) == 1,
486 "MatrixValuesType must be a rank-1 Kokkos::View.");
487 static_assert (
static_cast<int> (OutMultiVecType::rank) == 2,
488 "OutMultiVecType must be a rank-2 Kokkos::View.");
489 static_assert (
static_cast<int> (InMultiVecType::rank) == 2,
490 "InMultiVecType must be a rank-2 Kokkos::View.");
492 typedef typename GraphType::device_type::execution_space execution_space;
493 typedef typename MatrixValuesType::const_type matrix_values_type;
494 typedef typename OutMultiVecType::non_const_type out_multivec_type;
495 typedef typename InMultiVecType::const_type in_multivec_type;
496 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_type;
497 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_type;
498 typedef typename std::remove_const<typename GraphType::data_type>::type LO;
500 constexpr
bool is_builtin_type_enabled = std::is_arithmetic<typename InMultiVecType::non_const_value_type>::value;
502 const LO numLocalMeshRows = graph.row_map.extent (0) == 0 ?
503 static_cast<LO
> (0) :
504 static_cast<LO
> (graph.row_map.extent (0) - 1);
505 const LO numVecs = Y.extent (1);
506 if (numLocalMeshRows == 0 || numVecs == 0) {
513 in_multivec_type X_in = X;
514 out_multivec_type Y_out = Y;
519 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0)) in_vec_type;
520 typedef decltype (Kokkos::subview (Y_out, Kokkos::ALL (), 0)) out_vec_type;
521 typedef BcrsApplyNoTransFunctor<alpha_type, GraphType,
522 matrix_values_type, in_vec_type, beta_type, out_vec_type,
523 is_builtin_type_enabled> functor_type;
525 auto X_0 = Kokkos::subview (X_in, Kokkos::ALL (), 0);
526 auto Y_0 = Kokkos::subview (Y_out, Kokkos::ALL (), 0);
529 if (is_builtin_type_enabled) {
530 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
532 typedef Kokkos::TeamPolicy<execution_space> policy_type;
533 policy_type policy(1,1);
534 #if defined(KOKKOS_ENABLE_CUDA)
535 constexpr
bool is_cuda = std::is_same<execution_space, Kokkos::Cuda>::value;
537 constexpr
bool is_cuda =
false;
540 LO team_size = 8, vector_size = 1;
541 if (blockSize <= 5) vector_size = 4;
542 else if (blockSize <= 9) vector_size = 8;
543 else if (blockSize <= 12) vector_size = 12;
544 else if (blockSize <= 20) vector_size = 20;
545 else vector_size = 20;
546 policy = policy_type(numLocalMeshRows, team_size, vector_size);
548 policy = policy_type(numLocalMeshRows, 1, 1);
550 Kokkos::parallel_for (policy, functor);
553 for (LO j = 1; j < numVecs; ++j) {
554 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
555 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
558 Kokkos::parallel_for (policy, functor);
561 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
562 Kokkos::RangePolicy<execution_space,LO> policy(0, numLocalMeshRows);
563 Kokkos::parallel_for (policy, functor);
564 for (LO j = 1; j < numVecs; ++j) {
565 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
566 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
569 Kokkos::parallel_for (policy, functor);
579 template<
class Scalar,
class LO,
class GO,
class Node>
580 class GetLocalDiagCopy {
582 typedef typename Node::device_type device_type;
583 typedef size_t diag_offset_type;
584 typedef Kokkos::View<
const size_t*, device_type,
585 Kokkos::MemoryUnmanaged> diag_offsets_type;
586 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
588 typedef typename local_graph_device_type::row_map_type row_offsets_type;
589 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
590 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
591 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
594 GetLocalDiagCopy (
const diag_type& diag,
595 const values_type& val,
596 const diag_offsets_type& diagOffsets,
597 const row_offsets_type& ptr,
598 const LO blockSize) :
600 diagOffsets_ (diagOffsets),
602 blockSize_ (blockSize),
603 offsetPerBlock_ (blockSize_*blockSize_),
607 KOKKOS_INLINE_FUNCTION
void
608 operator() (
const LO& lclRowInd)
const
613 const size_t absOffset = ptr_[lclRowInd];
616 const size_t relOffset = diagOffsets_[lclRowInd];
619 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
623 typedef Kokkos::View<
const IST**, Kokkos::LayoutRight,
624 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
625 const_little_block_type;
626 const_little_block_type D_in (val_.data () + pointOffset,
627 blockSize_, blockSize_);
628 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
634 diag_offsets_type diagOffsets_;
635 row_offsets_type ptr_;
642 template<
class Scalar,
class LO,
class GO,
class Node>
644 BlockCrsMatrix<Scalar, LO, GO, Node>::
645 markLocalErrorAndGetStream ()
647 * (this->localError_) =
true;
648 if ((*errs_).is_null ()) {
649 *errs_ = Teuchos::rcp (
new std::ostringstream ());
654 template<
class Scalar,
class LO,
class GO,
class Node>
658 graph_ (Teuchos::rcp (new
map_type ()), 0),
659 blockSize_ (static_cast<LO> (0)),
660 X_colMap_ (new Teuchos::RCP<
BMV> ()),
661 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
662 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
664 localError_ (new bool (false)),
665 errs_ (new Teuchos::RCP<std::ostringstream> ())
669 template<
class Scalar,
class LO,
class GO,
class Node>
672 const LO blockSize) :
675 rowMeshMap_ (* (graph.getRowMap ())),
676 blockSize_ (blockSize),
677 X_colMap_ (new Teuchos::RCP<
BMV> ()),
678 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
679 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
680 offsetPerBlock_ (blockSize * blockSize),
681 localError_ (new bool (false)),
682 errs_ (new Teuchos::RCP<std::ostringstream> ())
685 TEUCHOS_TEST_FOR_EXCEPTION(
686 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
687 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
688 "rows (isSorted() is false). This class assumes sorted rows.");
690 graphRCP_ = Teuchos::rcpFromRef(graph_);
696 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
697 TEUCHOS_TEST_FOR_EXCEPTION(
698 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
699 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
700 " <= 0. The block size must be positive.");
708 const auto colPointMap = Teuchos::rcp
710 *pointImporter_ = Teuchos::rcp
714 auto local_graph_h = graph.getLocalGraphHost ();
715 auto ptr_h = local_graph_h.row_map;
716 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
719 auto ind_h = local_graph_h.entries;
720 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
723 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
724 val_ = decltype (val_) (impl_scalar_type_dualview(
"val", numValEnt));
728 template<
class Scalar,
class LO,
class GO,
class Node>
733 const LO blockSize) :
736 rowMeshMap_ (* (graph.getRowMap ())),
737 domainPointMap_ (domainPointMap),
738 rangePointMap_ (rangePointMap),
739 blockSize_ (blockSize),
740 X_colMap_ (new Teuchos::RCP<
BMV> ()),
741 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
742 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
743 offsetPerBlock_ (blockSize * blockSize),
744 localError_ (new bool (false)),
745 errs_ (new Teuchos::RCP<std::ostringstream> ())
747 TEUCHOS_TEST_FOR_EXCEPTION(
748 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
749 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
750 "rows (isSorted() is false). This class assumes sorted rows.");
752 graphRCP_ = Teuchos::rcpFromRef(graph_);
758 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
759 TEUCHOS_TEST_FOR_EXCEPTION(
760 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
761 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
762 " <= 0. The block size must be positive.");
766 const auto colPointMap = Teuchos::rcp
768 *pointImporter_ = Teuchos::rcp
772 auto local_graph_h = graph.getLocalGraphHost ();
773 auto ptr_h = local_graph_h.row_map;
774 ptrHost_ = decltype(ptrHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph row offset"), ptr_h.extent(0));
777 auto ind_h = local_graph_h.entries;
778 indHost_ = decltype(indHost_)(Kokkos::ViewAllocateWithoutInitializing(
"graph column indices"), ind_h.extent(0));
781 const auto numValEnt = ind_h.extent(0) * offsetPerBlock ();
782 val_ = decltype (val_) (impl_scalar_type_dualview(
"val", numValEnt));
786 template<
class Scalar,
class LO,
class GO,
class Node>
787 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
792 return Teuchos::rcp (
new map_type (domainPointMap_));
795 template<
class Scalar,
class LO,
class GO,
class Node>
796 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
801 return Teuchos::rcp (
new map_type (rangePointMap_));
804 template<
class Scalar,
class LO,
class GO,
class Node>
805 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
809 return graph_.getRowMap();
812 template<
class Scalar,
class LO,
class GO,
class Node>
813 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
817 return graph_.getColMap();
820 template<
class Scalar,
class LO,
class GO,
class Node>
825 return graph_.getGlobalNumRows();
828 template<
class Scalar,
class LO,
class GO,
class Node>
833 return graph_.getNodeNumRows();
836 template<
class Scalar,
class LO,
class GO,
class Node>
841 return graph_.getNodeMaxNumRowEntries();
844 template<
class Scalar,
class LO,
class GO,
class Node>
849 Teuchos::ETransp mode,
854 TEUCHOS_TEST_FOR_EXCEPTION(
855 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
856 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
857 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
858 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
862 const LO blockSize = getBlockSize ();
864 X_view =
BMV (X, * (graph_.getDomainMap ()), blockSize);
865 Y_view =
BMV (Y, * (graph_.getRangeMap ()), blockSize);
867 catch (std::invalid_argument& e) {
868 TEUCHOS_TEST_FOR_EXCEPTION(
869 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
870 "apply: Either the input MultiVector X or the output MultiVector Y "
871 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
872 "graph. BlockMultiVector's constructor threw the following exception: "
881 const_cast<this_type&
> (*this).applyBlock (X_view, Y_view, mode, alpha, beta);
882 }
catch (std::invalid_argument& e) {
883 TEUCHOS_TEST_FOR_EXCEPTION(
884 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
885 "apply: The implementation method applyBlock complained about having "
886 "an invalid argument. It reported the following: " << e.what ());
887 }
catch (std::logic_error& e) {
888 TEUCHOS_TEST_FOR_EXCEPTION(
889 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
890 "apply: The implementation method applyBlock complained about a "
891 "possible bug in its implementation. It reported the following: "
893 }
catch (std::exception& e) {
894 TEUCHOS_TEST_FOR_EXCEPTION(
895 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
896 "apply: The implementation method applyBlock threw an exception which "
897 "is neither std::invalid_argument nor std::logic_error, but is a "
898 "subclass of std::exception. It reported the following: "
901 TEUCHOS_TEST_FOR_EXCEPTION(
902 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
903 "apply: The implementation method applyBlock threw an exception which "
904 "is not an instance of a subclass of std::exception. This probably "
905 "indicates a bug. Please report this to the Tpetra developers.");
909 template<
class Scalar,
class LO,
class GO,
class Node>
914 Teuchos::ETransp mode,
918 TEUCHOS_TEST_FOR_EXCEPTION(
920 "Tpetra::BlockCrsMatrix::applyBlock: "
921 "X and Y have different block sizes. X.getBlockSize() = "
925 if (mode == Teuchos::NO_TRANS) {
926 applyBlockNoTrans (X, Y, alpha, beta);
927 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
928 applyBlockTrans (X, Y, mode, alpha, beta);
930 TEUCHOS_TEST_FOR_EXCEPTION(
931 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
932 "applyBlock: Invalid 'mode' argument. Valid values are "
933 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
937 template<
class Scalar,
class LO,
class GO,
class Node>
942 auto val_d = val_.getDeviceView(Access::OverwriteAll);
946 template<
class Scalar,
class LO,
class GO,
class Node>
952 const LO numColInds)
const
954 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
955 offsets_host_view(Kokkos::ViewAllocateWithoutInitializing(
"offsets"), numColInds);
956 ptrdiff_t * offsets = offsets_host_view.data();
957 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets, colInds, numColInds);
958 const LO validCount = this->replaceLocalValuesByOffsets(localRowInd, offsets, vals, numOffsets);
962 template <
class Scalar,
class LO,
class GO,
class Node>
966 Kokkos::MemoryUnmanaged>& offsets)
const
968 graph_.getLocalDiagOffsets (offsets);
971 template <
class Scalar,
class LO,
class GO,
class Node>
975 Kokkos::MemoryUnmanaged>& diag,
977 Kokkos::MemoryUnmanaged>& offsets)
const
979 using Kokkos::parallel_for;
980 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
982 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
983 const LO blockSize = this->getBlockSize ();
984 TEUCHOS_TEST_FOR_EXCEPTION
985 (
static_cast<LO
> (diag.extent (0)) < lclNumMeshRows ||
986 static_cast<LO
> (diag.extent (1)) < blockSize ||
987 static_cast<LO
> (diag.extent (2)) < blockSize,
988 std::invalid_argument, prefix <<
989 "The input Kokkos::View is not big enough to hold all the data.");
990 TEUCHOS_TEST_FOR_EXCEPTION
991 (
static_cast<LO
> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
992 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
993 "diagonal blocks " << lclNumMeshRows <<
".");
995 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
996 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
1002 auto val_d = val_.getDeviceView(Access::ReadOnly);
1003 parallel_for (policy_type (0, lclNumMeshRows),
1004 functor_type (diag, val_d, offsets,
1005 graph_.getLocalGraphDevice ().row_map, blockSize_));
1008 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1009 template <
class Scalar,
class LO,
class GO,
class Node>
1013 Kokkos::MemoryUnmanaged>& diag,
1014 const Teuchos::ArrayView<const size_t>& offsets)
const
1016 auto offsets_view_host = Kokkos::View<size_t*,Kokkos::HostSpace>(
const_cast<size_t*
>(offsets.getRawPtr()), offsets.size());
1017 auto offsets_view_device = Kokkos::create_mirror_view_and_copy(
typename device_type::memory_space(), offsets_view_host);
1018 getLocalDiagCopy(diag, offsets_view_device);
1023 template<
class Scalar,
class LO,
class GO,
class Node>
1028 const Scalar vals[],
1029 const LO numColInds)
const
1031 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
1032 offsets_host_view(Kokkos::ViewAllocateWithoutInitializing(
"offsets"), numColInds);
1033 ptrdiff_t * offsets = offsets_host_view.data();
1034 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets, colInds, numColInds);
1035 const LO validCount = this->absMaxLocalValuesByOffsets(localRowInd, offsets, vals, numOffsets);
1040 template<
class Scalar,
class LO,
class GO,
class Node>
1045 const Scalar vals[],
1046 const LO numColInds)
const
1048 Kokkos::View<ptrdiff_t*,Kokkos::HostSpace>
1049 offsets_host_view(Kokkos::ViewAllocateWithoutInitializing(
"offsets"), numColInds);
1050 ptrdiff_t * offsets = offsets_host_view.data();
1051 const LO numOffsets = this->getLocalRowOffsets(localRowInd, offsets, colInds, numColInds);
1052 const LO validCount = this->sumIntoLocalValuesByOffsets(localRowInd, offsets, vals, numOffsets);
1055 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1056 template<
class Scalar,
class LO,
class GO,
class Node>
1065 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1069 return Teuchos::OrdinalTraits<LO>::invalid ();
1072 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
1073 colInds = indHost_.data () + absBlockOffsetStart;
1075 auto vals_host_out = getValuesHost (localRowInd);
1076 impl_scalar_type* vals_host_out_raw =
const_cast<impl_scalar_type*
>(vals_host_out.data ());
1077 vals =
reinterpret_cast<Scalar*
> (vals_host_out_raw);
1078 numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
1084 template<
class Scalar,
class LO,
class GO,
class Node>
1088 nonconst_local_inds_host_view_type &Indices,
1089 nonconst_values_host_view_type &Values,
1090 size_t& NumEntries)
const
1092 auto vals = getValuesHost(LocalRow);
1093 const LO numInds = ptrHost_(LocalRow+1) - ptrHost_(LocalRow);
1094 if (numInds > (LO)Indices.extent(0) || numInds*blockSize_*blockSize_ > (LO)Values.extent(0)) {
1095 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
1096 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
1097 << numInds <<
" row entries");
1099 const LO * colInds = indHost_.data() + ptrHost_(LocalRow);
1100 for (LO i=0; i<numInds; ++i) {
1101 Indices[i] = colInds[i];
1103 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
1104 Values[i] = vals[i];
1106 NumEntries = numInds;
1109 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1110 template<
class Scalar,
class LO,
class GO,
class Node>
1114 const Teuchos::ArrayView<LO>& Indices,
1115 const Teuchos::ArrayView<Scalar>& Values,
1116 size_t &NumEntries)
const
1118 auto vals = getValuesHost(LocalRow);
1119 const LO numInds = ptrHost_(LocalRow+1) - ptrHost_(LocalRow);
1120 if (numInds > (LO)Indices.size() || numInds*blockSize_*blockSize_ > (LO)Values.size()) {
1121 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
1122 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
1123 << numInds <<
" row entries");
1125 const LO * colInds = indHost_.data() + ptrHost_(LocalRow);
1126 for (LO i=0; i<numInds; ++i) {
1127 Indices[i] = colInds[i];
1129 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
1130 Values[i] = vals[i];
1132 NumEntries = numInds;
1136 template<
class Scalar,
class LO,
class GO,
class Node>
1140 ptrdiff_t offsets[],
1142 const LO numColInds)
const
1144 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1149 return static_cast<LO
> (0);
1152 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1156 for (LO k = 0; k < numColInds; ++k) {
1157 const LO relBlockOffset =
1158 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1159 if (relBlockOffset != LINV) {
1160 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
1161 hint = relBlockOffset + 1;
1169 template<
class Scalar,
class LO,
class GO,
class Node>
1173 const ptrdiff_t offsets[],
1174 const Scalar vals[],
1175 const LO numOffsets)
const
1177 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1182 return static_cast<LO
> (0);
1186 auto val_out =
const_cast<this_type&
>(*this).getValuesHostNonConst(localRowInd);
1189 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1190 const ptrdiff_t STINV = Teuchos::OrdinalTraits<ptrdiff_t>::invalid ();
1191 size_t pointOffset = 0;
1194 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1195 const size_t blockOffset = offsets[k]*perBlockSize;
1196 if (offsets[k] != STINV) {
1199 COPY (A_new, A_old);
1207 template<
class Scalar,
class LO,
class GO,
class Node>
1211 const ptrdiff_t offsets[],
1212 const Scalar vals[],
1213 const LO numOffsets)
const
1215 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1220 return static_cast<LO
> (0);
1222 const impl_scalar_type*
const vIn =
reinterpret_cast<const impl_scalar_type*
> (vals);
1223 auto val_out = getValuesHost(localRowInd);
1224 impl_scalar_type* vOut =
const_cast<impl_scalar_type*
>(val_out.data());
1226 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1227 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1228 size_t pointOffset = 0;
1231 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1232 const size_t blockOffset = offsets[k]*perBlockSize;
1233 if (blockOffset != STINV) {
1234 little_block_type A_old = getNonConstLocalBlockFromInput (vOut, blockOffset);
1235 const_little_block_type A_new = getConstLocalBlockFromInput (vIn, pointOffset);
1244 template<
class Scalar,
class LO,
class GO,
class Node>
1248 const ptrdiff_t offsets[],
1249 const Scalar vals[],
1250 const LO numOffsets)
const
1252 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1257 return static_cast<LO
> (0);
1262 auto val_out =
const_cast<this_type&
>(*this).getValuesHostNonConst(localRowInd);
1265 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1266 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1267 size_t pointOffset = 0;
1270 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1271 const size_t blockOffset = offsets[k]*perBlockSize;
1272 if (blockOffset != STINV) {
1275 AXPY (ONE, A_new, A_old);
1282 template<
class Scalar,
class LO,
class GO,
class Node>
1284 impl_scalar_type_dualview::t_host::const_type
1288 return val_.getHostView(Access::ReadOnly);
1291 template<
class Scalar,
class LO,
class GO,
class Node>
1292 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
1293 impl_scalar_type_dualview::t_dev::const_type
1294 BlockCrsMatrix<Scalar, LO, GO, Node>::
1295 getValuesDevice ()
const
1297 return val_.getDeviceView(Access::ReadOnly);
1300 template<
class Scalar,
class LO,
class GO,
class Node>
1301 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
1302 impl_scalar_type_dualview::t_host
1306 return val_.getHostView(Access::ReadWrite);
1309 template<
class Scalar,
class LO,
class GO,
class Node>
1311 impl_scalar_type_dualview::t_dev
1315 return val_.getDeviceView(Access::ReadWrite);
1318 template<
class Scalar,
class LO,
class GO,
class Node>
1319 typename BlockCrsMatrix<Scalar, LO, GO, Node>::
1320 impl_scalar_type_dualview::t_host::const_type
1324 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1325 auto val = val_.getHostView(Access::ReadOnly);
1326 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
1330 template<
class Scalar,
class LO,
class GO,
class Node>
1332 impl_scalar_type_dualview::t_dev::const_type
1336 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1337 auto val = val_.getDeviceView(Access::ReadOnly);
1338 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
1342 template<
class Scalar,
class LO,
class GO,
class Node>
1347 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1348 auto val = val_.getHostView(Access::ReadWrite);
1349 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
1353 template<
class Scalar,
class LO,
class GO,
class Node>
1358 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1359 auto val = val_.getDeviceView(Access::ReadWrite);
1360 auto r_val = Kokkos::subview(val, Kokkos::pair<LO,LO>(ptrHost_(lclRow)*perBlockSize, ptrHost_(lclRow+1)*perBlockSize));
1364 template<
class Scalar,
class LO,
class GO,
class Node>
1369 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow (localRowInd);
1370 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1371 return static_cast<LO
> (0);
1373 return static_cast<LO
> (numEntInGraph);
1377 template<
class Scalar,
class LO,
class GO,
class Node>
1382 const Teuchos::ETransp mode,
1392 TEUCHOS_TEST_FOR_EXCEPTION(
1393 true, std::logic_error,
"Tpetra::BlockCrsMatrix::apply: "
1394 "transpose and conjugate transpose modes are not yet implemented.");
1397 template<
class Scalar,
class LO,
class GO,
class Node>
1399 BlockCrsMatrix<Scalar, LO, GO, Node>::
1400 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1401 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1407 typedef ::Tpetra::Import<LO, GO, Node> import_type;
1408 typedef ::Tpetra::Export<LO, GO, Node> export_type;
1409 const Scalar zero = STS::zero ();
1410 const Scalar one = STS::one ();
1411 RCP<const import_type>
import = graph_.getImporter ();
1413 RCP<const export_type> theExport = graph_.getExporter ();
1414 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
1416 if (alpha == zero) {
1420 else if (beta != one) {
1425 const BMV* X_colMap = NULL;
1426 if (
import.is_null ()) {
1430 catch (std::exception& e) {
1431 TEUCHOS_TEST_FOR_EXCEPTION
1432 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1433 "operator= threw an exception: " << std::endl << e.what ());
1443 if ((*X_colMap_).is_null () ||
1444 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1445 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1446 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1447 static_cast<LO
> (X.getNumVectors ())));
1449 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1453 X_colMap = &(**X_colMap_);
1455 catch (std::exception& e) {
1456 TEUCHOS_TEST_FOR_EXCEPTION
1457 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1458 "operator= threw an exception: " << std::endl << e.what ());
1462 BMV* Y_rowMap = NULL;
1463 if (theExport.is_null ()) {
1466 else if ((*Y_rowMap_).is_null () ||
1467 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1468 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1469 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1470 static_cast<LO
> (X.getNumVectors ())));
1472 Y_rowMap = &(**Y_rowMap_);
1474 catch (std::exception& e) {
1475 TEUCHOS_TEST_FOR_EXCEPTION(
1476 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1477 "operator= threw an exception: " << std::endl << e.what ());
1482 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1484 catch (std::exception& e) {
1485 TEUCHOS_TEST_FOR_EXCEPTION
1486 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1487 "an exception: " << e.what ());
1490 TEUCHOS_TEST_FOR_EXCEPTION
1491 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1492 "an exception not a subclass of std::exception.");
1495 if (! theExport.is_null ()) {
1501 template<
class Scalar,
class LO,
class GO,
class Node>
1503 BlockCrsMatrix<Scalar, LO, GO, Node>::
1504 localApplyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1505 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1509 using ::Tpetra::Impl::bcrsLocalApplyNoTrans;
1511 const impl_scalar_type alpha_impl = alpha;
1512 const auto graph = this->graph_.getLocalGraphDevice ();
1513 const impl_scalar_type beta_impl = beta;
1514 const LO blockSize = this->getBlockSize ();
1516 auto X_mv = X.getMultiVectorView ();
1517 auto Y_mv = Y.getMultiVectorView ();
1521 auto X_lcl = X_mv.getLocalViewDevice (Access::ReadOnly);
1522 auto Y_lcl = Y_mv.getLocalViewDevice (Access::ReadWrite);
1523 auto val = val_.getDeviceView(Access::ReadWrite);
1525 bcrsLocalApplyNoTrans (alpha_impl, graph, val, blockSize, X_lcl,
1529 template<
class Scalar,
class LO,
class GO,
class Node>
1531 BlockCrsMatrix<Scalar, LO, GO, Node>::
1532 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1533 const LO colIndexToFind,
1534 const LO hint)
const
1536 const size_t absStartOffset = ptrHost_[localRowIndex];
1537 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1538 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1540 const LO*
const curInd = indHost_.data () + absStartOffset;
1543 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1550 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1555 const LO maxNumEntriesForLinearSearch = 32;
1556 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1561 const LO* beg = curInd;
1562 const LO* end = curInd + numEntriesInRow;
1563 std::pair<const LO*, const LO*> p =
1564 std::equal_range (beg, end, colIndexToFind);
1565 if (p.first != p.second) {
1567 relOffset =
static_cast<LO
> (p.first - beg);
1571 for (LO k = 0; k < numEntriesInRow; ++k) {
1572 if (colIndexToFind == curInd[k]) {
1582 template<
class Scalar,
class LO,
class GO,
class Node>
1584 BlockCrsMatrix<Scalar, LO, GO, Node>::
1585 offsetPerBlock ()
const
1587 return offsetPerBlock_;
1590 template<
class Scalar,
class LO,
class GO,
class Node>
1592 BlockCrsMatrix<Scalar, LO, GO, Node>::
1593 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1594 const size_t pointOffset)
const
1597 const LO rowStride = blockSize_;
1598 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1601 template<
class Scalar,
class LO,
class GO,
class Node>
1603 BlockCrsMatrix<Scalar, LO, GO, Node>::
1604 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1605 const size_t pointOffset)
const
1608 const LO rowStride = blockSize_;
1609 return little_block_type (val + pointOffset, blockSize_, rowStride);
1612 template<
class Scalar,
class LO,
class GO,
class Node>
1613 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1614 BlockCrsMatrix<Scalar, LO, GO, Node>::
1615 getNonConstLocalBlockFromInputHost (impl_scalar_type* val,
1616 const size_t pointOffset)
const
1619 const LO rowStride = blockSize_;
1620 const size_t bs2 = blockSize_ * blockSize_;
1621 return little_block_host_type (val + bs2 * pointOffset, blockSize_, rowStride);
1624 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1625 template<
class Scalar,
class LO,
class GO,
class Node>
1627 BlockCrsMatrix<Scalar, LO, GO, Node>::
1628 getLocalBlock (
const LO localRowInd,
const LO localColInd)
const
1630 using this_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1632 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1633 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1635 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1636 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1637 auto vals =
const_cast<this_type&
>(*this).getValuesDeviceNonConst();
1638 return getNonConstLocalBlockFromInput (vals.data(), absBlockOffset);
1641 return little_block_type ();
1646 template<
class Scalar,
class LO,
class GO,
class Node>
1648 BlockCrsMatrix<Scalar, LO, GO, Node>::
1649 getLocalBlockDeviceNonConst (
const LO localRowInd,
const LO localColInd)
const
1651 using this_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1653 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1654 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1655 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1656 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1657 auto vals =
const_cast<this_type&
>(*this).getValuesDeviceNonConst();
1658 auto r_val = getNonConstLocalBlockFromInput (vals.data(), absBlockOffset);
1662 return little_block_type ();
1666 template<
class Scalar,
class LO,
class GO,
class Node>
1667 typename BlockCrsMatrix<Scalar, LO, GO, Node>::little_block_host_type
1668 BlockCrsMatrix<Scalar, LO, GO, Node>::
1669 getLocalBlockHostNonConst (
const LO localRowInd,
const LO localColInd)
const
1671 using this_type = BlockCrsMatrix<Scalar, LO, GO, Node>;
1673 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1674 const LO relBlockOffset = this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
1675 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
1676 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1677 auto vals =
const_cast<this_type&
>(*this).getValuesHostNonConst();
1678 auto r_val = getNonConstLocalBlockFromInputHost (vals.data(), absBlockOffset);
1682 return little_block_host_type ();
1687 template<
class Scalar,
class LO,
class GO,
class Node>
1689 BlockCrsMatrix<Scalar, LO, GO, Node>::
1690 checkSizes (const ::Tpetra::SrcDistObject& source)
1693 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
1694 const this_type* src =
dynamic_cast<const this_type*
> (&source);
1697 std::ostream& err = markLocalErrorAndGetStream ();
1698 err <<
"checkSizes: The source object of the Import or Export "
1699 "must be a BlockCrsMatrix with the same template parameters as the "
1700 "target object." << endl;
1705 if (src->getBlockSize () != this->getBlockSize ()) {
1706 std::ostream& err = markLocalErrorAndGetStream ();
1707 err <<
"checkSizes: The source and target objects of the Import or "
1708 <<
"Export must have the same block sizes. The source's block "
1709 <<
"size = " << src->getBlockSize () <<
" != the target's block "
1710 <<
"size = " << this->getBlockSize () <<
"." << endl;
1712 if (! src->graph_.isFillComplete ()) {
1713 std::ostream& err = markLocalErrorAndGetStream ();
1714 err <<
"checkSizes: The source object of the Import or Export is "
1715 "not fill complete. Both source and target objects must be fill "
1716 "complete." << endl;
1718 if (! this->graph_.isFillComplete ()) {
1719 std::ostream& err = markLocalErrorAndGetStream ();
1720 err <<
"checkSizes: The target object of the Import or Export is "
1721 "not fill complete. Both source and target objects must be fill "
1722 "complete." << endl;
1724 if (src->graph_.getColMap ().is_null ()) {
1725 std::ostream& err = markLocalErrorAndGetStream ();
1726 err <<
"checkSizes: The source object of the Import or Export does "
1727 "not have a column Map. Both source and target objects must have "
1728 "column Maps." << endl;
1730 if (this->graph_.getColMap ().is_null ()) {
1731 std::ostream& err = markLocalErrorAndGetStream ();
1732 err <<
"checkSizes: The target object of the Import or Export does "
1733 "not have a column Map. Both source and target objects must have "
1734 "column Maps." << endl;
1738 return ! (* (this->localError_));
1741 template<
class Scalar,
class LO,
class GO,
class Node>
1745 (const ::Tpetra::SrcDistObject& source,
1746 const size_t numSameIDs,
1753 using ::Tpetra::Details::Behavior;
1755 using ::Tpetra::Details::ProfilingRegion;
1759 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
1760 const bool debug = Behavior::debug();
1761 const bool verbose = Behavior::verbose();
1766 std::ostringstream os;
1767 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1768 os <<
"Proc " << myRank <<
": BlockCrsMatrix::copyAndPermute : " << endl;
1773 if (* (this->localError_)) {
1774 std::ostream& err = this->markLocalErrorAndGetStream ();
1776 <<
"The target object of the Import or Export is already in an error state."
1785 std::ostringstream os;
1786 os << prefix << endl
1789 std::cerr << os.str ();
1795 if (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0)) {
1796 std::ostream& err = this->markLocalErrorAndGetStream ();
1798 <<
"permuteToLIDs.extent(0) = " << permuteToLIDs.extent (0)
1799 <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0)
1803 if (permuteToLIDs.need_sync_host () || permuteFromLIDs.need_sync_host ()) {
1804 std::ostream& err = this->markLocalErrorAndGetStream ();
1806 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
1813 std::ostream& err = this->markLocalErrorAndGetStream ();
1815 <<
"The source (input) object of the Import or "
1816 "Export is either not a BlockCrsMatrix, or does not have the right "
1817 "template parameters. checkSizes() should have caught this. "
1818 "Please report this bug to the Tpetra developers." << endl;
1822 bool lclErr =
false;
1823 #ifdef HAVE_TPETRA_DEBUG
1824 std::set<LO> invalidSrcCopyRows;
1825 std::set<LO> invalidDstCopyRows;
1826 std::set<LO> invalidDstCopyCols;
1827 std::set<LO> invalidDstPermuteCols;
1828 std::set<LO> invalidPermuteFromRows;
1838 #ifdef HAVE_TPETRA_DEBUG
1839 const map_type& srcRowMap = * (src->graph_.getRowMap ());
1841 const map_type& dstRowMap = * (this->graph_.getRowMap ());
1842 const map_type& srcColMap = * (src->graph_.getColMap ());
1843 const map_type& dstColMap = * (this->graph_.getColMap ());
1844 const bool canUseLocalColumnIndices = srcColMap.
locallySameAs (dstColMap);
1846 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.extent(0));
1848 std::ostringstream os;
1850 <<
"canUseLocalColumnIndices: "
1851 << (canUseLocalColumnIndices ?
"true" :
"false")
1852 <<
", numPermute: " << numPermute
1854 std::cerr << os.str ();
1857 const auto permuteToLIDsHost = permuteToLIDs.view_host();
1858 const auto permuteFromLIDsHost = permuteFromLIDs.view_host();
1860 if (canUseLocalColumnIndices) {
1862 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1863 #ifdef HAVE_TPETRA_DEBUG
1866 invalidSrcCopyRows.insert (localRow);
1871 local_inds_host_view_type lclSrcCols;
1872 values_host_view_type vals;
1876 src->getLocalRowView (localRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1877 if (numEntries > 0) {
1878 LO err = this->replaceLocalValues (localRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1879 if (err != numEntries) {
1882 #ifdef HAVE_TPETRA_DEBUG
1883 invalidDstCopyRows.insert (localRow);
1892 for (LO k = 0; k < numEntries; ++k) {
1895 #ifdef HAVE_TPETRA_DEBUG
1896 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
1906 for (
size_t k = 0; k < numPermute; ++k) {
1907 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
1908 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
1910 local_inds_host_view_type lclSrcCols;
1911 values_host_view_type vals;
1913 src->getLocalRowView (srcLclRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1914 if (numEntries > 0) {
1915 LO err = this->replaceLocalValues (dstLclRow, lclSrcCols.data(),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1916 if (err != numEntries) {
1918 #ifdef HAVE_TPETRA_DEBUG
1919 for (LO c = 0; c < numEntries; ++c) {
1921 invalidDstPermuteCols.insert (lclSrcCols[c]);
1931 const size_t maxNumEnt = src->graph_.getNodeMaxNumRowEntries ();
1932 Teuchos::Array<LO> lclDstCols (maxNumEnt);
1935 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
1936 local_inds_host_view_type lclSrcCols;
1937 values_host_view_type vals;
1943 src->getLocalRowView (localRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
1944 }
catch (std::exception& e) {
1946 std::ostringstream os;
1947 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1948 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1949 << localRow <<
", src->getLocalRowView() threw an exception: "
1951 std::cerr << os.str ();
1956 if (numEntries > 0) {
1957 if (
static_cast<size_t> (numEntries) >
static_cast<size_t> (lclDstCols.size ())) {
1960 std::ostringstream os;
1961 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1962 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1963 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
1964 << maxNumEnt << endl;
1965 std::cerr << os.str ();
1971 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
1972 for (LO j = 0; j < numEntries; ++j) {
1974 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
1976 #ifdef HAVE_TPETRA_DEBUG
1977 invalidDstCopyCols.insert (lclDstColsView[j]);
1983 err = this->replaceLocalValues (localRow, lclDstColsView.getRawPtr (),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
1984 }
catch (std::exception& e) {
1986 std::ostringstream os;
1987 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
1988 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
1989 << localRow <<
", this->replaceLocalValues() threw an exception: "
1991 std::cerr << os.str ();
1995 if (err != numEntries) {
1998 std::ostringstream os;
1999 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2000 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" "
2001 "localRow " << localRow <<
", this->replaceLocalValues "
2002 "returned " << err <<
" instead of numEntries = "
2003 << numEntries << endl;
2004 std::cerr << os.str ();
2012 for (
size_t k = 0; k < numPermute; ++k) {
2013 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
2014 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
2016 local_inds_host_view_type lclSrcCols;
2017 values_host_view_type vals;
2021 src->getLocalRowView (srcLclRow, lclSrcCols, vals); numEntries = lclSrcCols.extent(0);
2022 }
catch (std::exception& e) {
2024 std::ostringstream os;
2025 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2026 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" "
2027 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
2028 <<
", src->getLocalRowView() threw an exception: " << e.what ();
2029 std::cerr << os.str ();
2034 if (numEntries > 0) {
2035 if (
static_cast<size_t> (numEntries) >
static_cast<size_t> (lclDstCols.size ())) {
2041 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2042 for (LO j = 0; j < numEntries; ++j) {
2044 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2046 #ifdef HAVE_TPETRA_DEBUG
2047 invalidDstPermuteCols.insert (lclDstColsView[j]);
2051 LO err = this->replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (),
reinterpret_cast<const scalar_type*
>(vals.data()), numEntries);
2052 if (err != numEntries) {
2061 std::ostream& err = this->markLocalErrorAndGetStream ();
2062 #ifdef HAVE_TPETRA_DEBUG
2063 err <<
"copyAndPermute: The graph structure of the source of the "
2064 "Import or Export must be a subset of the graph structure of the "
2066 err <<
"invalidSrcCopyRows = [";
2067 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
2068 it != invalidSrcCopyRows.end (); ++it) {
2070 typename std::set<LO>::const_iterator itp1 = it;
2072 if (itp1 != invalidSrcCopyRows.end ()) {
2076 err <<
"], invalidDstCopyRows = [";
2077 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
2078 it != invalidDstCopyRows.end (); ++it) {
2080 typename std::set<LO>::const_iterator itp1 = it;
2082 if (itp1 != invalidDstCopyRows.end ()) {
2086 err <<
"], invalidDstCopyCols = [";
2087 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
2088 it != invalidDstCopyCols.end (); ++it) {
2090 typename std::set<LO>::const_iterator itp1 = it;
2092 if (itp1 != invalidDstCopyCols.end ()) {
2096 err <<
"], invalidDstPermuteCols = [";
2097 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
2098 it != invalidDstPermuteCols.end (); ++it) {
2100 typename std::set<LO>::const_iterator itp1 = it;
2102 if (itp1 != invalidDstPermuteCols.end ()) {
2106 err <<
"], invalidPermuteFromRows = [";
2107 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
2108 it != invalidPermuteFromRows.end (); ++it) {
2110 typename std::set<LO>::const_iterator itp1 = it;
2112 if (itp1 != invalidPermuteFromRows.end ()) {
2118 err <<
"copyAndPermute: The graph structure of the source of the "
2119 "Import or Export must be a subset of the graph structure of the "
2125 std::ostringstream os;
2126 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2127 const bool lclSuccess = ! (* (this->localError_));
2128 os <<
"*** Proc " << myRank <<
": copyAndPermute "
2129 << (lclSuccess ?
"succeeded" :
"FAILED");
2133 os <<
": error messages: " << this->errorMessages ();
2135 std::cerr << os.str ();
2159 template<
class LO,
class GO>
2161 packRowCount (
const size_t numEnt,
2162 const size_t numBytesPerValue,
2163 const size_t blkSize)
2165 using ::Tpetra::Details::PackTraits;
2176 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
2177 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
2178 return numEntLen + gidsLen + valsLen;
2192 template<
class ST,
class LO,
class GO>
2194 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO>::input_buffer_type& imports,
2195 const size_t offset,
2196 const size_t numBytes,
2199 using Kokkos::subview;
2200 using ::Tpetra::Details::PackTraits;
2202 if (numBytes == 0) {
2204 return static_cast<size_t> (0);
2209 TEUCHOS_TEST_FOR_EXCEPTION
2210 (theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2211 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
2213 const char*
const inBuf = imports.data () + offset;
2215 TEUCHOS_TEST_FOR_EXCEPTION
2216 (actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2217 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
2219 return static_cast<size_t> (numEntLO);
2226 template<
class ST,
class LO,
class GO>
2228 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO>::output_buffer_type exports,
2229 const size_t offset,
2230 const size_t numEnt,
2231 const typename ::Tpetra::Details::PackTraits<GO>::input_array_type& gidsIn,
2232 const typename ::Tpetra::Details::PackTraits<ST>::input_array_type& valsIn,
2233 const size_t numBytesPerValue,
2234 const size_t blockSize)
2236 using Kokkos::subview;
2237 using ::Tpetra::Details::PackTraits;
2243 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2246 const LO numEntLO =
static_cast<size_t> (numEnt);
2248 const size_t numEntBeg = offset;
2250 const size_t gidsBeg = numEntBeg + numEntLen;
2251 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
2252 const size_t valsBeg = gidsBeg + gidsLen;
2253 const size_t valsLen = numScalarEnt * numBytesPerValue;
2255 char*
const numEntOut = exports.data () + numEntBeg;
2256 char*
const gidsOut = exports.data () + gidsBeg;
2257 char*
const valsOut = exports.data () + valsBeg;
2259 size_t numBytesOut = 0;
2264 Kokkos::pair<int, size_t> p;
2265 p = PackTraits<GO>::packArray (gidsOut, gidsIn.data (), numEnt);
2266 errorCode += p.first;
2267 numBytesOut += p.second;
2269 p = PackTraits<ST>::packArray (valsOut, valsIn.data (), numScalarEnt);
2270 errorCode += p.first;
2271 numBytesOut += p.second;
2274 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2275 TEUCHOS_TEST_FOR_EXCEPTION
2276 (numBytesOut != expectedNumBytes, std::logic_error,
2277 "packRowForBlockCrs: numBytesOut = " << numBytesOut
2278 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2280 TEUCHOS_TEST_FOR_EXCEPTION
2281 (errorCode != 0, std::runtime_error,
"packRowForBlockCrs: "
2282 "PackTraits::packArray returned a nonzero error code");
2288 template<
class ST,
class LO,
class GO>
2290 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO>::output_array_type& gidsOut,
2291 const typename ::Tpetra::Details::PackTraits<ST>::output_array_type& valsOut,
2292 const typename ::Tpetra::Details::PackTraits<int>::input_buffer_type& imports,
2293 const size_t offset,
2294 const size_t numBytes,
2295 const size_t numEnt,
2296 const size_t numBytesPerValue,
2297 const size_t blockSize)
2299 using ::Tpetra::Details::PackTraits;
2301 if (numBytes == 0) {
2305 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2306 TEUCHOS_TEST_FOR_EXCEPTION
2307 (
static_cast<size_t> (imports.extent (0)) <= offset,
2308 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2309 << imports.extent (0) <<
" <= offset = " << offset <<
".");
2310 TEUCHOS_TEST_FOR_EXCEPTION
2311 (
static_cast<size_t> (imports.extent (0)) < offset + numBytes,
2312 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2313 << imports.extent (0) <<
" < offset + numBytes = "
2314 << (offset + numBytes) <<
".");
2319 const size_t numEntBeg = offset;
2321 const size_t gidsBeg = numEntBeg + numEntLen;
2322 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
2323 const size_t valsBeg = gidsBeg + gidsLen;
2324 const size_t valsLen = numScalarEnt * numBytesPerValue;
2326 const char*
const numEntIn = imports.data () + numEntBeg;
2327 const char*
const gidsIn = imports.data () + gidsBeg;
2328 const char*
const valsIn = imports.data () + valsBeg;
2330 size_t numBytesOut = 0;
2334 TEUCHOS_TEST_FOR_EXCEPTION
2335 (
static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
2336 "unpackRowForBlockCrs: Expected number of entries " << numEnt
2337 <<
" != actual number of entries " << numEntOut <<
".");
2340 Kokkos::pair<int, size_t> p;
2341 p = PackTraits<GO>::unpackArray (gidsOut.data (), gidsIn, numEnt);
2342 errorCode += p.first;
2343 numBytesOut += p.second;
2345 p = PackTraits<ST>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
2346 errorCode += p.first;
2347 numBytesOut += p.second;
2350 TEUCHOS_TEST_FOR_EXCEPTION
2351 (numBytesOut != numBytes, std::logic_error,
2352 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2353 <<
" != numBytes = " << numBytes <<
".");
2355 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2356 TEUCHOS_TEST_FOR_EXCEPTION
2357 (numBytesOut != expectedNumBytes, std::logic_error,
2358 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2359 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2361 TEUCHOS_TEST_FOR_EXCEPTION
2362 (errorCode != 0, std::runtime_error,
"unpackRowForBlockCrs: "
2363 "PackTraits::unpackArray returned a nonzero error code");
2369 template<
class Scalar,
class LO,
class GO,
class Node>
2373 (const ::Tpetra::SrcDistObject& source,
2378 Kokkos::DualView<
size_t*,
2380 size_t& constantNumPackets)
2382 using ::Tpetra::Details::Behavior;
2384 using ::Tpetra::Details::ProfilingRegion;
2385 using ::Tpetra::Details::PackTraits;
2387 typedef typename Kokkos::View<int*, device_type>::HostMirror::execution_space host_exec;
2391 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
2393 const bool debug = Behavior::debug();
2394 const bool verbose = Behavior::verbose();
2399 std::ostringstream os;
2400 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2401 os <<
"Proc " << myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
2406 if (* (this->localError_)) {
2407 std::ostream& err = this->markLocalErrorAndGetStream ();
2409 <<
"The target object of the Import or Export is already in an error state."
2418 std::ostringstream os;
2419 os << prefix << std::endl
2423 std::cerr << os.str ();
2429 if (exportLIDs.extent (0) != numPacketsPerLID.extent (0)) {
2430 std::ostream& err = this->markLocalErrorAndGetStream ();
2432 <<
"exportLIDs.extent(0) = " << exportLIDs.extent (0)
2433 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2434 <<
"." << std::endl;
2437 if (exportLIDs.need_sync_host ()) {
2438 std::ostream& err = this->markLocalErrorAndGetStream ();
2439 err << prefix <<
"exportLIDs be sync'd to host." << std::endl;
2445 std::ostream& err = this->markLocalErrorAndGetStream ();
2447 <<
"The source (input) object of the Import or "
2448 "Export is either not a BlockCrsMatrix, or does not have the right "
2449 "template parameters. checkSizes() should have caught this. "
2450 "Please report this bug to the Tpetra developers." << std::endl;
2462 constantNumPackets = 0;
2466 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2467 const size_t numExportLIDs = exportLIDs.extent (0);
2468 size_t numBytesPerValue(0);
2470 auto val_host = val_.getHostView(Access::ReadOnly);
2472 PackTraits<impl_scalar_type>
2480 Impl::BlockCrsRowStruct<size_t> rowReducerStruct;
2483 auto exportLIDsHost = exportLIDs.view_host();
2484 auto numPacketsPerLIDHost = numPacketsPerLID.view_host();
2485 numPacketsPerLID.modify_host ();
2487 using reducer_type = Impl::BlockCrsReducer<Impl::BlockCrsRowStruct<size_t>,host_exec>;
2488 const auto policy = Kokkos::RangePolicy<host_exec>(
size_t(0), numExportLIDs);
2489 Kokkos::parallel_reduce
2491 [=](
const size_t &i,
typename reducer_type::value_type &update) {
2492 const LO lclRow = exportLIDsHost(i);
2494 numEnt = (numEnt == Teuchos::OrdinalTraits<size_t>::invalid () ? 0 : numEnt);
2496 const size_t numBytes =
2497 packRowCount<LO, GO> (numEnt, numBytesPerValue, blockSize);
2498 numPacketsPerLIDHost(i) = numBytes;
2499 update +=
typename reducer_type::value_type(numEnt, numBytes, numEnt);
2500 }, rowReducerStruct);
2506 const size_t totalNumBytes = rowReducerStruct.totalNumBytes;
2507 const size_t totalNumEntries = rowReducerStruct.totalNumEntries;
2508 const size_t maxRowLength = rowReducerStruct.maxRowLength;
2511 std::ostringstream os;
2513 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
2515 std::cerr << os.str ();
2521 if(exports.extent(0) != totalNumBytes)
2523 const std::string oldLabel = exports.d_view.label ();
2524 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
2525 exports = Kokkos::DualView<packet_type*, buffer_device_type>(newLabel, totalNumBytes);
2527 if (totalNumEntries > 0) {
2529 Kokkos::View<size_t*, host_exec> offset(
"offset", numExportLIDs+1);
2531 const auto policy = Kokkos::RangePolicy<host_exec>(
size_t(0), numExportLIDs+1);
2532 Kokkos::parallel_scan
2534 [=](
const size_t &i,
size_t &update,
const bool &
final) {
2535 if (
final) offset(i) = update;
2536 update += (i == numExportLIDs ? 0 : numPacketsPerLIDHost(i));
2539 if (offset(numExportLIDs) != totalNumBytes) {
2540 std::ostream& err = this->markLocalErrorAndGetStream ();
2542 <<
"At end of method, the final offset (in bytes) "
2543 << offset(numExportLIDs) <<
" does not equal the total number of bytes packed "
2544 << totalNumBytes <<
". "
2545 <<
"Please report this bug to the Tpetra developers." << std::endl;
2559 exports.modify_host();
2561 typedef Kokkos::TeamPolicy<host_exec> policy_type;
2563 policy_type(numExportLIDs, 1, 1)
2564 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO)*maxRowLength));
2565 Kokkos::parallel_for
2567 [=](
const typename policy_type::member_type &member) {
2568 const size_t i = member.league_rank();
2569 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2570 gblColInds(member.team_scratch(0), maxRowLength);
2572 const LO lclRowInd = exportLIDsHost(i);
2573 local_inds_host_view_type lclColInds;
2574 values_host_view_type vals;
2579 src->getLocalRowView (lclRowInd, lclColInds, vals);
2580 const size_t numEnt = lclColInds.extent(0);
2583 for (
size_t j = 0; j < numEnt; ++j)
2590 const size_t numBytes =
2591 packRowForBlockCrs<impl_scalar_type, LO, GO>
2592 (exports.view_host(),
2595 Kokkos::View<const GO*, host_exec>(gblColInds.data(), maxRowLength),
2602 const size_t offsetDiff = offset(i+1) - offset(i);
2603 if (numBytes != offsetDiff) {
2604 std::ostringstream os;
2606 <<
"numBytes computed from packRowForBlockCrs is different from "
2607 <<
"precomputed offset values, LID = " << i << std::endl;
2608 std::cerr << os.str ();
2616 std::ostringstream os;
2617 const bool lclSuccess = ! (* (this->localError_));
2619 << (lclSuccess ?
"succeeded" :
"FAILED")
2621 std::cerr << os.str ();
2625 template<
class Scalar,
class LO,
class GO,
class Node>
2633 Kokkos::DualView<
size_t*,
2638 using ::Tpetra::Details::Behavior;
2640 using ::Tpetra::Details::ProfilingRegion;
2641 using ::Tpetra::Details::PackTraits;
2644 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
2646 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
2647 const bool verbose = Behavior::verbose ();
2652 std::ostringstream os;
2653 auto map = this->graph_.getRowMap ();
2654 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
2655 const int myRank = comm.is_null () ? -1 : comm->getRank ();
2656 os <<
"Proc " << myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
2659 os <<
"Start" << endl;
2660 std::cerr << os.str ();
2665 if (* (this->localError_)) {
2666 std::ostream& err = this->markLocalErrorAndGetStream ();
2667 std::ostringstream os;
2668 os << prefix <<
"{Im/Ex}port target is already in error." << endl;
2670 std::cerr << os.str ();
2679 if (importLIDs.extent (0) != numPacketsPerLID.extent (0)) {
2680 std::ostream& err = this->markLocalErrorAndGetStream ();
2681 std::ostringstream os;
2682 os << prefix <<
"importLIDs.extent(0) = " << importLIDs.extent (0)
2683 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2686 std::cerr << os.str ();
2692 if (combineMode !=
ADD && combineMode !=
INSERT &&
2694 combineMode !=
ZERO) {
2695 std::ostream& err = this->markLocalErrorAndGetStream ();
2696 std::ostringstream os;
2698 <<
"Invalid CombineMode value " << combineMode <<
". Valid "
2699 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
2702 std::cerr << os.str ();
2708 if (this->graph_.getColMap ().is_null ()) {
2709 std::ostream& err = this->markLocalErrorAndGetStream ();
2710 std::ostringstream os;
2711 os << prefix <<
"Target matrix's column Map is null." << endl;
2713 std::cerr << os.str ();
2722 const map_type& tgtColMap = * (this->graph_.getColMap ());
2725 const size_t blockSize = this->getBlockSize ();
2726 const size_t numImportLIDs = importLIDs.extent(0);
2733 size_t numBytesPerValue(0);
2735 auto val_host = val_.getHostView(Access::ReadOnly);
2737 PackTraits<impl_scalar_type>::packValueCount
2740 const size_t maxRowNumEnt = graph_.getNodeMaxNumRowEntries ();
2741 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
2744 std::ostringstream os;
2745 os << prefix <<
"combineMode: "
2746 << ::Tpetra::combineModeToString (combineMode)
2747 <<
", blockSize: " << blockSize
2748 <<
", numImportLIDs: " << numImportLIDs
2749 <<
", numBytesPerValue: " << numBytesPerValue
2750 <<
", maxRowNumEnt: " << maxRowNumEnt
2751 <<
", maxRowNumScalarEnt: " << maxRowNumScalarEnt << endl;
2752 std::cerr << os.str ();
2755 if (combineMode ==
ZERO || numImportLIDs == 0) {
2757 std::ostringstream os;
2758 os << prefix <<
"Nothing to unpack. Done!" << std::endl;
2759 std::cerr << os.str ();
2766 if (imports.need_sync_host ()) {
2767 imports.sync_host ();
2777 if (imports.need_sync_host () || numPacketsPerLID.need_sync_host () ||
2778 importLIDs.need_sync_host ()) {
2779 std::ostream& err = this->markLocalErrorAndGetStream ();
2780 std::ostringstream os;
2781 os << prefix <<
"All input DualViews must be sync'd to host by now. "
2782 <<
"imports_nc.need_sync_host()="
2783 << (imports.need_sync_host () ?
"true" :
"false")
2784 <<
", numPacketsPerLID.need_sync_host()="
2785 << (numPacketsPerLID.need_sync_host () ?
"true" :
"false")
2786 <<
", importLIDs.need_sync_host()="
2787 << (importLIDs.need_sync_host () ?
"true" :
"false")
2790 std::cerr << os.str ();
2796 const auto importLIDsHost = importLIDs.view_host ();
2797 const auto numPacketsPerLIDHost = numPacketsPerLID.view_host ();
2803 Kokkos::View<size_t*, host_exec> offset (
"offset", numImportLIDs+1);
2805 const auto policy = Kokkos::RangePolicy<host_exec>(
size_t(0), numImportLIDs+1);
2806 Kokkos::parallel_scan
2807 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets", policy,
2808 [=] (
const size_t &i,
size_t &update,
const bool &
final) {
2809 if (
final) offset(i) = update;
2810 update += (i == numImportLIDs ? 0 : numPacketsPerLIDHost(i));
2820 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
2821 errorDuringUnpack (
"errorDuringUnpack");
2822 errorDuringUnpack () = 0;
2824 using policy_type = Kokkos::TeamPolicy<host_exec>;
2825 const auto policy = policy_type (numImportLIDs, 1, 1)
2826 .set_scratch_size (0, Kokkos::PerTeam (
sizeof (GO) * maxRowNumEnt +
2827 sizeof (LO) * maxRowNumEnt +
2828 numBytesPerValue * maxRowNumScalarEnt));
2829 using host_scratch_space =
typename host_exec::scratch_memory_space;
2830 using pair_type = Kokkos::pair<size_t, size_t>;
2831 Kokkos::parallel_for
2832 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack", policy,
2833 [=] (
const typename policy_type::member_type& member) {
2834 const size_t i = member.league_rank();
2836 Kokkos::View<GO*, host_scratch_space> gblColInds
2837 (member.team_scratch (0), maxRowNumEnt);
2838 Kokkos::View<LO*, host_scratch_space> lclColInds
2839 (member.team_scratch (0), maxRowNumEnt);
2840 Kokkos::View<impl_scalar_type*, host_scratch_space> vals
2841 (member.team_scratch (0), maxRowNumScalarEnt);
2843 const size_t offval = offset(i);
2844 const LO lclRow = importLIDsHost(i);
2845 const size_t numBytes = numPacketsPerLIDHost(i);
2846 const size_t numEnt =
2847 unpackRowCount<impl_scalar_type, LO, GO>
2848 (imports.view_host (), offval, numBytes, numBytesPerValue);
2851 if (numEnt > maxRowNumEnt) {
2852 errorDuringUnpack() = 1;
2854 std::ostringstream os;
2856 <<
"At i = " << i <<
", numEnt = " << numEnt
2857 <<
" > maxRowNumEnt = " << maxRowNumEnt
2859 std::cerr << os.str ();
2864 const size_t numScalarEnt = numEnt*blockSize*blockSize;
2865 auto gidsOut = Kokkos::subview(gblColInds, pair_type(0, numEnt));
2866 auto lidsOut = Kokkos::subview(lclColInds, pair_type(0, numEnt));
2867 auto valsOut = Kokkos::subview(vals, pair_type(0, numScalarEnt));
2872 size_t numBytesOut = 0;
2875 unpackRowForBlockCrs<impl_scalar_type, LO, GO>
2876 (Kokkos::View<GO*,host_exec>(gidsOut.data(), numEnt),
2877 Kokkos::View<impl_scalar_type*,host_exec>(valsOut.data(), numScalarEnt),
2878 imports.view_host(),
2879 offval, numBytes, numEnt,
2880 numBytesPerValue, blockSize);
2882 catch (std::exception& e) {
2883 errorDuringUnpack() = 1;
2885 std::ostringstream os;
2886 os << prefix <<
"At i = " << i <<
", unpackRowForBlockCrs threw: "
2887 << e.what () << endl;
2888 std::cerr << os.str ();
2893 if (numBytes != numBytesOut) {
2894 errorDuringUnpack() = 1;
2896 std::ostringstream os;
2898 <<
"At i = " << i <<
", numBytes = " << numBytes
2899 <<
" != numBytesOut = " << numBytesOut <<
"."
2901 std::cerr << os.str();
2907 for (
size_t k = 0; k < numEnt; ++k) {
2909 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
2911 std::ostringstream os;
2913 <<
"At i = " << i <<
", GID " << gidsOut(k)
2914 <<
" is not owned by the calling process."
2916 std::cerr << os.str();
2924 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.data ());
2925 const Scalar*
const valsRaw =
reinterpret_cast<const Scalar*
>
2927 if (combineMode ==
ADD) {
2928 numCombd = this->sumIntoLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2929 }
else if (combineMode ==
INSERT || combineMode ==
REPLACE) {
2930 numCombd = this->replaceLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2931 }
else if (combineMode ==
ABSMAX) {
2932 numCombd = this->absMaxLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
2935 if (
static_cast<LO
> (numEnt) != numCombd) {
2936 errorDuringUnpack() = 1;
2938 std::ostringstream os;
2939 os << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
2940 <<
" != numCombd = " << numCombd <<
"."
2942 std::cerr << os.str ();
2949 if (errorDuringUnpack () != 0) {
2950 std::ostream& err = this->markLocalErrorAndGetStream ();
2951 err << prefix <<
"Unpacking failed.";
2953 err <<
" Please run again with the environment variable setting "
2954 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
2960 std::ostringstream os;
2961 const bool lclSuccess = ! (* (this->localError_));
2963 << (lclSuccess ?
"succeeded" :
"FAILED")
2965 std::cerr << os.str ();
2969 template<
class Scalar,
class LO,
class GO,
class Node>
2973 using Teuchos::TypeNameTraits;
2974 std::ostringstream os;
2975 os <<
"\"Tpetra::BlockCrsMatrix\": { "
2976 <<
"Template parameters: { "
2977 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
2978 <<
"LO: " << TypeNameTraits<LO>::name ()
2979 <<
"GO: " << TypeNameTraits<GO>::name ()
2980 <<
"Node: " << TypeNameTraits<Node>::name ()
2982 <<
", Label: \"" << this->getObjectLabel () <<
"\""
2983 <<
", Global dimensions: ["
2984 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
2985 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
2986 <<
", Block size: " << getBlockSize ()
2992 template<
class Scalar,
class LO,
class GO,
class Node>
2995 describe (Teuchos::FancyOStream& out,
2996 const Teuchos::EVerbosityLevel verbLevel)
const
2998 using Teuchos::ArrayRCP;
2999 using Teuchos::CommRequest;
3000 using Teuchos::FancyOStream;
3001 using Teuchos::getFancyOStream;
3002 using Teuchos::ireceive;
3003 using Teuchos::isend;
3004 using Teuchos::outArg;
3005 using Teuchos::TypeNameTraits;
3006 using Teuchos::VERB_DEFAULT;
3007 using Teuchos::VERB_NONE;
3008 using Teuchos::VERB_LOW;
3009 using Teuchos::VERB_MEDIUM;
3011 using Teuchos::VERB_EXTREME;
3013 using Teuchos::wait;
3017 const Teuchos::EVerbosityLevel vl =
3018 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
3020 if (vl == VERB_NONE) {
3025 Teuchos::OSTab tab0 (out);
3027 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
3028 Teuchos::OSTab tab1 (out);
3030 out <<
"Template parameters:" << endl;
3032 Teuchos::OSTab tab2 (out);
3033 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
3034 <<
"LO: " << TypeNameTraits<LO>::name () << endl
3035 <<
"GO: " << TypeNameTraits<GO>::name () << endl
3036 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
3038 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
3039 <<
"Global dimensions: ["
3040 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3041 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
3043 const LO blockSize = getBlockSize ();
3044 out <<
"Block size: " << blockSize << endl;
3047 if (vl >= VERB_MEDIUM) {
3048 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3049 const int myRank = comm.getRank ();
3051 out <<
"Row Map:" << endl;
3053 getRowMap()->describe (out, vl);
3055 if (! getColMap ().is_null ()) {
3056 if (getColMap() == getRowMap()) {
3058 out <<
"Column Map: same as row Map" << endl;
3063 out <<
"Column Map:" << endl;
3065 getColMap ()->describe (out, vl);
3068 if (! getDomainMap ().is_null ()) {
3069 if (getDomainMap () == getRowMap ()) {
3071 out <<
"Domain Map: same as row Map" << endl;
3074 else if (getDomainMap () == getColMap ()) {
3076 out <<
"Domain Map: same as column Map" << endl;
3081 out <<
"Domain Map:" << endl;
3083 getDomainMap ()->describe (out, vl);
3086 if (! getRangeMap ().is_null ()) {
3087 if (getRangeMap () == getDomainMap ()) {
3089 out <<
"Range Map: same as domain Map" << endl;
3092 else if (getRangeMap () == getRowMap ()) {
3094 out <<
"Range Map: same as row Map" << endl;
3099 out <<
"Range Map: " << endl;
3101 getRangeMap ()->describe (out, vl);
3106 if (vl >= VERB_EXTREME) {
3107 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3108 const int myRank = comm.getRank ();
3109 const int numProcs = comm.getSize ();
3112 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
3113 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
3114 FancyOStream& os = *osPtr;
3115 os <<
"Process " << myRank <<
":" << endl;
3116 Teuchos::OSTab tab2 (os);
3118 const map_type& meshRowMap = * (graph_.getRowMap ());
3119 const map_type& meshColMap = * (graph_.getColMap ());
3124 os <<
"Row " << meshGblRow <<
": {";
3126 local_inds_host_view_type lclColInds;
3127 values_host_view_type vals;
3129 this->getLocalRowView (meshLclRow, lclColInds, vals); numInds = lclColInds.extent(0);
3131 for (LO k = 0; k < numInds; ++k) {
3134 os <<
"Col " << gblCol <<
": [";
3135 for (LO i = 0; i < blockSize; ++i) {
3136 for (LO j = 0; j < blockSize; ++j) {
3137 os << vals[blockSize*blockSize*k + i*blockSize + j];
3138 if (j + 1 < blockSize) {
3142 if (i + 1 < blockSize) {
3147 if (k + 1 < numInds) {
3157 out << lclOutStrPtr->str ();
3158 lclOutStrPtr = Teuchos::null;
3161 const int sizeTag = 1337;
3162 const int dataTag = 1338;
3164 ArrayRCP<char> recvDataBuf;
3168 for (
int p = 1; p < numProcs; ++p) {
3171 ArrayRCP<size_t> recvSize (1);
3173 RCP<CommRequest<int> > recvSizeReq =
3174 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
3175 wait<int> (comm, outArg (recvSizeReq));
3176 const size_t numCharsToRecv = recvSize[0];
3183 if (
static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
3184 recvDataBuf.resize (numCharsToRecv + 1);
3186 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
3188 RCP<CommRequest<int> > recvDataReq =
3189 ireceive<int, char> (recvData, p, dataTag, comm);
3190 wait<int> (comm, outArg (recvDataReq));
3195 recvDataBuf[numCharsToRecv] =
'\0';
3196 out << recvDataBuf.getRawPtr ();
3198 else if (myRank == p) {
3202 const std::string stringToSend = lclOutStrPtr->str ();
3203 lclOutStrPtr = Teuchos::null;
3206 const size_t numCharsToSend = stringToSend.size ();
3207 ArrayRCP<size_t> sendSize (1);
3208 sendSize[0] = numCharsToSend;
3209 RCP<CommRequest<int> > sendSizeReq =
3210 isend<int, size_t> (sendSize, 0, sizeTag, comm);
3211 wait<int> (comm, outArg (sendSizeReq));
3219 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
3220 RCP<CommRequest<int> > sendDataReq =
3221 isend<int, char> (sendData, 0, dataTag, comm);
3222 wait<int> (comm, outArg (sendDataReq));
3228 template<
class Scalar,
class LO,
class GO,
class Node>
3229 Teuchos::RCP<const Teuchos::Comm<int> >
3233 return graph_.getComm();
3237 template<
class Scalar,
class LO,
class GO,
class Node>
3242 return graph_.getGlobalNumCols();
3245 template<
class Scalar,
class LO,
class GO,
class Node>
3250 return graph_.getNodeNumCols();
3253 template<
class Scalar,
class LO,
class GO,
class Node>
3258 return graph_.getIndexBase();
3261 template<
class Scalar,
class LO,
class GO,
class Node>
3266 return graph_.getGlobalNumEntries();
3269 template<
class Scalar,
class LO,
class GO,
class Node>
3274 return graph_.getNodeNumEntries();
3277 template<
class Scalar,
class LO,
class GO,
class Node>
3282 return graph_.getNumEntriesInGlobalRow(globalRow);
3286 template<
class Scalar,
class LO,
class GO,
class Node>
3291 return graph_.getGlobalMaxNumRowEntries();
3294 template<
class Scalar,
class LO,
class GO,
class Node>
3299 return graph_.hasColMap();
3303 template<
class Scalar,
class LO,
class GO,
class Node>
3308 return graph_.isLocallyIndexed();
3311 template<
class Scalar,
class LO,
class GO,
class Node>
3316 return graph_.isGloballyIndexed();
3319 template<
class Scalar,
class LO,
class GO,
class Node>
3324 return graph_.isFillComplete ();
3327 template<
class Scalar,
class LO,
class GO,
class Node>
3335 template<
class Scalar,
class LO,
class GO,
class Node>
3339 nonconst_global_inds_host_view_type &,
3340 nonconst_values_host_view_type &,
3343 TEUCHOS_TEST_FOR_EXCEPTION(
3344 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
3345 "This class doesn't support global matrix indexing.");
3349 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3350 template<
class Scalar,
class LO,
class GO,
class Node>
3354 const Teuchos::ArrayView<GO> &,
3355 const Teuchos::ArrayView<Scalar> &,
3358 TEUCHOS_TEST_FOR_EXCEPTION(
3359 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
3360 "This class doesn't support global matrix indexing.");
3365 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3366 template<
class Scalar,
class LO,
class GO,
class Node>
3370 Teuchos::ArrayView<const GO> &,
3371 Teuchos::ArrayView<const Scalar> &)
const
3373 TEUCHOS_TEST_FOR_EXCEPTION(
3374 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
3375 "This class doesn't support global matrix indexing.");
3379 template<
class Scalar,
class LO,
class GO,
class Node>
3383 Teuchos::ArrayView<const LO>& ,
3384 Teuchos::ArrayView<const Scalar>& )
const
3386 TEUCHOS_TEST_FOR_EXCEPTION(
3387 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getLocalRowView: "
3388 "This class doesn't support local matrix indexing.");
3392 template<
class Scalar,
class LO,
class GO,
class Node>
3396 global_inds_host_view_type &,
3397 values_host_view_type &)
const
3399 TEUCHOS_TEST_FOR_EXCEPTION(
3400 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
3401 "This class doesn't support global matrix indexing.");
3405 template<
class Scalar,
class LO,
class GO,
class Node>
3409 local_inds_host_view_type &colInds,
3410 values_host_view_type &vals)
const
3412 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
3413 colInds = local_inds_host_view_type();
3414 vals = values_host_view_type();
3417 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
3418 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
3419 colInds = local_inds_host_view_type(indHost_.data()+absBlockOffsetStart, numInds);
3421 vals = getValuesHost (localRowInd);
3425 template<
class Scalar,
class LO,
class GO,
class Node>
3429 local_inds_host_view_type &colInds,
3430 nonconst_values_host_view_type &vals)
const
3432 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
3433 colInds = nonconst_local_inds_host_view_type();
3434 vals = nonconst_values_host_view_type();
3437 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
3438 const size_t numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
3439 colInds = local_inds_host_view_type(indHost_.data()+absBlockOffsetStart, numInds);
3442 vals =
const_cast<this_type&
>(*this).getValuesHostNonConst(localRowInd);
3446 template<
class Scalar,
class LO,
class GO,
class Node>
3451 const size_t lclNumMeshRows = graph_.getNodeNumRows ();
3453 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
3454 graph_.getLocalDiagOffsets (diagOffsets);
3457 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
3460 auto vals_host_out = val_.getHostView(Access::ReadOnly);
3464 size_t rowOffset = 0;
3466 LO bs = getBlockSize();
3467 for(
size_t r=0; r<getNodeNumRows(); r++)
3470 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
3471 for(
int b=0; b<bs; b++)
3476 rowOffset += getNumEntriesInLocalRow(r)*bs*bs;
3482 template<
class Scalar,
class LO,
class GO,
class Node>
3485 leftScale (const ::Tpetra::Vector<Scalar, LO, GO, Node>& )
3487 TEUCHOS_TEST_FOR_EXCEPTION(
3488 true, std::logic_error,
"Tpetra::BlockCrsMatrix::leftScale: "
3489 "not implemented.");
3493 template<
class Scalar,
class LO,
class GO,
class Node>
3496 rightScale (const ::Tpetra::Vector<Scalar, LO, GO, Node>& )
3498 TEUCHOS_TEST_FOR_EXCEPTION(
3499 true, std::logic_error,
"Tpetra::BlockCrsMatrix::rightScale: "
3500 "not implemented.");
3504 template<
class Scalar,
class LO,
class GO,
class Node>
3505 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3512 template<
class Scalar,
class LO,
class GO,
class Node>
3513 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3517 TEUCHOS_TEST_FOR_EXCEPTION(
3518 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
3519 "not implemented.");
3529 #define TPETRA_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
3530 template class BlockCrsMatrix< S, LO, GO, NODE >;
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh,...
virtual bool isLocallyIndexed() const override
Whether matrix indices are locally indexed.
virtual bool isFillComplete() const override
Whether fillComplete() has been called.
virtual size_t getGlobalMaxNumRowEntries() const override
The maximum number of entries in any row over all processes in the matrix's communicator.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
virtual size_t getNodeNumEntries() const override
The local number of stored (structurally nonzero) entries.
virtual void getLocalRowCopy(LO LocalRow, nonconst_local_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Not implemented.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
Scalar scalar_type
The type of entries in the matrix (that is, of each entry in each block).
virtual void getGlobalRowView(GO GlobalRow, global_inds_host_view_type &indices, values_host_view_type &values) const override
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const override
Print a description of this object to the given output stream.
Kokkos::View< impl_scalar_type **, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
LO local_ordinal_type
The type of local indices.
virtual void getGlobalRowCopy(GO GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const override
Get a copy of the given global row's entries.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
virtual void copyAndPermute(const SrcDistObject &sourceObj, const size_t numSameIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteToLIDs, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &permuteFromLIDs, const CombineMode CM) override
Teuchos::RCP< const map_type > getRangeMap() const override
Get the (point) range Map of this matrix.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
virtual bool hasColMap() const override
Whether this matrix has a well-defined column Map.
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
impl_scalar_type_dualview::t_host getValuesHostNonConst() const
Get the host or device View of the matrix's values (val_).
Teuchos::RCP< const map_type > getColMap() const override
get the (mesh) map for the columns of this block matrix.
virtual typename ::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const override
The Frobenius norm of the matrix.
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const override
Get the (mesh) graph.
virtual void unpackAndCombine(const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &importLIDs, Kokkos::DualView< packet_type *, buffer_device_type > imports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode) override
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
virtual global_size_t getGlobalNumCols() const override
The global number of columns of this matrix.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
Teuchos::RCP< const map_type > getRowMap() const override
get the (mesh) map for the rows of this block matrix.
std::string description() const override
One-line description of this object.
void getLocalRowView(LO LocalRow, local_inds_host_view_type &indices, values_host_view_type &values) const override
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
Kokkos::View< const impl_scalar_type **, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > const_little_block_type
The type used to access const matrix blocks.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
size_t getNumEntriesInLocalRow(const LO localRowInd) const override
Return the number of entries in the given row on the calling process.
virtual bool supportsRowViews() const override
Whether this object implements getLocalRowView() and getGlobalRowView().
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh,...
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const override
The current number of entries on the calling process in the specified global row.
size_t getNodeNumRows() const override
get the local number of block rows
Node::device_type device_type
The Kokkos::Device specialization that this class uses.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
void getLocalRowViewNonConst(LO LocalRow, local_inds_host_view_type &indices, nonconst_values_host_view_type &values) const
char packet_type
Implementation detail; tells.
typename DistObject< Scalar, LO, GO, Node >::buffer_device_type buffer_device_type
Kokkos::Device specialization for communication buffers.
virtual void packAndPrepare(const SrcDistObject &sourceObj, const Kokkos::DualView< const local_ordinal_type *, buffer_device_type > &exportLIDs, Kokkos::DualView< packet_type *, buffer_device_type > &exports, Kokkos::DualView< size_t *, buffer_device_type > numPacketsPerLID, size_t &constantNumPackets) override
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the right with the given Vector x.
virtual GO getIndexBase() const override
The index base for global indices in this matrix.
virtual global_size_t getGlobalNumEntries() const override
The global number of stored (structurally nonzero) entries.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which this matrix is distributed.
global_size_t getGlobalNumRows() const override
get the global number of block rows
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x) override
Scale the RowMatrix on the left with the given Vector x.
Teuchos::RCP< const map_type > getDomainMap() const override
Get the (point) domain Map of this matrix.
virtual bool isGloballyIndexed() const override
Whether matrix indices are globally indexed.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
virtual size_t getNodeNumCols() const override
The number of columns needed to apply the forward operator on this node.
MultiVector for multiple degrees of freedom per mesh point.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
local_ordinal_type getMinLocalIndex() const
The minimum local index.
local_ordinal_type getMaxLocalIndex() const
The maximum local index on the calling process.
One or more distributed dense vectors.
A distributed dense vector.
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value)
Replace current value at the specified location with specified values.
int local_ordinal_type
Default value of Scalar template parameter.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
size_t global_size_t
Global size_t object.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ INSERT
Insert new values that don't currently exist.
@ ZERO
Replace old values with zero.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const LO &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.