42 #ifndef TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP 43 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP 50 #include "Teuchos_TimeMonitor.hpp" 64 #if defined(__CUDACC__) 66 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 67 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 68 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 70 #elif defined(__GNUC__) 72 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 73 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 74 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 76 #else // some other compiler 79 # if ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 80 # define TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 1 81 # endif // ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 82 #endif // defined(__CUDACC__), defined(__GNUC__) 86 namespace Experimental {
91 template<
class AlphaCoeffType,
93 class MatrixValuesType,
97 class BcrsApplyNoTrans1VecTeamFunctor {
99 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
100 "MatrixValuesType must be a Kokkos::View.");
101 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
102 "OutVecType must be a Kokkos::View.");
103 static_assert (Kokkos::Impl::is_view<InVecType>::value,
104 "InVecType must be a Kokkos::View.");
105 static_assert (std::is_same<MatrixValuesType,
106 typename MatrixValuesType::const_type>::value,
107 "MatrixValuesType must be a const Kokkos::View.");
108 static_assert (std::is_same<OutVecType,
109 typename OutVecType::non_const_type>::value,
110 "OutVecType must be a nonconst Kokkos::View.");
111 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
112 "InVecType must be a const Kokkos::View.");
113 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
114 "MatrixValuesType must be a rank-1 Kokkos::View.");
115 static_assert (static_cast<int> (InVecType::rank) == 1,
116 "InVecType must be a rank-1 Kokkos::View.");
117 static_assert (static_cast<int> (OutVecType::rank) == 1,
118 "OutVecType must be a rank-1 Kokkos::View.");
119 typedef typename MatrixValuesType::non_const_value_type
scalar_type;
120 typedef typename GraphType::device_type device_type;
121 typedef typename device_type::execution_space execution_space;
122 typedef typename execution_space::scratch_memory_space shmem_space;
126 typedef typename std::remove_const<typename GraphType::data_type>::type
129 typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,
131 local_ordinal_type> policy_type;
138 void setX (
const InVecType& X) { X_ = X; }
146 void setY (
const OutVecType& Y) { Y_ = Y; }
151 KOKKOS_INLINE_FUNCTION local_ordinal_type
152 getScratchSizePerTeam ()
const 156 typedef typename std::decay<decltype (Y_(0))>::type y_value_type;
157 return blockSize_ *
sizeof (y_value_type);
163 KOKKOS_INLINE_FUNCTION local_ordinal_type
164 getScratchSizePerThread ()
const 168 typedef typename std::decay<decltype (Y_(0))>::type y_value_type;
169 return blockSize_ *
sizeof (y_value_type);
173 KOKKOS_INLINE_FUNCTION local_ordinal_type
174 getNumLclMeshRows ()
const 176 return ptr_.dimension_0 () == 0 ?
177 static_cast<local_ordinal_type
> (0) :
178 static_cast<local_ordinal_type> (ptr_.dimension_0 () - 1);
181 static constexpr local_ordinal_type defaultRowsPerTeam = 20;
185 local_ordinal_type getNumTeams ()
const {
192 BcrsApplyNoTrans1VecTeamFunctor (
const typename std::decay<AlphaCoeffType>::type& alpha,
193 const GraphType& graph,
194 const MatrixValuesType& val,
195 const local_ordinal_type blockSize,
197 const typename std::decay<BetaCoeffType>::type& beta,
199 const local_ordinal_type rowsPerTeam = defaultRowsPerTeam) :
201 ptr_ (graph.row_map),
202 ind_ (graph.entries),
204 blockSize_ (blockSize),
208 rowsPerTeam_ (rowsPerTeam)
215 KOKKOS_INLINE_FUNCTION
void 216 operator () (
const typename policy_type::member_type& member)
const 222 using Kokkos::Details::ArithTraits;
228 using Kokkos::parallel_for;
229 using Kokkos::subview;
230 typedef local_ordinal_type LO;
231 typedef typename decltype (ptr_)::non_const_value_type offset_type;
232 typedef Kokkos::View<
typename OutVecType::non_const_value_type*,
233 shmem_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
235 typedef Kokkos::View<
typename OutVecType::non_const_value_type*,
238 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
240 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
243 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
246 const LO leagueRank = member.league_rank();
251 shared_array_type threadLocalMem =
252 shared_array_type (member.thread_scratch (1), blockSize_ * rowsPerTeam_);
257 const LO numLclMeshRows = getNumLclMeshRows ();
258 const LO rowBeg = leagueRank * rowsPerTeam_;
259 const LO rowTmp = rowBeg + rowsPerTeam_;
260 const LO rowEnd = rowTmp < numLclMeshRows ? rowTmp : numLclMeshRows;
271 parallel_for (Kokkos::TeamThreadRange (member, rowBeg, rowEnd),
272 [&] (
const LO& lclRow) {
274 out_little_vec_type Y_tlm (threadLocalMem.ptr_on_device () + blockSize_ * member.team_rank (), blockSize_);
276 const offset_type Y_ptBeg = lclRow * blockSize_;
277 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
279 subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
280 if (beta_ == ArithTraits<BetaCoeffType>::zero ()) {
281 FILL (Y_tlm, ArithTraits<BetaCoeffType>::zero ());
283 else if (beta_ == ArithTraits<BetaCoeffType>::one ()) {
291 if (alpha_ != ArithTraits<AlphaCoeffType>::zero ()) {
292 const offset_type blkBeg = ptr_[lclRow];
293 const offset_type blkEnd = ptr_[lclRow+1];
295 const offset_type bs2 = blockSize_ * blockSize_;
296 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
298 little_block_type A_cur (val_.ptr_on_device () + absBlkOff * bs2,
299 blockSize_, blockSize_);
300 const offset_type X_blkCol = ind_[absBlkOff];
301 const offset_type X_ptBeg = X_blkCol * blockSize_;
302 const offset_type X_ptEnd = X_ptBeg + blockSize_;
304 subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
306 GEMV (alpha_, A_cur, X_cur, Y_tlm);
314 typename std::decay<AlphaCoeffType>::type alpha_;
315 typename GraphType::row_map_type::const_type ptr_;
316 typename GraphType::entries_type::const_type ind_;
317 MatrixValuesType val_;
318 local_ordinal_type blockSize_;
320 typename std::decay<BetaCoeffType>::type beta_;
322 local_ordinal_type rowsPerTeam_;
326 template<
class AlphaCoeffType,
328 class MatrixValuesType,
332 class BcrsApplyNoTrans1VecFunctor {
334 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
335 "MatrixValuesType must be a Kokkos::View.");
336 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
337 "OutVecType must be a Kokkos::View.");
338 static_assert (Kokkos::Impl::is_view<InVecType>::value,
339 "InVecType must be a Kokkos::View.");
340 static_assert (std::is_same<MatrixValuesType,
341 typename MatrixValuesType::const_type>::value,
342 "MatrixValuesType must be a const Kokkos::View.");
343 static_assert (std::is_same<OutVecType,
344 typename OutVecType::non_const_type>::value,
345 "OutVecType must be a nonconst Kokkos::View.");
346 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
347 "InVecType must be a const Kokkos::View.");
348 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
349 "MatrixValuesType must be a rank-1 Kokkos::View.");
350 static_assert (static_cast<int> (InVecType::rank) == 1,
351 "InVecType must be a rank-1 Kokkos::View.");
352 static_assert (static_cast<int> (OutVecType::rank) == 1,
353 "OutVecType must be a rank-1 Kokkos::View.");
354 typedef typename MatrixValuesType::non_const_value_type scalar_type;
357 typedef typename GraphType::device_type device_type;
360 typedef typename std::remove_const<typename GraphType::data_type>::type
363 typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>,
364 typename device_type::execution_space,
365 local_ordinal_type> policy_type;
372 void setX (
const InVecType& X) { X_ = X; }
380 void setY (
const OutVecType& Y) { Y_ = Y; }
382 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
383 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
386 BcrsApplyNoTrans1VecFunctor (
const alpha_coeff_type& alpha,
387 const GraphType& graph,
388 const MatrixValuesType& val,
389 const local_ordinal_type blockSize,
391 const beta_coeff_type& beta,
392 const OutVecType& Y) :
394 ptr_ (graph.row_map),
395 ind_ (graph.entries),
397 blockSize_ (blockSize),
403 KOKKOS_INLINE_FUNCTION
void 404 operator () (
const local_ordinal_type& lclRow)
const 410 using Kokkos::Details::ArithTraits;
416 using Kokkos::parallel_for;
417 using Kokkos::subview;
418 typedef typename decltype (ptr_)::non_const_value_type offset_type;
419 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
422 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
425 const offset_type Y_ptBeg = lclRow * blockSize_;
426 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
427 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
431 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
432 FILL (Y_cur, ArithTraits<beta_coeff_type>::zero ());
434 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
438 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
439 const offset_type blkBeg = ptr_[lclRow];
440 const offset_type blkEnd = ptr_[lclRow+1];
442 const offset_type bs2 = blockSize_ * blockSize_;
443 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
445 little_block_type A_cur (val_.ptr_on_device () + absBlkOff * bs2,
446 blockSize_, blockSize_);
447 const offset_type X_blkCol = ind_[absBlkOff];
448 const offset_type X_ptBeg = X_blkCol * blockSize_;
449 const offset_type X_ptEnd = X_ptBeg + blockSize_;
450 auto X_cur = subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
452 GEMV (alpha_, A_cur, X_cur, Y_cur);
458 alpha_coeff_type alpha_;
459 typename GraphType::row_map_type::const_type ptr_;
460 typename GraphType::entries_type::const_type ind_;
461 MatrixValuesType val_;
462 local_ordinal_type blockSize_;
464 beta_coeff_type beta_;
468 template<
class AlphaCoeffType,
470 class MatrixValuesType,
471 class InMultiVecType,
473 class OutMultiVecType>
475 bcrsLocalApplyNoTrans (
const AlphaCoeffType& alpha,
476 const GraphType& graph,
477 const MatrixValuesType& val,
478 const typename std::remove_const<typename GraphType::data_type>::type blockSize,
479 const InMultiVecType& X,
480 const BetaCoeffType& beta,
481 const OutMultiVecType& Y
483 ,
const typename std::remove_const<typename GraphType::data_type>::type rowsPerTeam = 20
487 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
488 "MatrixValuesType must be a Kokkos::View.");
489 static_assert (Kokkos::Impl::is_view<OutMultiVecType>::value,
490 "OutMultiVecType must be a Kokkos::View.");
491 static_assert (Kokkos::Impl::is_view<InMultiVecType>::value,
492 "InMultiVecType must be a Kokkos::View.");
493 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
494 "MatrixValuesType must be a rank-1 Kokkos::View.");
495 static_assert (static_cast<int> (OutMultiVecType::rank) == 2,
496 "OutMultiVecType must be a rank-2 Kokkos::View.");
497 static_assert (static_cast<int> (InMultiVecType::rank) == 2,
498 "InMultiVecType must be a rank-2 Kokkos::View.");
500 typedef typename MatrixValuesType::const_type matrix_values_type;
501 typedef typename OutMultiVecType::non_const_type out_multivec_type;
502 typedef typename InMultiVecType::const_type in_multivec_type;
503 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_type;
504 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_type;
505 typedef typename std::remove_const<typename GraphType::data_type>::type LO;
507 const LO numLocalMeshRows = graph.row_map.dimension_0 () == 0 ?
508 static_cast<LO
> (0) :
509 static_cast<LO> (graph.row_map.dimension_0 () - 1);
510 const LO numVecs = Y.dimension_1 ();
511 if (numLocalMeshRows == 0 || numVecs == 0) {
518 in_multivec_type X_in = X;
519 out_multivec_type Y_out = Y;
524 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0)) in_vec_type;
525 typedef decltype (
Kokkos::subview (Y_out,
Kokkos::ALL (), 0)) out_vec_type;
527 typedef BcrsApplyNoTrans1VecTeamFunctor<alpha_type, GraphType,
528 matrix_values_type, in_vec_type, beta_type, out_vec_type> functor_type;
530 typedef BcrsApplyNoTrans1VecFunctor<alpha_type, GraphType,
531 matrix_values_type, in_vec_type, beta_type, out_vec_type> functor_type;
533 typedef typename functor_type::policy_type policy_type;
535 auto X_0 = Kokkos::subview (X_in, Kokkos::ALL (), 0);
536 auto Y_0 = Kokkos::subview (Y_out, Kokkos::ALL (), 0);
538 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0, rowsPerTeam);
539 const LO numTeams = functor.getNumTeams ();
540 policy_type policy (numTeams, Kokkos::AUTO ());
546 const LO scratchSizePerTeam = functor.getScratchSizePerTeam ();
547 const LO scratchSizePerThread = functor.getScratchSizePerThread ();
549 policy.set_scratch_size (level,
550 Kokkos::PerTeam (scratchSizePerTeam),
551 Kokkos::PerThread (scratchSizePerThread));
554 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
555 policy_type policy (0, numLocalMeshRows);
559 Kokkos::parallel_for (policy, functor);
562 for (LO j = 1; j < numVecs; ++j) {
563 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
564 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
567 Kokkos::parallel_for (policy, functor);
577 template<
class Scalar,
class LO,
class GO,
class Node>
578 class GetLocalDiagCopy {
580 typedef typename Node::device_type device_type;
581 typedef size_t diag_offset_type;
582 typedef Kokkos::View<
const size_t*, device_type,
583 Kokkos::MemoryUnmanaged> diag_offsets_type;
584 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
585 typedef typename global_graph_type::local_graph_type local_graph_type;
586 typedef typename local_graph_type::row_map_type row_offsets_type;
587 typedef typename ::Tpetra::Experimental::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
588 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
589 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
592 GetLocalDiagCopy (
const diag_type& diag,
593 const values_type& val,
594 const diag_offsets_type& diagOffsets,
595 const row_offsets_type& ptr,
596 const LO blockSize) :
598 diagOffsets_ (diagOffsets),
600 blockSize_ (blockSize),
601 offsetPerBlock_ (blockSize_*blockSize_),
605 KOKKOS_INLINE_FUNCTION
void 606 operator() (
const LO& lclRowInd)
const 611 const size_t absOffset = ptr_[lclRowInd];
614 const size_t relOffset = diagOffsets_[lclRowInd];
617 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
621 typedef Kokkos::View<
const IST**, Kokkos::LayoutRight,
622 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
623 const_little_block_type;
624 const_little_block_type D_in (val_.ptr_on_device () + pointOffset,
625 blockSize_, blockSize_);
626 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
632 diag_offsets_type diagOffsets_;
633 row_offsets_type ptr_;
640 template<
class Scalar,
class LO,
class GO,
class Node>
642 BlockCrsMatrix<Scalar, LO, GO, Node>::
643 markLocalErrorAndGetStream ()
645 * (this->localError_) =
true;
646 if ((*errs_).is_null ()) {
647 *errs_ = Teuchos::rcp (
new std::ostringstream ());
652 template<
class Scalar,
class LO,
class GO,
class Node>
657 blockSize_ (static_cast<LO> (0)),
662 localError_ (new bool (false)),
663 errs_ (new
Teuchos::RCP<std::ostringstream> ())
667 template<
class Scalar,
class LO,
class GO,
class Node>
670 const LO blockSize) :
674 blockSize_ (blockSize),
678 offsetPerBlock_ (blockSize * blockSize),
679 localError_ (new bool (false)),
680 errs_ (new
Teuchos::RCP<std::ostringstream> ())
682 TEUCHOS_TEST_FOR_EXCEPTION(
683 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::Experimental::" 684 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted " 685 "rows (isSorted() is false). This class assumes sorted rows.");
687 graphRCP_ = Teuchos::rcpFromRef(graph_);
693 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
694 TEUCHOS_TEST_FOR_EXCEPTION(
695 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::Experimental::" 696 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
697 " <= 0. The block size must be positive.");
703 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
704 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
707 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
712 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
713 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
716 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
722 val_ = decltype (val_) (
"val", numValEnt);
725 template<
class Scalar,
class LO,
class GO,
class Node>
730 const LO blockSize) :
734 domainPointMap_ (domainPointMap),
735 rangePointMap_ (rangePointMap),
736 blockSize_ (blockSize),
740 offsetPerBlock_ (blockSize * blockSize),
741 localError_ (new bool (false)),
742 errs_ (new
Teuchos::RCP<std::ostringstream> ())
744 TEUCHOS_TEST_FOR_EXCEPTION(
745 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::Experimental::" 746 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted " 747 "rows (isSorted() is false). This class assumes sorted rows.");
749 graphRCP_ = Teuchos::rcpFromRef(graph_);
755 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
756 TEUCHOS_TEST_FOR_EXCEPTION(
757 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::Experimental::" 758 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
759 " <= 0. The block size must be positive.");
762 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
763 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
766 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
771 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
772 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
775 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
781 val_ = decltype (val_) (
"val", numValEnt);
784 template<
class Scalar,
class LO,
class GO,
class Node>
785 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
790 return Teuchos::rcp (
new map_type (domainPointMap_));
793 template<
class Scalar,
class LO,
class GO,
class Node>
794 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
799 return Teuchos::rcp (
new map_type (rangePointMap_));
802 template<
class Scalar,
class LO,
class GO,
class Node>
803 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
810 template<
class Scalar,
class LO,
class GO,
class Node>
811 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
818 template<
class Scalar,
class LO,
class GO,
class Node>
826 template<
class Scalar,
class LO,
class GO,
class Node>
834 template<
class Scalar,
class LO,
class GO,
class Node>
842 template<
class Scalar,
class LO,
class GO,
class Node>
847 Teuchos::ETransp mode,
852 TEUCHOS_TEST_FOR_EXCEPTION(
853 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
854 std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::apply: " 855 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, " 856 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
865 catch (std::invalid_argument& e) {
866 TEUCHOS_TEST_FOR_EXCEPTION(
867 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 868 "apply: Either the input MultiVector X or the output MultiVector Y " 869 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's " 870 "graph. BlockMultiVector's constructor threw the following exception: " 879 const_cast<this_type*
> (
this)->
applyBlock (X_view, Y_view, mode, alpha, beta);
880 }
catch (std::invalid_argument& e) {
881 TEUCHOS_TEST_FOR_EXCEPTION(
882 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 883 "apply: The implementation method applyBlock complained about having " 884 "an invalid argument. It reported the following: " << e.what ());
885 }
catch (std::logic_error& e) {
886 TEUCHOS_TEST_FOR_EXCEPTION(
887 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 888 "apply: The implementation method applyBlock complained about a " 889 "possible bug in its implementation. It reported the following: " 891 }
catch (std::exception& e) {
892 TEUCHOS_TEST_FOR_EXCEPTION(
893 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 894 "apply: The implementation method applyBlock threw an exception which " 895 "is neither std::invalid_argument nor std::logic_error, but is a " 896 "subclass of std::exception. It reported the following: " 899 TEUCHOS_TEST_FOR_EXCEPTION(
900 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 901 "apply: The implementation method applyBlock threw an exception which " 902 "is not an instance of a subclass of std::exception. This probably " 903 "indicates a bug. Please report this to the Tpetra developers.");
907 template<
class Scalar,
class LO,
class GO,
class Node>
912 Teuchos::ETransp mode,
916 TEUCHOS_TEST_FOR_EXCEPTION(
918 "Tpetra::Experimental::BlockCrsMatrix::applyBlock: " 919 "X and Y have different block sizes. X.getBlockSize() = " 923 if (mode == Teuchos::NO_TRANS) {
924 applyBlockNoTrans (X, Y, alpha, beta);
925 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
926 applyBlockTrans (X, Y, mode, alpha, beta);
928 TEUCHOS_TEST_FOR_EXCEPTION(
929 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 930 "applyBlock: Invalid 'mode' argument. Valid values are " 931 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
935 template<
class Scalar,
class LO,
class GO,
class Node>
940 #ifdef HAVE_TPETRA_DEBUG 941 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::setAllToScalar: ";
942 #endif // HAVE_TPETRA_DEBUG 944 if (this->
template need_sync<device_type> ()) {
948 #ifdef HAVE_TPETRA_DEBUG 949 TEUCHOS_TEST_FOR_EXCEPTION
950 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
951 prefix <<
"The matrix's values need sync on both device and host.");
952 #endif // HAVE_TPETRA_DEBUG 953 this->
template modify<Kokkos::HostSpace> ();
956 else if (this->
template need_sync<Kokkos::HostSpace> ()) {
960 #ifdef HAVE_TPETRA_DEBUG 961 TEUCHOS_TEST_FOR_EXCEPTION
962 (this->
template need_sync<device_type> (), std::runtime_error,
963 prefix <<
"The matrix's values need sync on both host and device.");
964 #endif // HAVE_TPETRA_DEBUG 965 this->
template modify<device_type> ();
969 this->
template modify<device_type> ();
974 template<
class Scalar,
class LO,
class GO,
class Node>
980 const LO numColInds)
const 982 #ifdef HAVE_TPETRA_DEBUG 983 const char prefix[] =
984 "Tpetra::Experimental::BlockCrsMatrix::replaceLocalValues: ";
985 #endif // HAVE_TPETRA_DEBUG 992 return static_cast<LO
> (0);
996 const size_t absRowBlockOffset = ptrHost_[localRowInd];
997 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
998 const LO perBlockSize = this->offsetPerBlock ();
1003 #ifdef HAVE_TPETRA_DEBUG 1004 TEUCHOS_TEST_FOR_EXCEPTION
1005 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1006 prefix <<
"The matrix's data were last modified on device, but have " 1007 "not been sync'd to host. Please sync to host (by calling " 1008 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 1010 #endif // HAVE_TPETRA_DEBUG 1016 auto vals_host_out =
1017 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
1020 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1021 const LO relBlockOffset =
1022 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1023 if (relBlockOffset != LINV) {
1032 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1036 vals_host_out_raw + absBlockOffset * perBlockSize;
1041 for (LO i = 0; i < perBlockSize; ++i) {
1042 A_old[i] = A_new[i];
1044 hint = relBlockOffset + 1;
1051 template <
class Scalar,
class LO,
class GO,
class Node>
1055 Kokkos::MemoryUnmanaged>& offsets)
const 1060 template <
class Scalar,
class LO,
class GO,
class Node>
1061 void TPETRA_DEPRECATED
1071 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
1072 offsets.resize (lclNumRows);
1078 typedef typename device_type::memory_space
memory_space;
1079 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
1084 Kokkos::MemoryUnmanaged> output_type;
1085 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1089 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
1091 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
1092 Kokkos::MemoryUnmanaged> output_type;
1093 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1098 template <
class Scalar,
class LO,
class GO,
class Node>
1104 Kokkos::MemoryUnmanaged>& D_inv,
1105 const Scalar& omega,
1110 Kokkos::Details::ArithTraits<impl_scalar_type>::zero ();
1112 Kokkos::Details::ArithTraits<impl_scalar_type>::one ();
1113 const LO numLocalMeshRows =
1122 Teuchos::Array<impl_scalar_type> localMem (blockSize);
1123 Teuchos::Array<impl_scalar_type> localMat (blockSize*blockSize);
1127 LO rowBegin = 0, rowEnd = 0, rowStride = 0;
1128 if (direction == Forward) {
1130 rowEnd = numLocalMeshRows+1;
1133 else if (direction == Backward) {
1134 rowBegin = numLocalMeshRows;
1138 else if (direction == Symmetric) {
1144 const Scalar one_minus_omega = Teuchos::ScalarTraits<Scalar>::one()-omega;
1145 const Scalar minus_omega = -omega;
1148 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1149 const LO actlRow = lclRow - 1;
1152 COPY (B_cur, X_lcl);
1153 SCAL (static_cast<impl_scalar_type> (omega), X_lcl);
1155 const size_t meshBeg = ptrHost_[actlRow];
1156 const size_t meshEnd = ptrHost_[actlRow+1];
1157 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1158 const LO meshCol = indHost_[absBlkOff];
1159 const_little_block_type A_cur =
1160 getConstLocalBlockFromAbsOffset (absBlkOff);
1164 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1166 GEMV (static_cast<impl_scalar_type> (alpha), A_cur, X_cur, X_lcl);
1172 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1174 FILL (X_update, zero);
1175 GEMV (one, D_lcl, X_lcl, X_update);
1179 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1180 for (LO j = 0; j < numVecs; ++j) {
1181 LO actlRow = lclRow-1;
1184 COPY (B_cur, X_lcl);
1185 SCAL (static_cast<impl_scalar_type> (omega), X_lcl);
1187 const size_t meshBeg = ptrHost_[actlRow];
1188 const size_t meshEnd = ptrHost_[actlRow+1];
1189 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1190 const LO meshCol = indHost_[absBlkOff];
1191 const_little_block_type A_cur =
1192 getConstLocalBlockFromAbsOffset (absBlkOff);
1196 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1197 GEMV (static_cast<impl_scalar_type> (alpha), A_cur, X_cur, X_lcl);
1200 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1202 FILL (X_update, zero);
1203 GEMV (one, D_lcl, X_lcl, X_update);
1209 template <
class Scalar,
class LO,
class GO,
class Node>
1215 const Scalar& dampingFactor,
1217 const int numSweeps,
1218 const bool zeroInitialGuess)
const 1222 TEUCHOS_TEST_FOR_EXCEPTION(
1223 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1224 "gaussSeidelCopy: Not implemented.");
1227 template <
class Scalar,
class LO,
class GO,
class Node>
1233 const Teuchos::ArrayView<LO>& rowIndices,
1234 const Scalar& dampingFactor,
1236 const int numSweeps,
1237 const bool zeroInitialGuess)
const 1241 TEUCHOS_TEST_FOR_EXCEPTION(
1242 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1243 "reorderedGaussSeidelCopy: Not implemented.");
1246 template <
class Scalar,
class LO,
class GO,
class Node>
1247 void TPETRA_DEPRECATED
1250 const Teuchos::ArrayView<const size_t>& offsets)
const 1252 using Teuchos::ArrayRCP;
1253 using Teuchos::ArrayView;
1254 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1257 const LO* columnIndices;
1260 Teuchos::Array<LO> cols(1);
1263 Teuchos::Array<Scalar> zeroMat (blockSize_*blockSize_, ZERO);
1264 for (
size_t i = 0; i < myNumRows; ++i) {
1266 if (offsets[i] == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1271 diag.
replaceLocalValues (i, cols.getRawPtr(), &vals[offsets[i]*blockSize_*blockSize_], 1);
1277 template <
class Scalar,
class LO,
class GO,
class Node>
1281 Kokkos::MemoryUnmanaged>& diag,
1282 const Kokkos::View<
const size_t*, device_type,
1283 Kokkos::MemoryUnmanaged>& offsets)
const 1285 using Kokkos::parallel_for;
1287 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
1291 TEUCHOS_TEST_FOR_EXCEPTION
1292 (static_cast<LO> (diag.dimension_0 ()) < lclNumMeshRows ||
1293 static_cast<LO> (diag.dimension_1 ()) < blockSize ||
1294 static_cast<LO> (diag.dimension_2 ()) < blockSize,
1295 std::invalid_argument, prefix <<
1296 "The input Kokkos::View is not big enough to hold all the data.");
1297 TEUCHOS_TEST_FOR_EXCEPTION
1298 (static_cast<LO> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
1299 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of " 1300 "diagonal blocks " << lclNumMeshRows <<
".");
1302 #ifdef HAVE_TPETRA_DEBUG 1303 TEUCHOS_TEST_FOR_EXCEPTION
1304 (this->
template need_sync<device_type> (), std::runtime_error,
1305 prefix <<
"The matrix's data were last modified on host, but have " 1306 "not been sync'd to device. Please sync to device (by calling " 1307 "sync<device_type>() on this matrix) before calling this method.");
1308 #endif // HAVE_TPETRA_DEBUG 1310 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
1311 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
1319 const_cast<this_type*
> (
this)->
template getValues<device_type> ();
1321 parallel_for (policy_type (0, lclNumMeshRows),
1322 functor_type (diag, vals_dev, offsets,
1327 template <
class Scalar,
class LO,
class GO,
class Node>
1331 Kokkos::MemoryUnmanaged>& diag,
1332 const Teuchos::ArrayView<const size_t>& offsets)
const 1335 using Kokkos::parallel_for;
1337 Kokkos::MemoryUnmanaged>::HostMirror::execution_space host_exec_space;
1341 TEUCHOS_TEST_FOR_EXCEPTION
1342 (static_cast<LO> (diag.dimension_0 ()) < lclNumMeshRows ||
1343 static_cast<LO> (diag.dimension_1 ()) < blockSize ||
1344 static_cast<LO> (diag.dimension_2 ()) < blockSize,
1345 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: " 1346 "The input Kokkos::View is not big enough to hold all the data.");
1347 TEUCHOS_TEST_FOR_EXCEPTION
1348 (static_cast<LO> (offsets.size ()) < lclNumMeshRows,
1349 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: " 1350 "offsets.size() = " << offsets.size () <<
" < local number of diagonal " 1351 "blocks " << lclNumMeshRows <<
".");
1355 typedef Kokkos::RangePolicy<host_exec_space, LO> policy_type;
1356 parallel_for (policy_type (0, lclNumMeshRows), [=] (
const LO& lclMeshRow) {
1357 auto D_in = this->getConstLocalBlockFromRelOffset (lclMeshRow, offsets[lclMeshRow]);
1358 auto D_out = Kokkos::subview (diag, lclMeshRow, ALL (), ALL ());
1364 template<
class Scalar,
class LO,
class GO,
class Node>
1369 const Scalar vals[],
1370 const LO numColInds)
const 1377 return static_cast<LO
> (0);
1381 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1382 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1383 const LO perBlockSize = this->offsetPerBlock ();
1388 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1389 const LO relBlockOffset =
1390 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1391 if (relBlockOffset != LINV) {
1392 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1393 little_block_type A_old =
1394 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1395 const_little_block_type A_new =
1396 getConstLocalBlockFromInput (vIn, pointOffset);
1398 Impl::absMax (A_old, A_new);
1399 hint = relBlockOffset + 1;
1407 template<
class Scalar,
class LO,
class GO,
class Node>
1412 const Scalar vals[],
1413 const LO numColInds)
const 1415 #ifdef HAVE_TPETRA_DEBUG 1416 const char prefix[] =
1417 "Tpetra::Experimental::BlockCrsMatrix::sumIntoLocalValues: ";
1418 #endif // HAVE_TPETRA_DEBUG 1425 return static_cast<LO
> (0);
1430 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1431 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1432 const LO perBlockSize = this->offsetPerBlock ();
1437 #ifdef HAVE_TPETRA_DEBUG 1438 TEUCHOS_TEST_FOR_EXCEPTION
1439 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1440 prefix <<
"The matrix's data were last modified on device, but have not " 1441 "been sync'd to host. Please sync to host (by calling " 1442 "sync<Kokkos::HostSpace>() on this matrix) before calling this method.");
1443 #endif // HAVE_TPETRA_DEBUG 1449 auto vals_host_out =
1450 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
1453 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1454 const LO relBlockOffset =
1455 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1456 if (relBlockOffset != LINV) {
1465 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1469 vals_host_out_raw + absBlockOffset * perBlockSize;
1474 for (LO i = 0; i < perBlockSize; ++i) {
1475 A_old[i] += A_new[i];
1477 hint = relBlockOffset + 1;
1484 template<
class Scalar,
class LO,
class GO,
class Node>
1492 #ifdef HAVE_TPETRA_DEBUG 1493 const char prefix[] =
1494 "Tpetra::Experimental::BlockCrsMatrix::getLocalRowView: ";
1495 #endif // HAVE_TPETRA_DEBUG 1501 return Teuchos::OrdinalTraits<LO>::invalid ();
1504 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
1505 colInds = indHost_.ptr_on_device () + absBlockOffsetStart;
1507 #ifdef HAVE_TPETRA_DEBUG 1508 TEUCHOS_TEST_FOR_EXCEPTION
1509 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1510 prefix <<
"The matrix's data were last modified on device, but have " 1511 "not been sync'd to host. Please sync to host (by calling " 1512 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 1514 #endif // HAVE_TPETRA_DEBUG 1520 auto vals_host_out =
1521 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
1524 absBlockOffsetStart * offsetPerBlock ();
1525 vals =
reinterpret_cast<Scalar*
> (vOut);
1527 numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
1532 template<
class Scalar,
class LO,
class GO,
class Node>
1536 const Teuchos::ArrayView<LO>& Indices,
1537 const Teuchos::ArrayView<Scalar>& Values,
1538 size_t &NumEntries)
const 1544 if (numInds > Indices.size() || numInds*blockSize_*blockSize_ > Values.size()) {
1545 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
1546 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold " 1547 << numInds <<
" row entries");
1549 for (LO i=0; i<numInds; ++i) {
1550 Indices[i] = colInds[i];
1552 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
1553 Values[i] = vals[i];
1555 NumEntries = numInds;
1558 template<
class Scalar,
class LO,
class GO,
class Node>
1562 ptrdiff_t offsets[],
1564 const LO numColInds)
const 1571 return static_cast<LO
> (0);
1574 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1578 for (LO k = 0; k < numColInds; ++k) {
1579 const LO relBlockOffset =
1580 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1581 if (relBlockOffset != LINV) {
1582 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
1583 hint = relBlockOffset + 1;
1591 template<
class Scalar,
class LO,
class GO,
class Node>
1595 const ptrdiff_t offsets[],
1596 const Scalar vals[],
1597 const LO numOffsets)
const 1604 return static_cast<LO
> (0);
1608 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1609 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1610 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1611 size_t pointOffset = 0;
1614 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1615 const size_t relBlockOffset = offsets[k];
1616 if (relBlockOffset != STINV) {
1617 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1618 little_block_type A_old =
1619 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1620 const_little_block_type A_new =
1621 getConstLocalBlockFromInput (vIn, pointOffset);
1622 COPY (A_new, A_old);
1630 template<
class Scalar,
class LO,
class GO,
class Node>
1634 const ptrdiff_t offsets[],
1635 const Scalar vals[],
1636 const LO numOffsets)
const 1643 return static_cast<LO
> (0);
1647 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1648 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1649 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1650 size_t pointOffset = 0;
1653 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1654 const size_t relBlockOffset = offsets[k];
1655 if (relBlockOffset != STINV) {
1656 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1657 little_block_type A_old =
1658 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1659 const_little_block_type A_new =
1660 getConstLocalBlockFromInput (vIn, pointOffset);
1661 Impl::absMax (A_old, A_new);
1669 template<
class Scalar,
class LO,
class GO,
class Node>
1673 const ptrdiff_t offsets[],
1674 const Scalar vals[],
1675 const LO numOffsets)
const 1682 return static_cast<LO
> (0);
1687 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1688 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1689 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1690 size_t pointOffset = 0;
1693 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1694 const size_t relBlockOffset = offsets[k];
1695 if (relBlockOffset != STINV) {
1696 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1697 little_block_type A_old =
1698 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1699 const_little_block_type A_new =
1700 getConstLocalBlockFromInput (vIn, pointOffset);
1702 AXPY (ONE, A_new, A_old);
1710 template<
class Scalar,
class LO,
class GO,
class Node>
1716 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1717 return static_cast<LO
> (0);
1719 return static_cast<LO
> (numEntInGraph);
1723 template<
class Scalar,
class LO,
class GO,
class Node>
1728 const Teuchos::ETransp mode,
1738 TEUCHOS_TEST_FOR_EXCEPTION(
1739 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::apply: " 1740 "transpose and conjugate transpose modes are not yet implemented.");
1743 template<
class Scalar,
class LO,
class GO,
class Node>
1755 const Scalar zero = STS::zero ();
1756 const Scalar one = STS::one ();
1757 RCP<const import_type>
import = graph_.
getImporter ();
1759 RCP<const export_type> theExport = graph_.
getExporter ();
1763 TEUCHOS_TEST_FOR_EXCEPTION(
1765 "Tpetra::Experimental::BlockCrsMatrix::applyBlockNoTrans: " 1766 "The input BlockMultiVector X has deep copy semantics, " 1767 "not view semantics (as it should).");
1768 TEUCHOS_TEST_FOR_EXCEPTION(
1770 "Tpetra::Experimental::BlockCrsMatrix::applyBlockNoTrans: " 1771 "The output BlockMultiVector Y has deep copy semantics, " 1772 "not view semantics (as it should).");
1774 if (alpha == zero) {
1777 }
else if (beta != one) {
1781 const BMV* X_colMap = NULL;
1782 if (
import.is_null ()) {
1785 }
catch (std::exception& e) {
1786 TEUCHOS_TEST_FOR_EXCEPTION(
1787 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1788 "applyBlockNoTrans:" << std::endl <<
"Tpetra::MultiVector::" 1789 "operator= threw an exception: " << std::endl << e.what ());
1798 if ((*X_colMap_).is_null () ||
1804 #ifdef HAVE_TPETRA_BCRS_DO_POINT_IMPORT 1805 if (pointImporter_->is_null ()) {
1808 const auto domainPointMap = rcp (
new typename BMV::map_type (domainPointMap_));
1813 domainPointMap, colPointMap));
1822 X_colMap = &(**X_colMap_);
1823 }
catch (std::exception& e) {
1824 TEUCHOS_TEST_FOR_EXCEPTION(
1825 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1826 "applyBlockNoTrans:" << std::endl <<
"Tpetra::MultiVector::" 1827 "operator= threw an exception: " << std::endl << e.what ());
1831 BMV* Y_rowMap = NULL;
1832 if (theExport.is_null ()) {
1834 }
else if ((*Y_rowMap_).is_null () ||
1840 Y_rowMap = &(**Y_rowMap_);
1841 }
catch (std::exception& e) {
1842 TEUCHOS_TEST_FOR_EXCEPTION(
1843 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1844 "applyBlockNoTrans:" << std::endl <<
"Tpetra::MultiVector::" 1845 "operator= threw an exception: " << std::endl << e.what ());
1850 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1852 catch (std::exception& e) {
1853 TEUCHOS_TEST_FOR_EXCEPTION
1854 (
true, std::runtime_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1855 "applyBlockNoTrans: localApplyBlockNoTrans threw an exception: " 1859 TEUCHOS_TEST_FOR_EXCEPTION
1860 (
true, std::runtime_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1861 "applyBlockNoTrans: localApplyBlockNoTrans threw some exception " 1862 "that is not a subclass of std::exception.");
1865 if (! theExport.is_null ()) {
1871 template<
class Scalar,
class LO,
class GO,
class Node>
1879 using Tpetra::Experimental::Impl::bcrsLocalApplyNoTrans;
1888 Y_mv.template modify<device_type> ();
1890 auto X_lcl = X_mv.template getLocalView<device_type> ();
1891 auto Y_lcl = Y_mv.template getLocalView<device_type> ();
1892 auto val = this->val_.template view<device_type> ();
1894 bcrsLocalApplyNoTrans (alpha_impl, graph, val, blockSize, X_lcl,
1898 template<
class Scalar,
class LO,
class GO,
class Node>
1902 const LO colIndexToFind,
1903 const LO hint)
const 1905 const size_t absStartOffset = ptrHost_[localRowIndex];
1906 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1907 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1909 const LO*
const curInd = indHost_.ptr_on_device () + absStartOffset;
1912 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1919 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1924 const LO maxNumEntriesForLinearSearch = 32;
1925 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1930 const LO* beg = curInd;
1931 const LO* end = curInd + numEntriesInRow;
1932 std::pair<const LO*, const LO*> p =
1933 std::equal_range (beg, end, colIndexToFind);
1934 if (p.first != p.second) {
1936 relOffset =
static_cast<LO
> (p.first - beg);
1940 for (LO k = 0; k < numEntriesInRow; ++k) {
1941 if (colIndexToFind == curInd[k]) {
1951 template<
class Scalar,
class LO,
class GO,
class Node>
1956 return offsetPerBlock_;
1959 template<
class Scalar,
class LO,
class GO,
class Node>
1963 const size_t pointOffset)
const 1966 const LO rowStride = blockSize_;
1970 template<
class Scalar,
class LO,
class GO,
class Node>
1974 const size_t pointOffset)
const 1977 const LO rowStride = blockSize_;
1981 template<
class Scalar,
class LO,
class GO,
class Node>
1986 #ifdef HAVE_TPETRA_DEBUG 1987 const char prefix[] =
1988 "Tpetra::Experimental::BlockCrsMatrix::getConstLocalBlockFromAbsOffset: ";
1989 #endif // HAVE_TPETRA_DEBUG 1998 #ifdef HAVE_TPETRA_DEBUG 1999 TEUCHOS_TEST_FOR_EXCEPTION
2000 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
2001 prefix <<
"The matrix's data were last modified on device, but have " 2002 "not been sync'd to host. Please sync to host (by calling " 2003 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 2005 #endif // HAVE_TPETRA_DEBUG 2006 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
2013 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
2016 return getConstLocalBlockFromInput (vals_host_raw, absPointOffset);
2020 template<
class Scalar,
class LO,
class GO,
class Node>
2024 const size_t relMeshOffset)
const 2028 const LO* lclColInds = NULL;
2029 Scalar* lclVals = NULL;
2032 LO err = this->
getLocalRowView (lclMeshRow, lclColInds, lclVals, numEnt);
2040 const size_t relPointOffset = relMeshOffset * this->offsetPerBlock ();
2041 IST* lclValsImpl =
reinterpret_cast<IST*
> (lclVals);
2042 return this->getConstLocalBlockFromInput (const_cast<const IST*> (lclValsImpl),
2047 template<
class Scalar,
class LO,
class GO,
class Node>
2052 #ifdef HAVE_TPETRA_DEBUG 2053 const char prefix[] =
2054 "Tpetra::Experimental::BlockCrsMatrix::getNonConstLocalBlockFromAbsOffset: ";
2055 #endif // HAVE_TPETRA_DEBUG 2064 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
2065 #ifdef HAVE_TPETRA_DEBUG 2066 TEUCHOS_TEST_FOR_EXCEPTION
2067 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
2068 prefix <<
"The matrix's data were last modified on device, but have " 2069 "not been sync'd to host. Please sync to host (by calling " 2070 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 2072 #endif // HAVE_TPETRA_DEBUG 2078 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
2080 return getNonConstLocalBlockFromInput (vals_host_raw, absPointOffset);
2084 template<
class Scalar,
class LO,
class GO,
class Node>
2087 getLocalBlock (
const LO localRowInd,
const LO localColInd)
const 2089 const size_t absRowBlockOffset = ptrHost_[localRowInd];
2090 const LO relBlockOffset =
2091 this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
2093 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
2094 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
2095 return getNonConstLocalBlockFromAbsOffset (absBlockOffset);
2112 template<
class Scalar,
class LO,
class GO,
class Node>
2119 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2122 std::ostream& err = markLocalErrorAndGetStream ();
2123 err <<
"checkSizes: The source object of the Import or Export " 2124 "must be a BlockCrsMatrix with the same template parameters as the " 2125 "target object." << endl;
2131 std::ostream& err = markLocalErrorAndGetStream ();
2132 err <<
"checkSizes: The source and target objects of the Import or " 2133 <<
"Export must have the same block sizes. The source's block " 2134 <<
"size = " << src->getBlockSize () <<
" != the target's block " 2137 if (! src->graph_.isFillComplete ()) {
2138 std::ostream& err = markLocalErrorAndGetStream ();
2139 err <<
"checkSizes: The source object of the Import or Export is " 2140 "not fill complete. Both source and target objects must be fill " 2141 "complete." << endl;
2144 std::ostream& err = markLocalErrorAndGetStream ();
2145 err <<
"checkSizes: The target object of the Import or Export is " 2146 "not fill complete. Both source and target objects must be fill " 2147 "complete." << endl;
2149 if (src->graph_.getColMap ().is_null ()) {
2150 std::ostream& err = markLocalErrorAndGetStream ();
2151 err <<
"checkSizes: The source object of the Import or Export does " 2152 "not have a column Map. Both source and target objects must have " 2153 "column Maps." << endl;
2155 if (this->graph_.
getColMap ().is_null ()) {
2156 std::ostream& err = markLocalErrorAndGetStream ();
2157 err <<
"checkSizes: The target object of the Import or Export does " 2158 "not have a column Map. Both source and target objects must have " 2159 "column Maps." << endl;
2163 return ! (* (this->localError_));
2166 template<
class Scalar,
class LO,
class GO,
class Node>
2171 const Teuchos::ArrayView<const LO>& permuteToLIDs,
2172 const Teuchos::ArrayView<const LO>& permuteFromLIDs)
2176 const bool debug =
false;
2179 std::ostringstream os;
2180 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2181 os <<
"Proc " << myRank <<
": copyAndPermute: " 2182 <<
"numSameIDs: " << numSameIDs
2183 <<
", permuteToLIDs.size(): " << permuteToLIDs.size ()
2184 <<
", permuteFromLIDs.size(): " << permuteFromLIDs.size ()
2186 std::cerr << os.str ();
2192 if (* (this->localError_)) {
2193 std::ostream& err = this->markLocalErrorAndGetStream ();
2194 err <<
"copyAndPermute: The target object of the Import or Export is " 2195 "already in an error state." << endl;
2198 if (permuteToLIDs.size () != permuteFromLIDs.size ()) {
2199 std::ostream& err = this->markLocalErrorAndGetStream ();
2200 err <<
"copyAndPermute: permuteToLIDs.size() = " << permuteToLIDs.size ()
2201 <<
" != permuteFromLIDs.size() = " << permuteFromLIDs.size () <<
"." 2206 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2208 std::ostream& err = this->markLocalErrorAndGetStream ();
2209 err <<
"copyAndPermute: The source object of the Import or Export is " 2210 "either not a BlockCrsMatrix, or does not have the right template " 2211 "parameters. checkSizes() should have caught this. " 2212 "Please report this bug to the Tpetra developers." << endl;
2215 if (* (src->localError_)) {
2216 std::ostream& err = this->markLocalErrorAndGetStream ();
2217 err <<
"copyAndPermute: The source object of the Import or Export is " 2218 "already in an error state." << endl;
2222 bool lclErr =
false;
2223 #ifdef HAVE_TPETRA_DEBUG 2224 std::set<LO> invalidSrcCopyRows;
2225 std::set<LO> invalidDstCopyRows;
2226 std::set<LO> invalidDstCopyCols;
2227 std::set<LO> invalidDstPermuteCols;
2228 std::set<LO> invalidPermuteFromRows;
2229 #endif // HAVE_TPETRA_DEBUG 2238 #ifdef HAVE_TPETRA_DEBUG 2239 const map_type& srcRowMap = * (src->graph_.getRowMap ());
2240 #endif // HAVE_TPETRA_DEBUG 2242 const map_type& srcColMap = * (src->graph_.getColMap ());
2244 const bool canUseLocalColumnIndices = srcColMap.
locallySameAs (dstColMap);
2245 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.size ());
2248 std::ostringstream os;
2249 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2250 os <<
"Proc " << myRank <<
": copyAndPermute: " 2251 <<
"canUseLocalColumnIndices: " 2252 << (canUseLocalColumnIndices ?
"true" :
"false")
2253 <<
", numPermute: " << numPermute
2255 std::cerr << os.str ();
2258 if (canUseLocalColumnIndices) {
2260 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2261 #ifdef HAVE_TPETRA_DEBUG 2264 invalidSrcCopyRows.insert (localRow);
2267 #endif // HAVE_TPETRA_DEBUG 2269 const LO* lclSrcCols;
2274 LO err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2277 #ifdef HAVE_TPETRA_DEBUG 2278 (void) invalidSrcCopyRows.insert (localRow);
2279 #endif // HAVE_TPETRA_DEBUG 2283 if (err != numEntries) {
2286 #ifdef HAVE_TPETRA_DEBUG 2287 invalidDstCopyRows.insert (localRow);
2288 #endif // HAVE_TPETRA_DEBUG 2296 for (LO k = 0; k < numEntries; ++k) {
2299 #ifdef HAVE_TPETRA_DEBUG 2300 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
2301 #endif // HAVE_TPETRA_DEBUG 2310 for (
size_t k = 0; k < numPermute; ++k) {
2311 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDs[k]);
2312 const LO dstLclRow =
static_cast<LO
> (permuteToLIDs[k]);
2314 const LO* lclSrcCols;
2317 LO err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2320 #ifdef HAVE_TPETRA_DEBUG 2321 invalidPermuteFromRows.insert (srcLclRow);
2322 #endif // HAVE_TPETRA_DEBUG 2326 if (err != numEntries) {
2328 #ifdef HAVE_TPETRA_DEBUG 2329 for (LO c = 0; c < numEntries; ++c) {
2331 invalidDstPermuteCols.insert (lclSrcCols[c]);
2334 #endif // HAVE_TPETRA_DEBUG 2341 const size_t maxNumEnt = src->graph_.getNodeMaxNumRowEntries ();
2342 Teuchos::Array<LO> lclDstCols (maxNumEnt);
2345 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2346 const LO* lclSrcCols;
2353 err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2354 }
catch (std::exception& e) {
2356 std::ostringstream os;
2357 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2358 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow " 2359 << localRow <<
", src->getLocalRowView() threw an exception: " 2361 std::cerr << os.str ();
2368 #ifdef HAVE_TPETRA_DEBUG 2369 invalidSrcCopyRows.insert (localRow);
2370 #endif // HAVE_TPETRA_DEBUG 2373 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2376 std::ostringstream os;
2377 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2378 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow " 2379 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = " 2380 << maxNumEnt << endl;
2381 std::cerr << os.str ();
2387 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2388 for (LO j = 0; j < numEntries; ++j) {
2390 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2392 #ifdef HAVE_TPETRA_DEBUG 2393 invalidDstCopyCols.insert (lclDstColsView[j]);
2394 #endif // HAVE_TPETRA_DEBUG 2398 err = this->
replaceLocalValues (localRow, lclDstColsView.getRawPtr (), vals, numEntries);
2399 }
catch (std::exception& e) {
2401 std::ostringstream os;
2402 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2403 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow " 2404 << localRow <<
", this->replaceLocalValues() threw an exception: " 2406 std::cerr << os.str ();
2410 if (err != numEntries) {
2413 std::ostringstream os;
2414 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2415 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" " 2416 "localRow " << localRow <<
", this->replaceLocalValues " 2417 "returned " << err <<
" instead of numEntries = " 2418 << numEntries << endl;
2419 std::cerr << os.str ();
2427 for (
size_t k = 0; k < numPermute; ++k) {
2428 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDs[k]);
2429 const LO dstLclRow =
static_cast<LO
> (permuteToLIDs[k]);
2431 const LO* lclSrcCols;
2436 err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2437 }
catch (std::exception& e) {
2439 std::ostringstream os;
2440 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2441 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" " 2442 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
2443 <<
", src->getLocalRowView() threw an exception: " << e.what ();
2444 std::cerr << os.str ();
2451 #ifdef HAVE_TPETRA_DEBUG 2452 invalidPermuteFromRows.insert (srcLclRow);
2453 #endif // HAVE_TPETRA_DEBUG 2456 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2462 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2463 for (LO j = 0; j < numEntries; ++j) {
2465 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2467 #ifdef HAVE_TPETRA_DEBUG 2468 invalidDstPermuteCols.insert (lclDstColsView[j]);
2469 #endif // HAVE_TPETRA_DEBUG 2472 err = this->
replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (), vals, numEntries);
2473 if (err != numEntries) {
2482 std::ostream& err = this->markLocalErrorAndGetStream ();
2483 #ifdef HAVE_TPETRA_DEBUG 2484 err <<
"copyAndPermute: The graph structure of the source of the " 2485 "Import or Export must be a subset of the graph structure of the " 2487 err <<
"invalidSrcCopyRows = [";
2488 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
2489 it != invalidSrcCopyRows.end (); ++it) {
2491 typename std::set<LO>::const_iterator itp1 = it;
2493 if (itp1 != invalidSrcCopyRows.end ()) {
2497 err <<
"], invalidDstCopyRows = [";
2498 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
2499 it != invalidDstCopyRows.end (); ++it) {
2501 typename std::set<LO>::const_iterator itp1 = it;
2503 if (itp1 != invalidDstCopyRows.end ()) {
2507 err <<
"], invalidDstCopyCols = [";
2508 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
2509 it != invalidDstCopyCols.end (); ++it) {
2511 typename std::set<LO>::const_iterator itp1 = it;
2513 if (itp1 != invalidDstCopyCols.end ()) {
2517 err <<
"], invalidDstPermuteCols = [";
2518 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
2519 it != invalidDstPermuteCols.end (); ++it) {
2521 typename std::set<LO>::const_iterator itp1 = it;
2523 if (itp1 != invalidDstPermuteCols.end ()) {
2527 err <<
"], invalidPermuteFromRows = [";
2528 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
2529 it != invalidPermuteFromRows.end (); ++it) {
2531 typename std::set<LO>::const_iterator itp1 = it;
2533 if (itp1 != invalidPermuteFromRows.end ()) {
2537 err <<
"]" << std::endl;
2539 err <<
"copyAndPermute: The graph structure of the source of the " 2540 "Import or Export must be a subset of the graph structure of the " 2541 "target." << std::endl;
2542 #endif // HAVE_TPETRA_DEBUG 2546 std::ostringstream os;
2547 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2548 const bool lclSuccess = ! (* (this->localError_));
2549 os <<
"*** Proc " << myRank <<
": copyAndPermute " 2550 << (lclSuccess ?
"succeeded" :
"FAILED");
2556 std::cerr << os.str ();
2580 template<
class LO,
class GO,
class D>
2582 packRowCount (
const size_t numEnt,
2583 const size_t numBytesPerValue,
2584 const size_t blkSize)
2596 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2597 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2598 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
2599 return numEntLen + gidsLen + valsLen;
2613 template<
class ST,
class LO,
class GO,
class D>
2616 const size_t offset,
2617 const size_t numBytes,
2618 const size_t numBytesPerValue)
2620 using Kokkos::subview;
2622 typedef typename PackTraits<LO, D>::input_buffer_type input_buffer_type;
2623 typedef typename input_buffer_type::size_type size_type;
2625 if (numBytes == 0) {
2627 return static_cast<size_t> (0);
2631 const size_t theNumBytes = PackTraits<LO, D>::packValueCount (numEntLO);
2632 #ifdef HAVE_TPETRA_DEBUG 2633 TEUCHOS_TEST_FOR_EXCEPTION(
2634 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 2635 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
2637 #endif // HAVE_TPETRA_DEBUG 2638 const std::pair<size_type, size_type> rng (offset, offset + theNumBytes);
2639 input_buffer_type inBuf = subview (imports, rng);
2640 const size_t actualNumBytes = PackTraits<LO, D>::unpackValue (numEntLO, inBuf);
2641 #ifdef HAVE_TPETRA_DEBUG 2642 TEUCHOS_TEST_FOR_EXCEPTION(
2643 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 2644 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
2647 (void) actualNumBytes;
2648 #endif // HAVE_TPETRA_DEBUG 2649 return static_cast<size_t> (numEntLO);
2660 template<
class ST,
class LO,
class GO,
class D>
2663 const size_t offset,
2664 const size_t numEnt,
2667 const size_t numBytesPerValue,
2668 const size_t blockSize)
2670 using Kokkos::subview;
2677 typedef typename PackTraits<LO, D>::output_buffer_type output_buffer_type;
2678 typedef typename output_buffer_type::size_type size_type;
2679 typedef typename std::pair<size_type, size_type> pair_type;
2685 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2688 const LO numEntLO =
static_cast<size_t> (numEnt);
2690 const size_t numEntBeg = offset;
2691 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2692 const size_t gidsBeg = numEntBeg + numEntLen;
2693 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2694 const size_t valsBeg = gidsBeg + gidsLen;
2695 const size_t valsLen = numScalarEnt * numBytesPerValue;
2697 output_buffer_type numEntOut =
2698 subview (exports, pair_type (numEntBeg, numEntBeg + numEntLen));
2699 output_buffer_type gidsOut =
2700 subview (exports, pair_type (gidsBeg, gidsBeg + gidsLen));
2701 output_buffer_type valsOut =
2702 subview (exports, pair_type (valsBeg, valsBeg + valsLen));
2704 size_t numBytesOut = 0;
2705 numBytesOut += PackTraits<LO, D>::packValue (numEntOut, numEntLO);
2706 numBytesOut += PackTraits<GO, D>::packArray (gidsOut, gidsIn, numEnt);
2707 numBytesOut += PackTraits<ST, D>::packArray (valsOut, valsIn, numScalarEnt);
2709 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2710 TEUCHOS_TEST_FOR_EXCEPTION(
2711 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: " 2712 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 2713 << expectedNumBytes <<
".");
2718 template<
class ST,
class LO,
class GO,
class D>
2723 const size_t offset,
2724 const size_t numBytes,
2725 const size_t numEnt,
2726 const size_t numBytesPerValue,
2727 const size_t blockSize)
2729 using Kokkos::subview;
2736 typedef typename PackTraits<LO, D>::input_buffer_type input_buffer_type;
2737 typedef typename input_buffer_type::size_type size_type;
2738 typedef typename std::pair<size_type, size_type> pair_type;
2740 if (numBytes == 0) {
2744 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2745 TEUCHOS_TEST_FOR_EXCEPTION(
2746 static_cast<size_t> (imports.dimension_0 ()) <= offset,
2747 std::logic_error,
"unpackRow: imports.dimension_0() = " 2748 << imports.dimension_0 () <<
" <= offset = " << offset <<
".");
2749 TEUCHOS_TEST_FOR_EXCEPTION(
2750 static_cast<size_t> (imports.dimension_0 ()) < offset + numBytes,
2751 std::logic_error,
"unpackRow: imports.dimension_0() = " 2752 << imports.dimension_0 () <<
" < offset + numBytes = " 2753 << (offset + numBytes) <<
".");
2758 const size_t numEntBeg = offset;
2759 const size_t numEntLen = PackTraits<LO, D>::packValueCount (lid);
2760 const size_t gidsBeg = numEntBeg + numEntLen;
2761 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2762 const size_t valsBeg = gidsBeg + gidsLen;
2763 const size_t valsLen = numScalarEnt * numBytesPerValue;
2765 input_buffer_type numEntIn =
2766 subview (imports, pair_type (numEntBeg, numEntBeg + numEntLen));
2767 input_buffer_type gidsIn =
2768 subview (imports, pair_type (gidsBeg, gidsBeg + gidsLen));
2769 input_buffer_type valsIn =
2770 subview (imports, pair_type (valsBeg, valsBeg + valsLen));
2772 size_t numBytesOut = 0;
2774 numBytesOut += PackTraits<LO, D>::unpackValue (numEntOut, numEntIn);
2775 TEUCHOS_TEST_FOR_EXCEPTION(
2776 static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
2777 "unpackRow: Expected number of entries " << numEnt
2778 <<
" != actual number of entries " << numEntOut <<
".");
2780 numBytesOut += PackTraits<GO, D>::unpackArray (gidsOut, gidsIn, numEnt);
2781 numBytesOut += PackTraits<ST, D>::unpackArray (valsOut, valsIn, numScalarEnt);
2783 TEUCHOS_TEST_FOR_EXCEPTION(
2784 numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = " 2785 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
2786 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2787 TEUCHOS_TEST_FOR_EXCEPTION(
2788 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: " 2789 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 2790 << expectedNumBytes <<
".");
2795 template<
class Scalar,
class LO,
class GO,
class Node>
2799 const Teuchos::ArrayView<const LO>& exportLIDs,
2800 Teuchos::Array<packet_type>& exports,
2801 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
2802 size_t& constantNumPackets,
2807 using Kokkos::MemoryUnmanaged;
2808 using Kokkos::subview;
2811 typedef typename View<int*, device_type>::HostMirror::execution_space HES;
2813 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
2814 const bool debug =
false;
2817 std::ostringstream os;
2818 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2819 os <<
"Proc " << myRank <<
": packAndPrepare: exportLIDs.size() = " 2820 << exportLIDs.size () <<
", numPacketsPerLID.size() = " 2821 << numPacketsPerLID.size () << endl;
2822 std::cerr << os.str ();
2825 if (* (this->localError_)) {
2826 std::ostream& err = this->markLocalErrorAndGetStream ();
2827 err <<
"packAndPrepare: The target object of the Import or Export is " 2828 "already in an error state." << endl;
2832 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2835 std::ostream& err = this->markLocalErrorAndGetStream ();
2836 err <<
"packAndPrepare: The source (input) object of the Import or " 2837 "Export is either not a BlockCrsMatrix, or does not have the right " 2838 "template parameters. checkSizes() should have caught this. " 2839 "Please report this bug to the Tpetra developers." << endl;
2844 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2845 const size_type numExportLIDs = exportLIDs.size ();
2847 if (numExportLIDs != numPacketsPerLID.size ()) {
2848 std::ostream& err = this->markLocalErrorAndGetStream ();
2849 err <<
"packAndPrepare: exportLIDs.size() = " << numExportLIDs
2850 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.size ()
2863 constantNumPackets = 0;
2868 size_t totalNumBytes = 0;
2869 size_t totalNumEntries = 0;
2870 size_t maxRowLength = 0;
2871 for (size_type i = 0; i < numExportLIDs; ++i) {
2872 const LO lclRow = exportLIDs[i];
2880 if (numEnt == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2883 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2887 size_t numBytesPerValue = 0;
2893 Scalar* valsRaw = NULL;
2894 const LO* lidsRaw = NULL;
2895 LO actualNumEnt = 0;
2897 src->getLocalRowView (lclRow, lidsRaw, valsRaw, actualNumEnt);
2899 if (numEnt != static_cast<size_t> (actualNumEnt)) {
2900 std::ostream& err = this->markLocalErrorAndGetStream ();
2901 err <<
"packAndPrepare: Local row " << i <<
" claims to have " <<
2902 numEnt <<
"entry/ies, but the View returned by getLocalRowView() " 2903 "has " << actualNumEnt <<
" entry/ies. This should never happen. " 2904 "Please report this bug to the Tpetra developers." << endl;
2907 if (errCode == Teuchos::OrdinalTraits<LO>::invalid ()) {
2908 std::ostream& err = this->markLocalErrorAndGetStream ();
2909 err <<
"packAndPrepare: Local row " << i <<
" is not in the row Map " 2910 "of the source object on the calling process." << endl;
2914 const ST* valsRawST =
2915 const_cast<const ST*
> (
reinterpret_cast<ST*
> (valsRaw));
2916 View<const ST*, HES, MemoryUnmanaged> vals (valsRawST, numScalarEnt);
2921 numBytesPerValue = PackTraits<ST, HES>::packValueCount (vals(0));
2924 const size_t numBytes =
2925 packRowCount<LO, GO, HES> (numEnt, numBytesPerValue, blockSize);
2926 numPacketsPerLID[i] = numBytes;
2927 totalNumBytes += numBytes;
2928 totalNumEntries += numEnt;
2929 maxRowLength = std::max (maxRowLength, numEnt);
2933 const int myRank = graph_.
getComm ()->getRank ();
2934 std::ostringstream os;
2935 os <<
"Proc " << myRank <<
": packAndPrepare: totalNumBytes = " 2936 << totalNumBytes << endl;
2937 std::cerr << os.str ();
2943 exports.resize (totalNumBytes);
2944 if (totalNumEntries > 0) {
2945 View<char*, HES, MemoryUnmanaged> exportsK (exports.getRawPtr (),
2960 View<GO*, HES> gblColInds;
2963 gblColInds = PackTraits<GO, HES>::allocateArray (gid, maxRowLength,
"gids");
2967 for (size_type i = 0; i < numExportLIDs; ++i) {
2968 const LO lclRowInd = exportLIDs[i];
2969 const LO* lclColIndsRaw;
2975 (void) src->getLocalRowView (lclRowInd, lclColIndsRaw, valsRaw, numEntLO);
2976 const size_t numEnt =
static_cast<size_t> (numEntLO);
2977 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2978 View<const LO*, HES, MemoryUnmanaged> lclColInds (lclColIndsRaw, numEnt);
2979 const ST* valsRawST =
const_cast<const ST*
> (
reinterpret_cast<ST*
> (valsRaw));
2980 View<const ST*, HES, MemoryUnmanaged> vals (valsRawST, numScalarEnt);
2985 const size_t numBytesPerValue = numEnt == 0 ?
2986 static_cast<size_t> (0) :
2987 PackTraits<ST, HES>::packValueCount (vals(0));
2990 for (
size_t j = 0; j < numEnt; ++j) {
2995 const size_t numBytes =
2996 packRowForBlockCrs<ST, LO, GO, HES> (exportsK, offset, numEnt, gblColInds,
2997 vals, numBytesPerValue, blockSize);
3002 if (offset != totalNumBytes) {
3003 std::ostream& err = this->markLocalErrorAndGetStream ();
3004 err <<
"packAndPreapre: At end of method, the final offset (in bytes) " 3005 << offset <<
" does not equal the total number of bytes packed " 3006 << totalNumBytes <<
". " 3007 <<
"Please report this bug to the Tpetra developers." << endl;
3013 std::ostringstream os;
3014 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
3015 const bool lclSuccess = ! (* (this->localError_));
3016 os <<
"*** Proc " << myRank <<
": packAndPrepare " 3017 << (lclSuccess ?
"succeeded" :
"FAILED")
3018 <<
" (totalNumEntries = " << totalNumEntries <<
") ***" << endl;
3019 std::cerr << os.str ();
3024 template<
class Scalar,
class LO,
class GO,
class Node>
3028 const Teuchos::ArrayView<const packet_type>& imports,
3029 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
3036 using Kokkos::MemoryUnmanaged;
3037 using Kokkos::subview;
3040 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
3041 typedef typename View<int*, device_type>::HostMirror::execution_space HES;
3042 typedef std::pair<typename View<int*, HES>::size_type,
3043 typename View<int*, HES>::size_type> pair_type;
3044 typedef View<GO*, HES, MemoryUnmanaged> gids_out_type;
3045 typedef View<LO*, HES, MemoryUnmanaged> lids_out_type;
3046 typedef View<ST*, HES, MemoryUnmanaged> vals_out_type;
3047 typedef typename PackTraits<GO, HES>::input_buffer_type input_buffer_type;
3048 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::unpackAndCombine: ";
3049 const bool debug =
false;
3052 std::ostringstream os;
3053 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
3054 os <<
"Proc " << myRank <<
": unpackAndCombine" << endl;
3055 std::cerr << os.str ();
3061 if (* (this->localError_)) {
3062 std::ostream& err = this->markLocalErrorAndGetStream ();
3063 err << prefix <<
"The target object of the Import or Export is " 3064 "already in an error state." << endl;
3067 if (importLIDs.size () != numPacketsPerLID.size ()) {
3068 std::ostream& err = this->markLocalErrorAndGetStream ();
3069 err << prefix <<
"importLIDs.size() = " << importLIDs.size () <<
" != " 3070 "numPacketsPerLID.size() = " << numPacketsPerLID.size () <<
"." << endl;
3074 std::ostream& err = this->markLocalErrorAndGetStream ();
3075 err << prefix <<
"Invalid CombineMode value " << CM <<
". Valid " 3076 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO." 3086 const size_type numImportLIDs = importLIDs.size ();
3087 if (CM ==
ZERO || numImportLIDs == 0) {
3089 std::ostringstream os;
3090 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
3091 os <<
"Proc " << myRank <<
": unpackAndCombine: Nothing to do" << endl;
3092 std::cerr << os.str ();
3098 std::ostringstream os;
3099 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
3100 os <<
"Proc " << myRank <<
": unpackAndCombine: Getting ready" << endl;
3101 std::cerr << os.str ();
3104 input_buffer_type importsK (imports.getRawPtr (), imports.size ());
3107 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
3109 size_t numBytesPerValue;
3121 numBytesPerValue = PackTraits<ST, HES>::packValueCount (val);
3128 View<GO*, HES> gblColInds;
3129 View<LO*, HES> lclColInds;
3130 View<ST*, HES> vals;
3144 gblColInds = PackTraits<GO, HES>::allocateArray (gid, maxRowNumEnt,
"gids");
3145 lclColInds = PackTraits<LO, HES>::allocateArray (lid, maxRowNumEnt,
"lids");
3146 vals = PackTraits<ST, HES>::allocateArray (val, maxRowNumScalarEnt,
"vals");
3150 bool errorDuringUnpack =
false;
3151 for (size_type i = 0; i < numImportLIDs; ++i) {
3152 const size_t numBytes = numPacketsPerLID[i];
3153 if (numBytes == 0) {
3156 const size_t numEnt =
3157 unpackRowCount<ST, LO, GO, HES> (importsK, offset, numBytes,
3159 if (numEnt > maxRowNumEnt) {
3160 errorDuringUnpack =
true;
3161 #ifdef HAVE_TPETRA_DEBUG 3162 std::ostream& err = this->markLocalErrorAndGetStream ();
3163 err << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
3164 <<
" > maxRowNumEnt = " << maxRowNumEnt << endl;
3165 #endif // HAVE_TPETRA_DEBUG 3169 const size_t numScalarEnt = numEnt * blockSize * blockSize;
3170 const LO lclRow = importLIDs[i];
3172 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
3173 vals_out_type valsOut = subview (vals, pair_type (0, numScalarEnt));
3175 const size_t numBytesOut =
3176 unpackRowForBlockCrs<ST, LO, GO, HES> (gidsOut, valsOut, importsK,
3177 offset, numBytes, numEnt,
3178 numBytesPerValue, blockSize);
3179 if (numBytes != numBytesOut) {
3180 errorDuringUnpack =
true;
3181 #ifdef HAVE_TPETRA_DEBUG 3182 std::ostream& err = this->markLocalErrorAndGetStream ();
3183 err << prefix <<
"At i = " << i <<
", numBytes = " << numBytes
3184 <<
" != numBytesOut = " << numBytesOut <<
".";
3185 #endif // HAVE_TPETRA_DEBUG 3190 lids_out_type lidsOut = subview (lclColInds, pair_type (0, numEnt));
3191 for (
size_t k = 0; k < numEnt; ++k) {
3193 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
3194 errorDuringUnpack =
true;
3195 #ifdef HAVE_TPETRA_DEBUG 3196 std::ostream& err = this->markLocalErrorAndGetStream ();
3197 err << prefix <<
"At i = " << i <<
", GID " << gidsOut(k)
3198 <<
" is not owned by the calling process.";
3199 #endif // HAVE_TPETRA_DEBUG 3206 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.ptr_on_device ());
3207 const Scalar*
const valsRaw =
3208 reinterpret_cast<const Scalar*
> (
const_cast<const ST*
> (valsOut.ptr_on_device ()));
3213 }
else if (CM ==
ABSMAX) {
3217 if (static_cast<LO> (numEnt) != numCombd) {
3218 errorDuringUnpack =
true;
3219 #ifdef HAVE_TPETRA_DEBUG 3220 std::ostream& err = this->markLocalErrorAndGetStream ();
3221 err << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
3222 <<
" != numCombd = " << numCombd <<
".";
3223 #endif // HAVE_TPETRA_DEBUG 3231 if (errorDuringUnpack) {
3232 std::ostream& err = this->markLocalErrorAndGetStream ();
3233 err << prefix <<
"Unpacking failed.";
3234 #ifndef HAVE_TPETRA_DEBUG 3235 err <<
" Please run again with a debug build to get more verbose " 3236 "diagnostic output.";
3237 #endif // ! HAVE_TPETRA_DEBUG 3242 std::ostringstream os;
3243 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
3244 const bool lclSuccess = ! (* (this->localError_));
3245 os <<
"*** Proc " << myRank <<
": unpackAndCombine " 3246 << (lclSuccess ?
"succeeded" :
"FAILED")
3248 std::cerr << os.str ();
3253 template<
class Scalar,
class LO,
class GO,
class Node>
3257 using Teuchos::TypeNameTraits;
3258 std::ostringstream os;
3259 os <<
"\"Tpetra::BlockCrsMatrix\": { " 3260 <<
"Template parameters: { " 3261 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
3262 <<
"LO: " << TypeNameTraits<LO>::name ()
3263 <<
"GO: " << TypeNameTraits<GO>::name ()
3264 <<
"Node: " << TypeNameTraits<Node>::name ()
3266 <<
", Label: \"" << this->getObjectLabel () <<
"\"" 3267 <<
", Global dimensions: [" 3268 << graph_.
getDomainMap ()->getGlobalNumElements () <<
", " 3269 << graph_.
getRangeMap ()->getGlobalNumElements () <<
"]" 3276 template<
class Scalar,
class LO,
class GO,
class Node>
3280 const Teuchos::EVerbosityLevel verbLevel)
const 3282 using Teuchos::ArrayRCP;
3283 using Teuchos::CommRequest;
3284 using Teuchos::FancyOStream;
3285 using Teuchos::getFancyOStream;
3286 using Teuchos::ireceive;
3287 using Teuchos::isend;
3288 using Teuchos::outArg;
3289 using Teuchos::TypeNameTraits;
3290 using Teuchos::VERB_DEFAULT;
3291 using Teuchos::VERB_NONE;
3292 using Teuchos::VERB_LOW;
3293 using Teuchos::VERB_MEDIUM;
3295 using Teuchos::VERB_EXTREME;
3297 using Teuchos::wait;
3299 #ifdef HAVE_TPETRA_DEBUG 3300 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::describe: ";
3301 #endif // HAVE_TPETRA_DEBUG 3304 const Teuchos::EVerbosityLevel vl =
3305 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
3307 if (vl == VERB_NONE) {
3312 Teuchos::OSTab tab0 (out);
3314 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
3315 Teuchos::OSTab tab1 (out);
3317 out <<
"Template parameters:" << endl;
3319 Teuchos::OSTab tab2 (out);
3320 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
3321 <<
"LO: " << TypeNameTraits<LO>::name () << endl
3322 <<
"GO: " << TypeNameTraits<GO>::name () << endl
3323 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
3325 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
3326 <<
"Global dimensions: [" 3327 << graph_.
getDomainMap ()->getGlobalNumElements () <<
", " 3328 << graph_.
getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
3331 out <<
"Block size: " << blockSize << endl;
3334 if (vl >= VERB_MEDIUM) {
3335 const Teuchos::Comm<int>& comm = * (graph_.
getMap ()->getComm ());
3336 const int myRank = comm.getRank ();
3338 out <<
"Row Map:" << endl;
3345 out <<
"Column Map: same as row Map" << endl;
3350 out <<
"Column Map:" << endl;
3358 out <<
"Domain Map: same as row Map" << endl;
3363 out <<
"Domain Map: same as column Map" << endl;
3368 out <<
"Domain Map:" << endl;
3376 out <<
"Range Map: same as domain Map" << endl;
3381 out <<
"Range Map: same as row Map" << endl;
3386 out <<
"Range Map: " << endl;
3393 if (vl >= VERB_EXTREME) {
3399 const_cast<this_type*
> (
this)->
template sync<Kokkos::HostSpace> ();
3401 #ifdef HAVE_TPETRA_DEBUG 3402 TEUCHOS_TEST_FOR_EXCEPTION
3403 (this->
template need_sync<Kokkos::HostSpace> (), std::logic_error,
3404 prefix <<
"Right after sync to host, the matrix claims that it needs " 3405 "sync to host. Please report this bug to the Tpetra developers.");
3406 #endif // HAVE_TPETRA_DEBUG 3408 const Teuchos::Comm<int>& comm = * (graph_.
getMap ()->getComm ());
3409 const int myRank = comm.getRank ();
3410 const int numProcs = comm.getSize ();
3413 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
3414 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
3415 FancyOStream& os = *osPtr;
3416 os <<
"Process " << myRank <<
":" << endl;
3417 Teuchos::OSTab tab2 (os);
3425 os <<
"Row " << meshGblRow <<
": {";
3427 const LO* lclColInds = NULL;
3428 Scalar* vals = NULL;
3432 for (LO k = 0; k < numInds; ++k) {
3435 os <<
"Col " << gblCol <<
": [";
3436 for (LO i = 0; i < blockSize; ++i) {
3437 for (LO j = 0; j < blockSize; ++j) {
3438 os << vals[blockSize*blockSize*k + i*blockSize + j];
3439 if (j + 1 < blockSize) {
3443 if (i + 1 < blockSize) {
3448 if (k + 1 < numInds) {
3458 out << lclOutStrPtr->str ();
3459 lclOutStrPtr = Teuchos::null;
3462 const int sizeTag = 1337;
3463 const int dataTag = 1338;
3465 ArrayRCP<char> recvDataBuf;
3469 for (
int p = 1; p < numProcs; ++p) {
3472 ArrayRCP<size_t> recvSize (1);
3474 RCP<CommRequest<int> > recvSizeReq =
3475 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
3476 wait<int> (comm, outArg (recvSizeReq));
3477 const size_t numCharsToRecv = recvSize[0];
3484 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
3485 recvDataBuf.resize (numCharsToRecv + 1);
3487 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
3489 RCP<CommRequest<int> > recvDataReq =
3490 ireceive<int, char> (recvData, p, dataTag, comm);
3491 wait<int> (comm, outArg (recvDataReq));
3496 recvDataBuf[numCharsToRecv] =
'\0';
3497 out << recvDataBuf.getRawPtr ();
3499 else if (myRank == p) {
3503 const std::string stringToSend = lclOutStrPtr->str ();
3504 lclOutStrPtr = Teuchos::null;
3507 const size_t numCharsToSend = stringToSend.size ();
3508 ArrayRCP<size_t> sendSize (1);
3509 sendSize[0] = numCharsToSend;
3510 RCP<CommRequest<int> > sendSizeReq =
3511 isend<int, size_t> (sendSize, 0, sizeTag, comm);
3512 wait<int> (comm, outArg (sendSizeReq));
3520 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
3521 RCP<CommRequest<int> > sendDataReq =
3522 isend<int, char> (sendData, 0, dataTag, comm);
3523 wait<int> (comm, outArg (sendDataReq));
3529 template<
class Scalar,
class LO,
class GO,
class Node>
3530 Teuchos::RCP<const Teuchos::Comm<int> >
3537 template<
class Scalar,
class LO,
class GO,
class Node>
3546 template<
class Scalar,
class LO,
class GO,
class Node>
3554 template<
class Scalar,
class LO,
class GO,
class Node>
3562 template<
class Scalar,
class LO,
class GO,
class Node>
3570 template<
class Scalar,
class LO,
class GO,
class Node>
3578 template<
class Scalar,
class LO,
class GO,
class Node>
3586 template<
class Scalar,
class LO,
class GO,
class Node>
3594 template<
class Scalar,
class LO,
class GO,
class Node>
3602 template<
class Scalar,
class LO,
class GO,
class Node>
3610 template<
class Scalar,
class LO,
class GO,
class Node>
3618 template<
class Scalar,
class LO,
class GO,
class Node>
3626 template<
class Scalar,
class LO,
class GO,
class Node>
3634 template<
class Scalar,
class LO,
class GO,
class Node>
3642 template<
class Scalar,
class LO,
class GO,
class Node>
3650 template<
class Scalar,
class LO,
class GO,
class Node>
3658 template<
class Scalar,
class LO,
class GO,
class Node>
3666 template<
class Scalar,
class LO,
class GO,
class Node>
3675 template<
class Scalar,
class LO,
class GO,
class Node>
3679 const Teuchos::ArrayView<GO> &Indices,
3680 const Teuchos::ArrayView<Scalar> &Values,
3681 size_t &NumEntries)
const 3683 TEUCHOS_TEST_FOR_EXCEPTION(
3684 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getGlobalRowCopy: " 3685 "This class doesn't support global matrix indexing.");
3689 template<
class Scalar,
class LO,
class GO,
class Node>
3693 Teuchos::ArrayView<const GO> &indices,
3694 Teuchos::ArrayView<const Scalar> &values)
const 3696 TEUCHOS_TEST_FOR_EXCEPTION(
3697 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getGlobalRowView: " 3698 "This class doesn't support global matrix indexing.");
3702 template<
class Scalar,
class LO,
class GO,
class Node>
3706 Teuchos::ArrayView<const LO> &indices,
3707 Teuchos::ArrayView<const Scalar> &values)
const 3709 TEUCHOS_TEST_FOR_EXCEPTION(
3710 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getLocalRowView: " 3711 "This class doesn't support local matrix indexing.");
3716 template<
class Scalar,
class LO,
class GO,
class Node>
3721 #ifdef HAVE_TPETRA_DEBUG 3722 const char prefix[] =
3723 "Tpetra::Experimental::BlockCrsMatrix::getLocalDiagCopy: ";
3724 #endif // HAVE_TPETRA_DEBUG 3728 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
3732 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
3735 diag.template modify<typename decltype (diagOffsetsHost)::memory_space> ();
3737 #ifdef HAVE_TPETRA_DEBUG 3738 TEUCHOS_TEST_FOR_EXCEPTION
3739 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
3740 prefix <<
"The matrix's data were last modified on device, but have " 3741 "not been sync'd to host. Please sync to host (by calling " 3742 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 3744 #endif // HAVE_TPETRA_DEBUG 3750 auto vals_host_out =
3751 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
3752 Scalar* vals_host_out_raw =
3753 reinterpret_cast<Scalar*
> (vals_host_out.ptr_on_device ());
3756 size_t rowOffset = 0;
3762 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
3763 for(
int b=0; b<bs; b++)
3771 diag.template sync<memory_space> ();
3774 template<
class Scalar,
class LO,
class GO,
class Node>
3779 TEUCHOS_TEST_FOR_EXCEPTION(
3780 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::leftScale: " 3781 "not implemented.");
3785 template<
class Scalar,
class LO,
class GO,
class Node>
3790 TEUCHOS_TEST_FOR_EXCEPTION(
3791 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::rightScale: " 3792 "not implemented.");
3796 template<
class Scalar,
class LO,
class GO,
class Node>
3797 Teuchos::RCP<const Tpetra::RowGraph<LO, GO, Node> >
3804 template<
class Scalar,
class LO,
class GO,
class Node>
3809 TEUCHOS_TEST_FOR_EXCEPTION(
3810 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getFrobeniusNorm: " 3811 "not implemented.");
3822 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \ 3823 template class Experimental::BlockCrsMatrix< S, LO, GO, NODE >; 3825 #endif // TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP virtual Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const
The Frobenius norm of the matrix.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Like sumIntoLocalValues, but for the ABSMAX combine mode.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
virtual void copyAndPermute(const Tpetra::SrcDistObject &source, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Perform copies and permutations that are local to this process.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
virtual size_t getNodeNumEntries() const
The local number of stored (structurally nonzero) entries.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
Kokkos::View< impl_scalar_type *, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_vec_type
The type used to access nonconst vector blocks.
size_t getNodeNumEntries() const
Returns the local number of entries in the graph.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
GlobalOrdinal getIndexBase() const
Returns the index base for global indices for this graph.
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const
The current number of entries on the calling process in the specified global row. ...
global_size_t getGlobalNumRows() const
get the global number of block rows
global_size_t getGlobalNumEntries() const
Returns the global number of entries in the graph.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
Teuchos::RCP< const map_type > getRowMap() const
Returns the Map that describes the row distribution in this graph.
Kokkos::View< impl_scalar_type **, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
std::string errorMessages() const
The current stream of error messages.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
LocalOrdinal getMaxLocalIndex() const
The maximum local index on the calling process.
Traits class for packing / unpacking data of type T, using Kokkos data structures that live in the gi...
Teuchos::RCP< const map_type > getRangeMap() const
Returns the Map associated with the domain of this graph.
One or more distributed dense vectors.
Node::device_type device_type
The Kokkos::Device specialization that this class uses.
MultiVector for multiple degrees of freedom per mesh point.
size_t getNumEntriesInLocalRow(const LO localRowInd) const
Return the number of entries in the given row on the calling process.
global_size_t getGlobalNumRows() const
Returns the number of global rows in the graph.
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
virtual Teuchos::RCP< const Tpetra::RowGraph< LO, GO, Node > > getGraph() const
Get the (mesh) graph.
virtual void getGlobalRowCopy(GO GlobalRow, const Teuchos::ArrayView< GO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Get a copy of the given global row's entries.
virtual global_size_t getGlobalNumCols() const
The global number of columns of this matrix.
size_t getNodeNumDiags() const
Returns the number of local diagonal entries, based on global row/column index comparisons.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
std::string description() const
One-line description of this object.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const
Returns the current number of entries on this node in the specified global row.
virtual size_t getNodeNumDiags() const
The number of local diagonal entries, based on global row/column index comparisons.
bool isFillComplete() const
Returns true if fillComplete() has been called and the graph is in compute mode.
LO getLocalRowView(const LO localRowInd, const LO *&colInds, Scalar *&vals, LO &numInds) const
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns the communicator.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
bool locallySameAs(const Map< LocalOrdinal, GlobalOrdinal, node_type > &map) const
Is this Map locally the same as the input Map?
virtual bool isGloballyIndexed() const
Whether matrix indices are globally indexed.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
int local_ordinal_type
Default value of LocalOrdinal template parameter.
virtual bool isLowerTriangular() const
Whether this matrix is lower triangular.
virtual global_size_t getGlobalNumDiags() const
The number of global diagonal entries, based on global row/column index comparisons.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
virtual bool checkSizes(const Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
Kokkos::View< char *, D, Kokkos::MemoryUnmanaged > output_buffer_type
The type of an output buffer of bytes.
size_t getNodeNumCols() const
Returns the number of columns connected to the locally owned rows of this graph.
Teuchos::RCP< node_type > getNode() const
Returns the underlying node.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
Teuchos::RCP< const map_type > getDomainMap() const
Returns the Map associated with the domain of this graph.
virtual size_t getNodeNumCols() const
The number of columns needed to apply the forward operator on this node.
size_t global_size_t
Global size_t object.
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
Insert new values that don't currently exist.
Teuchos::RCP< const import_type > getImporter() const
Returns the importer associated with this graph.
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
void reorderedGaussSeidelCopy(MultiVector< Scalar, LO, GO, Node > &X, const MultiVector< Scalar, LO, GO, Node > &B, const MultiVector< Scalar, LO, GO, Node > &D, const Teuchos::ArrayView< LO > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of reorderedGaussSeidel(), with fewer requirements on X.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
LO absMaxLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValuesByOffsets, but for the ABSMAX combine mode.
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Teuchos::RCP< const export_type > getExporter() const
Returns the exporter associated with this graph.
virtual Teuchos::RCP< Node > getNode() const
The Kokkos Node instance.
Sets up and executes a communication plan for a Tpetra DistObject.
void gaussSeidelCopy(MultiVector< Scalar, LO, GO, Node > &X, const MultiVector< Scalar, LO, GO, Node > &B, const MultiVector< Scalar, LO, GO, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of gaussSeidel(), with fewer requirements on X.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
size_t getNodeMaxNumRowEntries() const
The maximum number of entries across all rows/columns on this node.
Tpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node >::mag_type mag_type
Type of a norm result.
Kokkos::View< const value_type *, D, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.
virtual void leftScale(const Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the left with the given Vector x.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Kokkos::View< const char *, D, Kokkos::MemoryUnmanaged > input_buffer_type
The type of an input buffer of bytes.
Replace old value with maximum of magnitudes of old and new values.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
Abstract base class for objects that can be the source of an Import or Export operation.
virtual size_t getGlobalMaxNumRowEntries() const
The maximum number of entries across all rows/columns on all nodes.
double scalar_type
Default value of Scalar template parameter.
size_t getNodeNumRows() const
Returns the number of graph rows owned on the calling node.
virtual bool supportsRowViews() const
Whether this object implements getLocalRowView() and getGlobalRowView().
Replace existing values with new values.
void getLocalRowCopy(LO LocalRow, const Teuchos::ArrayView< LO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Not implemented.
Teuchos::RCP< const map_type > getRowMap() const
get the (mesh) map for the rows of this block matrix.
Kokkos::View< const impl_scalar_type **, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > const_little_block_type
The type used to access const matrix blocks.
bool isLocallyIndexed() const
If graph indices are in the local range, this function returns true. Otherwise, this function returns...
Replace old values with zero.
size_t getNodeMaxNumRowEntries() const
Maximum number of entries in all rows owned by the calling process.
mv_type getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector's data.
bool isUpperTriangular() const
Whether the graph is locally upper triangular.
virtual bool hasColMap() const
Whether this matrix has a well-defined column map.
size_t getGlobalMaxNumRowEntries() const
Maximum number of entries in all rows over all processes.
Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
bool isLowerTriangular() const
Whether the graph is locally lower triangular.
virtual bool isUpperTriangular() const
Whether this matrix is upper triangular.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
bool isGloballyIndexed() const
If graph indices are in the global range, this function returns true. Otherwise, this function return...
virtual global_size_t getGlobalNumEntries() const
The global number of stored (structurally nonzero) entries.
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value) const
Replace current value at the specified location with specified values.
virtual void rightScale(const Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the right with the given Vector x.
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which this matrix is distributed.
virtual void getGlobalRowView(GO GlobalRow, Teuchos::ArrayView< const GO > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
Teuchos::RCP< const map_type > getColMap() const
Returns the Map that describes the column distribution in this graph.
size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const
Get the number of entries in the given row (local index).
virtual GO getIndexBase() const
The index base for global indices in this matrix.
void localGaussSeidel(const BlockMultiVector< Scalar, LO, GO, Node > &Residual, BlockMultiVector< Scalar, LO, GO, Node > &Solution, const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D_inv, const Scalar &omega, const ESweepDirection direction) const
Local Gauss-Seidel solve, given a factorized diagonal.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
little_vec_type::HostMirror getLocalBlock(const LO localRowIndex, const LO colIndex) const
Get a host view of the degrees of freedom at the given mesh point.
Kokkos::View< value_type *, D, Kokkos::MemoryUnmanaged > output_array_type
The type of an output array of value_type.
A distributed dense vector.
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
device_type::memory_space memory_space
The Kokkos memory space that this class uses.
void doExport(const SrcDistObject &source, const Export< LO, GO, Node > &exporter, CombineMode CM)
Export data into this object using an Export object ("forward mode").
Teuchos::RCP< const map_type > getDomainMap() const
Get the (point) domain Map of this matrix.
virtual bool isFillComplete() const
Whether fillComplete() has been called.
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
BlockMultiVector< Scalar, LO, GO, Node >::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
LO getBlockSize() const
The number of degrees of freedom per mesh point.
Teuchos::RCP< const map_type > getColMap() const
get the (mesh) map for the columns of this block matrix.
Declaration of Tpetra::Experimental::BlockCrsMatrix.
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
global_size_t getGlobalNumCols() const
Returns the number of global columns in the graph.
Teuchos::RCP< const map_type > getRangeMap() const
Get the (point) range Map of this matrix.
bool hasColMap() const
Whether the graph has a column Map.
global_size_t getGlobalNumDiags() const
Returns the number of global diagonal entries, based on global row/column index comparisons.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
Print a description of this object to the given output stream.
LocalOrdinal getMinLocalIndex() const
The minimum local index.
size_t getNodeNumRows() const
get the local number of block rows
virtual bool isLocallyIndexed() const
Whether matrix indices are locally indexed.
local_graph_type getLocalGraph() const
Get the local graph.
LO getNumVectors() const
Get the number of columns (vectors) in the BlockMultiVector.