43 #ifndef IFPACK2_RELAXATION_DEF_HPP 44 #define IFPACK2_RELAXATION_DEF_HPP 46 #include "Teuchos_StandardParameterEntryValidators.hpp" 47 #include "Teuchos_TimeMonitor.hpp" 48 #include "Tpetra_CrsMatrix.hpp" 49 #include "Tpetra_Experimental_BlockCrsMatrix.hpp" 51 #include "Ifpack2_Relaxation_decl.hpp" 53 #ifdef HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES 54 # include "KokkosKernels_GaussSeidel.hpp" 55 #endif // HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES 57 #ifdef HAVE_IFPACK2_DUMP_MTX_MATRIX 58 # include "MatrixMarket_Tpetra.hpp" 69 class NonnegativeIntValidator :
public Teuchos::ParameterEntryValidator {
72 NonnegativeIntValidator () {}
75 Teuchos::ParameterEntryValidator::ValidStringsList validStringValues ()
const {
81 validate (
const Teuchos::ParameterEntry& entry,
82 const std::string& paramName,
83 const std::string& sublistName)
const 86 Teuchos::any anyVal = entry.getAny (
true);
87 const std::string entryName = entry.getAny (
false).typeName ();
89 TEUCHOS_TEST_FOR_EXCEPTION(
90 anyVal.type () !=
typeid (int),
91 Teuchos::Exceptions::InvalidParameterType,
92 "Parameter \"" << paramName <<
"\" in sublist \"" << sublistName
93 <<
"\" has the wrong type." << endl <<
"Parameter: " << paramName
94 << endl <<
"Type specified: " << entryName << endl
95 <<
"Type required: int" << endl);
97 const int val = Teuchos::any_cast<
int> (anyVal);
98 TEUCHOS_TEST_FOR_EXCEPTION(
99 val < 0, Teuchos::Exceptions::InvalidParameterValue,
100 "Parameter \"" << paramName <<
"\" in sublist \"" << sublistName
101 <<
"\" is negative." << endl <<
"Parameter: " << paramName
102 << endl <<
"Value specified: " << val << endl
103 <<
"Required range: [0, INT_MAX]" << endl);
107 const std::string getXMLTypeName ()
const {
108 return "NonnegativeIntValidator";
113 printDoc (
const std::string& docString,
114 std::ostream &out)
const 116 Teuchos::StrUtils::printLines (out,
"# ", docString);
117 out <<
"#\tValidator Used: " << std::endl;
118 out <<
"#\t\tNonnegativeIntValidator" << std::endl;
125 template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
129 static const Scalar eps ();
133 template<
class Scalar>
134 class SmallTraits<Scalar, true> {
136 static const Scalar eps () {
137 return Teuchos::ScalarTraits<Scalar>::one ();
142 template<
class Scalar>
143 class SmallTraits<Scalar, false> {
145 static const Scalar eps () {
146 return Teuchos::ScalarTraits<Scalar>::eps ();
153 template<
class MatrixType>
154 void Relaxation<MatrixType>::
155 setMatrix (
const Teuchos::RCP<const row_matrix_type>& A)
157 if (A.getRawPtr () != A_.getRawPtr ()) {
158 Importer_ = Teuchos::null;
159 Diagonal_ = Teuchos::null;
160 isInitialized_ =
false;
162 diagOffsets_ = Kokkos::View<size_t*, typename node_type::device_type> ();
163 savedDiagOffsets_ =
false;
164 hasBlockCrsMatrix_ =
false;
165 if (! A.is_null ()) {
166 IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
173 template<
class MatrixType>
180 DampingFactor_ (STS::one ()),
181 IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
183 A->getRowMap ()->getComm ()->getSize () > 1),
184 ZeroStartingSolution_ (true),
185 DoBackwardGS_ (false),
188 MinDiagonalValue_ (STS::zero ()),
189 fixTinyDiagEntries_ (false),
190 checkDiagEntries_ (false),
191 isInitialized_ (false),
196 InitializeTime_ (0.0),
201 globalMinMagDiagEntryMag_ (STM::zero ()),
202 globalMaxMagDiagEntryMag_ (STM::zero ()),
203 globalNumSmallDiagEntries_ (0),
204 globalNumZeroDiagEntries_ (0),
205 globalNumNegDiagEntries_ (0),
207 savedDiagOffsets_ (false),
208 hasBlockCrsMatrix_ (false)
210 this->setObjectLabel (
"Ifpack2::Relaxation");
214 template<
class MatrixType>
218 template<
class MatrixType>
219 Teuchos::RCP<const Teuchos::ParameterList>
222 using Teuchos::Array;
223 using Teuchos::ParameterList;
224 using Teuchos::parameterList;
227 using Teuchos::rcp_const_cast;
228 using Teuchos::rcp_implicit_cast;
229 using Teuchos::setStringToIntegralParameter;
230 typedef Teuchos::ParameterEntryValidator PEV;
232 if (validParams_.is_null ()) {
233 RCP<ParameterList> pl = parameterList (
"Ifpack2::Relaxation");
237 Array<std::string> precTypes (5);
238 precTypes[0] =
"Jacobi";
239 precTypes[1] =
"Gauss-Seidel";
240 precTypes[2] =
"Symmetric Gauss-Seidel";
241 precTypes[3] =
"MT Gauss-Seidel";
242 precTypes[4] =
"MT Symmetric Gauss-Seidel";
243 Array<Details::RelaxationType> precTypeEnums (5);
244 precTypeEnums[0] = Details::JACOBI;
245 precTypeEnums[1] = Details::GS;
246 precTypeEnums[2] = Details::SGS;
247 precTypeEnums[3] = Details::MTGS;
248 precTypeEnums[4] = Details::MTSGS;
249 const std::string defaultPrecType (
"Jacobi");
250 setStringToIntegralParameter<Details::RelaxationType> (
"relaxation: type",
251 defaultPrecType,
"Relaxation method", precTypes (), precTypeEnums (),
254 const int numSweeps = 1;
255 RCP<PEV> numSweepsValidator =
256 rcp_implicit_cast<PEV> (rcp (
new NonnegativeIntValidator));
257 pl->set (
"relaxation: sweeps", numSweeps,
"Number of relaxation sweeps",
258 rcp_const_cast<const PEV> (numSweepsValidator));
261 pl->set (
"relaxation: damping factor", dampingFactor);
263 const bool zeroStartingSolution =
true;
264 pl->set (
"relaxation: zero starting solution", zeroStartingSolution);
266 const bool doBackwardGS =
false;
267 pl->set (
"relaxation: backward mode", doBackwardGS);
269 const bool doL1Method =
false;
270 pl->set (
"relaxation: use l1", doL1Method);
272 const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
273 (STM::one() + STM::one());
274 pl->set (
"relaxation: l1 eta", l1eta);
277 pl->set (
"relaxation: min diagonal value", minDiagonalValue);
279 const bool fixTinyDiagEntries =
false;
280 pl->set (
"relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
282 const bool checkDiagEntries =
false;
283 pl->set (
"relaxation: check diagonal entries", checkDiagEntries);
285 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
286 pl->set(
"relaxation: local smoothing indices", localSmoothingIndices);
288 validParams_ = rcp_const_cast<
const ParameterList> (pl);
294 template<
class MatrixType>
297 using Teuchos::getIntegralValue;
298 using Teuchos::ParameterList;
304 const Details::RelaxationType precType =
305 getIntegralValue<Details::RelaxationType> (pl,
"relaxation: type");
306 const int numSweeps = pl.get<
int> (
"relaxation: sweeps");
307 const ST dampingFactor = pl.get<ST> (
"relaxation: damping factor");
308 const bool zeroStartSol = pl.get<
bool> (
"relaxation: zero starting solution");
309 const bool doBackwardGS = pl.get<
bool> (
"relaxation: backward mode");
310 const bool doL1Method = pl.get<
bool> (
"relaxation: use l1");
312 const ST minDiagonalValue = pl.get<ST> (
"relaxation: min diagonal value");
313 const bool fixTinyDiagEntries = pl.get<
bool> (
"relaxation: fix tiny diagonal entries");
314 const bool checkDiagEntries = pl.get<
bool> (
"relaxation: check diagonal entries");
315 Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >(
"relaxation: local smoothing indices");
319 PrecType_ = precType;
320 NumSweeps_ = numSweeps;
321 DampingFactor_ = dampingFactor;
322 ZeroStartingSolution_ = zeroStartSol;
323 DoBackwardGS_ = doBackwardGS;
324 DoL1Method_ = doL1Method;
326 MinDiagonalValue_ = minDiagonalValue;
327 fixTinyDiagEntries_ = fixTinyDiagEntries;
328 checkDiagEntries_ = checkDiagEntries;
329 localSmoothingIndices_ = localSmoothingIndices;
333 template<
class MatrixType>
338 this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
342 template<
class MatrixType>
343 Teuchos::RCP<const Teuchos::Comm<int> >
345 TEUCHOS_TEST_FOR_EXCEPTION(
346 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getComm: " 347 "The input matrix A is null. Please call setMatrix() with a nonnull " 348 "input matrix before calling this method.");
349 return A_->getRowMap ()->getComm ();
353 template<
class MatrixType>
354 Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
360 template<
class MatrixType>
361 Teuchos::RCP<
const Tpetra::Map<
typename MatrixType::local_ordinal_type,
362 typename MatrixType::global_ordinal_type,
363 typename MatrixType::node_type> >
365 TEUCHOS_TEST_FOR_EXCEPTION(
366 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getDomainMap: " 367 "The input matrix A is null. Please call setMatrix() with a nonnull " 368 "input matrix before calling this method.");
369 return A_->getDomainMap ();
373 template<
class MatrixType>
374 Teuchos::RCP<
const Tpetra::Map<
typename MatrixType::local_ordinal_type,
375 typename MatrixType::global_ordinal_type,
376 typename MatrixType::node_type> >
378 TEUCHOS_TEST_FOR_EXCEPTION(
379 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getRangeMap: " 380 "The input matrix A is null. Please call setMatrix() with a nonnull " 381 "input matrix before calling this method.");
382 return A_->getRangeMap ();
386 template<
class MatrixType>
392 template<
class MatrixType>
394 return(NumInitialize_);
398 template<
class MatrixType>
404 template<
class MatrixType>
410 template<
class MatrixType>
412 return(InitializeTime_);
416 template<
class MatrixType>
418 return(ComputeTime_);
422 template<
class MatrixType>
428 template<
class MatrixType>
430 return(ComputeFlops_);
434 template<
class MatrixType>
441 template<
class MatrixType>
443 TEUCHOS_TEST_FOR_EXCEPTION(
444 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::getNodeSmootherComplexity: " 445 "The input matrix A is null. Please call setMatrix() with a nonnull " 446 "input matrix, then call compute(), before calling this method.");
448 return A_->getNodeNumRows() + A_->getNodeNumEntries();
452 template<
class MatrixType>
455 apply (
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
456 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
457 Teuchos::ETransp mode,
464 using Teuchos::rcpFromRef;
467 TEUCHOS_TEST_FOR_EXCEPTION(
468 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::apply: " 469 "The input matrix A is null. Please call setMatrix() with a nonnull " 470 "input matrix, then call compute(), before calling this method.");
471 TEUCHOS_TEST_FOR_EXCEPTION(
474 "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 " 475 "preconditioner instance before you may call apply(). You may call " 476 "isComputed() to find out if compute() has been called already.");
477 TEUCHOS_TEST_FOR_EXCEPTION(
478 X.getNumVectors() != Y.getNumVectors(),
480 "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. " 481 "X has " << X.getNumVectors() <<
" columns, but Y has " 482 << Y.getNumVectors() <<
" columns.");
483 TEUCHOS_TEST_FOR_EXCEPTION(
484 beta != STS::zero (), std::logic_error,
485 "Ifpack2::Relaxation::apply: beta = " << beta <<
" != 0 case not " 490 Teuchos::TimeMonitor timeMon (*Time_,
true);
493 if (alpha == STS::zero ()) {
495 Y.putScalar (STS::zero ());
503 auto X_lcl_host = X.template getLocalView<Kokkos::HostSpace> ();
504 auto Y_lcl_host = Y.template getLocalView<Kokkos::HostSpace> ();
505 if (X_lcl_host.ptr_on_device () == Y_lcl_host.ptr_on_device ()) {
506 Xcopy = rcp (
new MV (X, Teuchos::Copy));
508 Xcopy = rcpFromRef (X);
516 case Ifpack2::Details::JACOBI:
517 ApplyInverseJacobi(*Xcopy,Y);
519 case Ifpack2::Details::GS:
520 ApplyInverseGS(*Xcopy,Y);
522 case Ifpack2::Details::SGS:
523 ApplyInverseSGS(*Xcopy,Y);
525 case Ifpack2::Details::MTSGS:
526 ApplyInverseMTSGS_CrsMatrix(*Xcopy,Y);
528 case Ifpack2::Details::MTGS:
529 ApplyInverseMTGS_CrsMatrix(*Xcopy,Y);
533 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
534 "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value " 535 << PrecType_ <<
". Please report this bug to the Ifpack2 developers.");
537 if (alpha != STS::one ()) {
539 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
540 const double numVectors = as<double> (Y.getNumVectors ());
541 ApplyFlops_ += numGlobalRows * numVectors;
545 ApplyTime_ += Time_->totalElapsedTime ();
550 template<
class MatrixType>
553 applyMat (
const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
554 Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
555 Teuchos::ETransp mode)
const 557 TEUCHOS_TEST_FOR_EXCEPTION(
558 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::applyMat: " 559 "The input matrix A is null. Please call setMatrix() with a nonnull " 560 "input matrix, then call compute(), before calling this method.");
561 TEUCHOS_TEST_FOR_EXCEPTION(
562 !
isComputed (), std::runtime_error,
"Ifpack2::Relaxation::applyMat: " 563 "isComputed() must be true before you may call applyMat(). " 564 "Please call compute() before calling this method.");
565 TEUCHOS_TEST_FOR_EXCEPTION(
566 X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
567 "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
568 <<
" != Y.getNumVectors() = " << Y.getNumVectors () <<
".");
569 A_->apply (X, Y, mode);
573 template<
class MatrixType>
576 TEUCHOS_TEST_FOR_EXCEPTION
577 (A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::initialize: " 578 "The input matrix A is null. Please call setMatrix() with a nonnull " 579 "input matrix before calling this method.");
580 Teuchos::TimeMonitor timeMon (*Time_,
true);
583 hasBlockCrsMatrix_ =
false;
586 Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
587 Teuchos::rcp_dynamic_cast<
const block_crs_matrix_type> (A_);
588 if (A_bcrs.is_null ()) {
589 hasBlockCrsMatrix_ =
false;
592 hasBlockCrsMatrix_ =
true;
596 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
597 #ifdef HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES 598 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (&(*A_));
599 TEUCHOS_TEST_FOR_EXCEPTION
600 (crsMat == NULL, std::logic_error,
"Ifpack2::Relaxation::initialize: " 601 "Multithreaded Gauss-Seidel methods currently only work when the input " 602 "matrix is a Tpetra::CrsMatrix.");
604 #ifdef HAVE_IFPACK2_DUMP_MTX_MATRIX 605 Tpetra::MatrixMarket::Writer<crs_matrix_type> crs_writer;
606 std::string file_name =
"Ifpack2_MT_GS.mtx";
607 Teuchos::RCP<const crs_matrix_type> rcp_crs_mat = Teuchos::rcp_dynamic_cast<
const crs_matrix_type> (A_);
608 crs_writer.writeSparseFile(file_name, rcp_crs_mat);
611 this->mtKernelHandle_ = Teuchos::rcp (
new mt_kernel_handle_type ());
612 if (mtKernelHandle_->get_gs_handle () == NULL) {
613 mtKernelHandle_->create_gs_handle ();
615 local_matrix_type kcsr = crsMat->getLocalMatrix ();
617 const bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
618 using KokkosKernels::Experimental::Graph::gauss_seidel_symbolic;
619 gauss_seidel_symbolic<mt_kernel_handle_type,
621 lno_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
622 A_->getNodeNumRows (),
623 A_->getNodeNumCols (),
627 #else // HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES 628 TEUCHOS_TEST_FOR_EXCEPTION
629 (
true, std::logic_error,
"The multithreaded implementation of Gauss-Seidel " 630 "is not enabled. Please talk to the Ifpack2 developers about how to " 631 "enable this method.");
632 #endif // HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES 636 InitializeTime_ += Time_->totalElapsedTime ();
638 isInitialized_ =
true;
642 template <
typename BlockDiagView>
643 struct InvertDiagBlocks {
644 typedef int value_type;
645 typedef typename BlockDiagView::size_type Size;
648 typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
649 template <
typename View>
650 using UnmanagedView = Kokkos::View<
typename View::data_type,
typename View::array_layout,
651 typename View::device_type, Unmanaged>;
653 typedef typename BlockDiagView::non_const_value_type Scalar;
654 typedef typename BlockDiagView::device_type Device;
655 typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
656 typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
658 UnmanagedView<BlockDiagView> block_diag_;
662 UnmanagedView<RWrk> rwrk_;
664 UnmanagedView<IWrk> iwrk_;
667 InvertDiagBlocks (BlockDiagView& block_diag)
668 : block_diag_(block_diag)
670 const auto blksz = block_diag.dimension_1();
671 Kokkos::resize(rwrk_buf_, block_diag_.dimension_0(), blksz);
673 Kokkos::resize(iwrk_buf_, block_diag_.dimension_0(), blksz);
677 KOKKOS_INLINE_FUNCTION
678 void operator() (
const Size i,
int& jinfo)
const {
679 auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
680 auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
681 auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
683 Tpetra::Experimental::GETF2(D_cur, ipiv, info);
688 Tpetra::Experimental::GETRI(D_cur, ipiv, work, info);
693 KOKKOS_INLINE_FUNCTION
694 void join (
volatile value_type& dst,
volatile value_type
const& src)
const { dst += src; }
698 template<
class MatrixType>
702 using Teuchos::Array;
703 using Teuchos::ArrayRCP;
704 using Teuchos::ArrayView;
709 using Teuchos::REDUCE_MAX;
710 using Teuchos::REDUCE_MIN;
711 using Teuchos::REDUCE_SUM;
712 using Teuchos::rcp_dynamic_cast;
713 using Teuchos::reduceAll;
715 typedef typename node_type::device_type device_type;
720 Teuchos::TimeMonitor timeMon (*Time_,
true);
722 TEUCHOS_TEST_FOR_EXCEPTION(
723 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::" 724 "computeBlockCrs: The input matrix A is null. Please call setMatrix() " 725 "with a nonnull input matrix, then call initialize(), before calling " 727 const block_crs_matrix_type* blockCrsA =
728 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
729 TEUCHOS_TEST_FOR_EXCEPTION(
730 blockCrsA == NULL, std::logic_error,
"Ifpack2::Relaxation::" 731 "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we " 732 "got this far. Please report this bug to the Ifpack2 developers.");
739 const LO lclNumMeshRows =
740 blockCrsA->getCrsGraph ().getNodeNumRows ();
741 const LO blockSize = blockCrsA->getBlockSize ();
743 if (! savedDiagOffsets_) {
744 blockDiag_ = block_diag_type ();
745 blockDiag_ = block_diag_type (
"Ifpack2::Relaxation::blockDiag_",
746 lclNumMeshRows, blockSize, blockSize);
747 if (Teuchos::as<LO>(diagOffsets_.dimension_0 () ) < lclNumMeshRows) {
750 diagOffsets_ = Kokkos::View<size_t*, device_type> ();
751 diagOffsets_ = Kokkos::View<size_t*, device_type> (
"offsets", lclNumMeshRows);
753 blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
754 TEUCHOS_TEST_FOR_EXCEPTION
755 (static_cast<size_t> (diagOffsets_.dimension_0 ()) !=
756 static_cast<size_t> (blockDiag_.dimension_0 ()),
757 std::logic_error,
"diagOffsets_.dimension_0() = " <<
758 diagOffsets_.dimension_0 () <<
" != blockDiag_.dimension_0() = " 759 << blockDiag_.dimension_0 () <<
760 ". Please report this bug to the Ifpack2 developers.");
761 savedDiagOffsets_ =
true;
763 blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
770 unmanaged_block_diag_type blockDiag = blockDiag_;
772 if (DoL1Method_ && IsParallel_) {
774 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
775 Array<LO> indices (maxLength);
776 Array<scalar_type> values (maxLength * blockSize * blockSize);
777 size_t numEntries = 0;
779 for (LO i = 0; i < lclNumMeshRows; ++i) {
781 blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries);
783 auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
784 for (LO subRow = 0; subRow < blockSize; ++subRow) {
786 for (
size_t k = 0 ; k < numEntries ; ++k) {
787 if (indices[k] > lclNumMeshRows) {
788 const size_t offset = blockSize*blockSize*k + subRow*blockSize;
789 for (LO subCol = 0; subCol < blockSize; ++subCol) {
790 diagonal_boost += STS::magnitude (values[offset+subCol] / two);
794 if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
795 diagBlock(subRow, subRow) += diagonal_boost;
803 Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
804 typedef typename unmanaged_block_diag_type::execution_space exec_space;
805 typedef Kokkos::RangePolicy<exec_space, LO> range_type;
807 Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
812 TEUCHOS_TEST_FOR_EXCEPTION
813 (info > 0, std::runtime_error,
"GETF2 or GETRI failed on = " << info
814 <<
" diagonal blocks.");
819 #ifdef HAVE_IFPACK2_DEBUG 820 const int numResults = 2;
822 int lclResults[2], gblResults[2];
823 lclResults[0] = info;
824 lclResults[1] = -info;
827 reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
828 numResults, lclResults, gblResults);
829 TEUCHOS_TEST_FOR_EXCEPTION
830 (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
831 "Ifpack2::Relaxation::compute: When processing the input " 832 "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations " 833 "failed on one or more (MPI) processes.");
834 #endif // HAVE_IFPACK2_DEBUG 836 Importer_ = A_->getGraph ()->getImporter ();
839 ComputeTime_ += Time_->totalElapsedTime ();
844 template<
class MatrixType>
847 using Teuchos::Array;
848 using Teuchos::ArrayRCP;
849 using Teuchos::ArrayView;
854 using Teuchos::REDUCE_MAX;
855 using Teuchos::REDUCE_MIN;
856 using Teuchos::REDUCE_SUM;
857 using Teuchos::rcp_dynamic_cast;
858 using Teuchos::reduceAll;
861 typedef typename vector_type::device_type device_type;
862 const scalar_type zero = STS::zero ();
863 const scalar_type one = STS::one ();
870 if (hasBlockCrsMatrix_) {
880 Teuchos::TimeMonitor timeMon (*Time_,
true);
882 TEUCHOS_TEST_FOR_EXCEPTION(
883 A_.is_null (), std::runtime_error,
"Ifpack2::Relaxation::compute: " 884 "The input matrix A is null. Please call setMatrix() with a nonnull " 885 "input matrix, then call initialize(), before calling this method.");
890 TEUCHOS_TEST_FOR_EXCEPTION(
891 NumSweeps_ < 0, std::logic_error,
892 "Ifpack2::Relaxation::compute: NumSweeps_ = " << NumSweeps_ <<
" < 0. " 893 "Please report this bug to the Ifpack2 developers.");
895 Diagonal_ = rcp (
new vector_type (A_->getRowMap ()));
913 const crs_matrix_type* crsMat =
914 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
915 if (crsMat == NULL || ! crsMat->isStaticGraph ()) {
916 A_->getLocalDiagCopy (*Diagonal_);
918 if (! savedDiagOffsets_) {
919 const size_t lclNumRows = A_->getRowMap ()->getNodeNumElements ();
920 if (diagOffsets_.dimension_0 () < lclNumRows) {
921 typedef typename node_type::device_type DT;
922 diagOffsets_ = Kokkos::View<size_t*, DT> ();
923 diagOffsets_ = Kokkos::View<size_t*, DT> (
"offsets", lclNumRows);
925 crsMat->getCrsGraph ()->getLocalDiagOffsets (diagOffsets_);
926 savedDiagOffsets_ =
true;
928 crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_);
929 #ifdef HAVE_IFPACK2_DEBUG 931 vector_type D_copy (A_->getRowMap ());
932 A_->getLocalDiagCopy (D_copy);
933 D_copy.update (STS::one (), *Diagonal_, -STS::one ());
937 TEUCHOS_TEST_FOR_EXCEPTION(
938 err != STM::zero(), std::logic_error,
"Ifpack2::Relaxation::compute: " 939 "\"fast-path\" diagonal computation failed. \\|D1 - D2\\|_inf = " 941 #endif // HAVE_IFPACK2_DEBUG 947 RCP<vector_type> origDiag;
948 if (checkDiagEntries_) {
949 origDiag = rcp (
new vector_type (A_->getRowMap ()));
950 Tpetra::deep_copy (*origDiag, *Diagonal_);
953 const size_t numMyRows = A_->getNodeNumRows ();
956 Diagonal_->template sync<Kokkos::HostSpace> ();
957 Diagonal_->template modify<Kokkos::HostSpace> ();
958 auto diag_2d = Diagonal_->template getLocalView<Kokkos::HostSpace> ();
959 auto diag_1d = Kokkos::subview (diag_2d, Kokkos::ALL (), 0);
961 scalar_type*
const diag =
reinterpret_cast<scalar_type*
> (diag_1d.ptr_on_device ());
970 if (DoL1Method_ && IsParallel_) {
971 const scalar_type two = one + one;
972 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
973 Array<local_ordinal_type> indices (maxLength);
974 Array<scalar_type> values (maxLength);
977 for (
size_t i = 0; i < numMyRows; ++i) {
978 A_->getLocalRowCopy (i, indices (), values (), numEntries);
980 for (
size_t k = 0 ; k < numEntries ; ++k) {
981 if (static_cast<size_t> (indices[k]) > numMyRows) {
982 diagonal_boost += STS::magnitude (values[k] / two);
985 if (STS::magnitude (diag[i]) < L1Eta_ * diagonal_boost) {
986 diag[i] += diagonal_boost;
1002 const scalar_type oneOverMinDiagVal = (MinDiagonalValue_ == zero) ?
1003 one / SmallTraits<scalar_type>::eps () :
1004 one / MinDiagonalValue_;
1006 const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1008 if (checkDiagEntries_) {
1014 size_t numSmallDiagEntries = 0;
1015 size_t numZeroDiagEntries = 0;
1016 size_t numNegDiagEntries = 0;
1027 if (numMyRows > 0) {
1028 const scalar_type d_0 = diag[0];
1032 minMagDiagEntryMag = d_0_mag;
1033 maxMagDiagEntryMag = d_0_mag;
1041 for (
size_t i = 0 ; i < numMyRows; ++i) {
1042 const scalar_type d_i = diag[i];
1048 if (d_i_real < STM::zero ()) {
1049 ++numNegDiagEntries;
1051 if (d_i_mag < minMagDiagEntryMag) {
1053 minMagDiagEntryMag = d_i_mag;
1055 if (d_i_mag > maxMagDiagEntryMag) {
1057 maxMagDiagEntryMag = d_i_mag;
1060 if (fixTinyDiagEntries_) {
1062 if (d_i_mag <= minDiagValMag) {
1063 ++numSmallDiagEntries;
1064 if (d_i_mag == STM::zero ()) {
1065 ++numZeroDiagEntries;
1067 diag[i] = oneOverMinDiagVal;
1069 diag[i] = one / d_i;
1074 if (d_i_mag <= minDiagValMag) {
1075 ++numSmallDiagEntries;
1076 if (d_i_mag == STM::zero ()) {
1077 ++numZeroDiagEntries;
1080 diag[i] = one / d_i;
1087 if (STS::isComplex) {
1088 ComputeFlops_ += 4.0 * numMyRows;
1090 ComputeFlops_ += numMyRows;
1107 const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1110 Array<magnitude_type> localVals (2);
1111 localVals[0] = minMagDiagEntryMag;
1113 localVals[1] = -maxMagDiagEntryMag;
1114 Array<magnitude_type> globalVals (2);
1115 reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1116 localVals.getRawPtr (),
1117 globalVals.getRawPtr ());
1118 globalMinMagDiagEntryMag_ = globalVals[0];
1119 globalMaxMagDiagEntryMag_ = -globalVals[1];
1121 Array<size_t> localCounts (3);
1122 localCounts[0] = numSmallDiagEntries;
1123 localCounts[1] = numZeroDiagEntries;
1124 localCounts[2] = numNegDiagEntries;
1125 Array<size_t> globalCounts (3);
1126 reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1127 localCounts.getRawPtr (),
1128 globalCounts.getRawPtr ());
1129 globalNumSmallDiagEntries_ = globalCounts[0];
1130 globalNumZeroDiagEntries_ = globalCounts[1];
1131 globalNumNegDiagEntries_ = globalCounts[2];
1143 vector_type diff (A_->getRowMap ());
1144 diff.reciprocal (*origDiag);
1145 Diagonal_->template sync<device_type> ();
1146 diff.update (-one, *Diagonal_, one);
1147 globalDiagNormDiff_ = diff.norm2 ();
1150 if (fixTinyDiagEntries_) {
1154 for (
size_t i = 0 ; i < numMyRows; ++i) {
1155 const scalar_type d_i = diag[i];
1159 if (d_i_mag <= minDiagValMag) {
1160 diag[i] = oneOverMinDiagVal;
1162 diag[i] = one / d_i;
1167 for (
size_t i = 0 ; i < numMyRows; ++i) {
1168 diag[i] = one / diag[i];
1171 if (STS::isComplex) {
1172 ComputeFlops_ += 4.0 * numMyRows;
1174 ComputeFlops_ += numMyRows;
1178 if (IsParallel_ && (PrecType_ == Ifpack2::Details::GS ||
1179 PrecType_ == Ifpack2::Details::SGS)) {
1183 Importer_ = A_->getGraph ()->getImporter ();
1184 Diagonal_->template sync<device_type> ();
1186 #ifdef HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES 1188 if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
1189 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (&(*A_));
1190 TEUCHOS_TEST_FOR_EXCEPTION
1191 (crsMat == NULL, std::logic_error,
"Ifpack2::Relaxation::compute: " 1192 "Multithreaded Gauss-Seidel methods currently only work when the input " 1193 "matrix is a Tpetra::CrsMatrix.");
1194 local_matrix_type kcsr = crsMat->getLocalMatrix ();
1196 const bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
1197 using KokkosKernels::Experimental::Graph::gauss_seidel_numeric;
1198 gauss_seidel_numeric<mt_kernel_handle_type,
1201 scalar_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
1202 A_->getNodeNumRows (), A_->getNodeNumCols (),
1208 #endif // HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES 1211 ComputeTime_ += Time_->totalElapsedTime ();
1217 template<
class MatrixType>
1220 ApplyInverseJacobi (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1221 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1227 if (hasBlockCrsMatrix_) {
1228 ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1232 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1233 const double numVectors = as<double> (X.getNumVectors ());
1234 if (ZeroStartingSolution_) {
1239 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1245 double flopUpdate = 0.0;
1246 if (DampingFactor_ == STS::one ()) {
1248 flopUpdate = numGlobalRows * numVectors;
1252 flopUpdate = 2.0 * numGlobalRows * numVectors;
1254 ApplyFlops_ += flopUpdate;
1255 if (NumSweeps_ == 1) {
1261 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1264 MV A_times_Y (Y.getMap (), as<size_t>(numVectors),
false);
1265 for (
int j = startSweep; j < NumSweeps_; ++j) {
1268 A_times_Y.update (STS::one (), X, -STS::one ());
1269 Y.elementWiseMultiply (DampingFactor_, *Diagonal_, A_times_Y, STS::one ());
1283 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1284 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1285 ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1286 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1289 template<
class MatrixType>
1301 typedef Tpetra::Experimental::BlockMultiVector<
scalar_type,
1304 const block_crs_matrix_type* blockMatConst =
1305 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
1306 TEUCHOS_TEST_FOR_EXCEPTION
1307 (blockMatConst == NULL, std::logic_error,
"This method should never be " 1308 "called if the matrix A_ is not a BlockCrsMatrix. Please report this " 1309 "bug to the Ifpack2 developers.");
1314 block_crs_matrix_type* blockMat =
1315 const_cast<block_crs_matrix_type*
> (blockMatConst);
1317 auto meshRowMap = blockMat->getRowMap ();
1318 auto meshColMap = blockMat->getColMap ();
1319 const local_ordinal_type blockSize = blockMat->getBlockSize ();
1321 BMV X_blk (X, *meshColMap, blockSize);
1322 BMV Y_blk (Y, *meshRowMap, blockSize);
1324 if (ZeroStartingSolution_) {
1328 Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1329 if (NumSweeps_ == 1) {
1334 auto pointRowMap = Y.getMap ();
1335 const size_t numVecs = X.getNumVectors ();
1339 BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1343 const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1345 for (
int j = startSweep; j < NumSweeps_; ++j) {
1346 blockMat->applyBlock (Y_blk, A_times_Y);
1348 Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1349 X_blk, A_times_Y, STS::one ());
1353 template<
class MatrixType>
1356 ApplyInverseGS (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1357 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1369 const block_crs_matrix_type* blockCrsMat =
1370 dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr ());
1371 const crs_matrix_type* crsMat =
1372 dynamic_cast<const crs_matrix_type*
> (A_.getRawPtr ());
1373 if (blockCrsMat != NULL) {
1374 const_cast<this_type*
> (
this)->ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
1375 }
else if (crsMat != NULL) {
1376 ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
1378 ApplyInverseGS_RowMatrix (X, Y);
1383 template<
class MatrixType>
1386 ApplyInverseGS_RowMatrix (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1387 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1389 using Teuchos::Array;
1390 using Teuchos::ArrayRCP;
1391 using Teuchos::ArrayView;
1395 using Teuchos::rcpFromRef;
1396 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1401 if (ZeroStartingSolution_) {
1402 Y.putScalar (STS::zero ());
1405 const size_t NumVectors = X.getNumVectors ();
1406 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1407 Array<local_ordinal_type> Indices (maxLength);
1408 Array<scalar_type> Values (maxLength);
1411 const size_t numMyRows = A_->getNodeNumRows();
1413 size_t numActive = numMyRows;
1414 bool do_local = ! localSmoothingIndices_.is_null ();
1416 rowInd = localSmoothingIndices_.getRawPtr ();
1417 numActive = localSmoothingIndices_.size ();
1422 if (Importer_.is_null ()) {
1424 Y2 = rcp (
new MV (Y.getMap (), NumVectors,
false));
1429 Y2 = rcp (
new MV (Importer_->getTargetMap (), NumVectors));
1433 Y2 = rcpFromRef (Y);
1437 ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView ();
1438 ArrayView<const scalar_type> d_ptr = d_rcp();
1441 bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
1443 if (constant_stride) {
1445 size_t x_stride = X.getStride();
1446 size_t y2_stride = Y2->getStride();
1447 ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
1448 ArrayRCP<const scalar_type> x_rcp = X.get1dView();
1449 ArrayView<scalar_type> y2_ptr = y2_rcp();
1450 ArrayView<const scalar_type> x_ptr = x_rcp();
1451 Array<scalar_type> dtemp(NumVectors,STS::zero());
1453 for (
int j = 0; j < NumSweeps_; ++j) {
1456 if (Importer_.is_null ()) {
1459 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1463 if (! DoBackwardGS_) {
1464 for (
size_t ii = 0; ii < numActive; ++ii) {
1467 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1468 dtemp.assign(NumVectors,STS::zero());
1470 for (
size_t k = 0; k < NumEntries; ++k) {
1472 for (
size_t m = 0; m < NumVectors; ++m) {
1473 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1477 for (
size_t m = 0; m < NumVectors; ++m) {
1478 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1485 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1488 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1489 dtemp.assign (NumVectors, STS::zero ());
1491 for (
size_t k = 0; k < NumEntries; ++k) {
1493 for (
size_t m = 0; m < NumVectors; ++m) {
1494 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1498 for (
size_t m = 0; m < NumVectors; ++m) {
1499 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1505 Tpetra::deep_copy (Y, *Y2);
1511 ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
1512 ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
1514 for (
int j = 0; j < NumSweeps_; ++j) {
1517 if (Importer_.is_null ()) {
1520 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1524 if (! DoBackwardGS_) {
1525 for (
size_t ii = 0; ii < numActive; ++ii) {
1528 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1530 for (
size_t m = 0; m < NumVectors; ++m) {
1532 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1533 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1535 for (
size_t k = 0; k < NumEntries; ++k) {
1537 dtemp += Values[k] * y2_local[col];
1539 y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1546 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1550 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1552 for (
size_t m = 0; m < NumVectors; ++m) {
1554 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1555 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1557 for (
size_t k = 0; k < NumEntries; ++k) {
1559 dtemp += Values[k] * y2_local[col];
1561 y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1568 Tpetra::deep_copy (Y, *Y2);
1575 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1576 const double numVectors = as<double> (X.getNumVectors ());
1577 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1578 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1579 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1580 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1584 template<
class MatrixType>
1588 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1589 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 1592 const Tpetra::ESweepDirection direction =
1593 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1594 if (localSmoothingIndices_.is_null ()) {
1595 A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
1596 NumSweeps_, ZeroStartingSolution_);
1599 A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
1600 DampingFactor_, direction,
1601 NumSweeps_, ZeroStartingSolution_);
1615 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1616 const double numVectors = as<double> (X.getNumVectors ());
1617 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1618 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1619 ApplyFlops_ += NumSweeps_ * numVectors *
1620 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1623 template<
class MatrixType>
1627 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1628 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
1632 using Teuchos::rcpFromRef;
1633 typedef Tpetra::Experimental::BlockMultiVector<
scalar_type,
1645 BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
1646 const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
1648 bool performImport =
false;
1650 if (Importer_.is_null ()) {
1651 yBlockCol = rcpFromRef (yBlock);
1654 if (yBlockColumnPointMap_.is_null () ||
1655 yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
1656 yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
1657 yBlockColumnPointMap_ =
1658 rcp (
new BMV (* (A.getColMap ()), A.getBlockSize (),
1659 static_cast<local_ordinal_type
> (yBlock.getNumVectors ())));
1661 yBlockCol = yBlockColumnPointMap_;
1662 performImport =
true;
1665 if (ZeroStartingSolution_) {
1666 yBlockCol->putScalar (STS::zero ());
1668 else if (performImport) {
1669 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
1672 const Tpetra::ESweepDirection direction =
1673 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1675 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
1676 if (performImport && sweep > 0) {
1677 yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
1679 A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
1680 DampingFactor_, direction);
1681 if (performImport) {
1682 RCP<const MV> yBlockColPointDomain =
1683 yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
1684 Tpetra::deep_copy (Y, *yBlockColPointDomain);
1692 template<
class MatrixType>
1696 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1697 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1698 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& D,
1700 const Tpetra::ESweepDirection direction,
1701 const int numSweeps,
1702 const bool zeroInitialGuess)
const 1704 #ifdef HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES 1705 using Teuchos::null;
1708 using Teuchos::rcpFromRef;
1709 using Teuchos::rcp_const_cast;
1716 const char prefix[] =
"Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1717 const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1719 TEUCHOS_TEST_FOR_EXCEPTION
1720 (crsMat == NULL, std::logic_error, prefix <<
"The matrix is NULL. This " 1721 "should never happen. Please report this bug to the Ifpack2 developers.");
1722 TEUCHOS_TEST_FOR_EXCEPTION
1723 (! crsMat->isFillComplete (), std::runtime_error, prefix <<
"The input " 1724 "CrsMatrix is not fill complete. Please call fillComplete on the matrix," 1725 " then do setup again, before calling apply(). \"Do setup\" means that " 1726 "if only the matrix's values have changed since last setup, you need only" 1727 " call compute(). If the matrix's structure may also have changed, you " 1728 "must first call initialize(), then call compute(). If you have not set" 1729 " up this preconditioner for this matrix before, you must first call " 1730 "initialize(), then call compute().");
1731 TEUCHOS_TEST_FOR_EXCEPTION
1732 (numSweeps < 0, std::invalid_argument, prefix <<
"The number of sweeps " 1733 "must be nonnegative, but you provided numSweeps = " << numSweeps <<
1735 if (numSweeps == 0) {
1739 typedef typename Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> MV;
1740 typedef typename crs_matrix_type::import_type import_type;
1741 typedef typename crs_matrix_type::export_type export_type;
1742 typedef typename crs_matrix_type::map_type map_type;
1744 RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1745 RCP<const export_type> exporter = crsMat->getGraph ()->getExporter ();
1746 TEUCHOS_TEST_FOR_EXCEPTION(
1747 ! exporter.is_null (), std::runtime_error,
1748 "This method's implementation currently requires that the matrix's row, " 1749 "domain, and range Maps be the same. This cannot be the case, because " 1750 "the matrix has a nontrivial Export object.");
1752 RCP<const map_type> domainMap = crsMat->getDomainMap ();
1753 RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1754 RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1755 RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1757 #ifdef HAVE_IFPACK2_DEBUG 1762 TEUCHOS_TEST_FOR_EXCEPTION(
1763 ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1764 "Ifpack2::Relaxation::MTGaussSeidel requires that the input " 1765 "multivector X be in the domain Map of the matrix.");
1766 TEUCHOS_TEST_FOR_EXCEPTION(
1767 ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1768 "Ifpack2::Relaxation::MTGaussSeidel requires that the input " 1769 "B be in the range Map of the matrix.");
1770 TEUCHOS_TEST_FOR_EXCEPTION(
1771 ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
1772 "Ifpack2::Relaxation::MTGaussSeidel requires that the input " 1773 "D be in the row Map of the matrix.");
1774 TEUCHOS_TEST_FOR_EXCEPTION(
1775 ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1776 "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the " 1777 "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1778 TEUCHOS_TEST_FOR_EXCEPTION(
1779 ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1780 "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and " 1781 "the range Map of the matrix be the same.");
1787 #endif // HAVE_IFPACK2_DEBUG 1798 RCP<MV> X_domainMap;
1799 bool copyBackOutput =
false;
1800 if (importer.is_null ()) {
1801 if (X.isConstantStride ()) {
1802 X_colMap = rcpFromRef (X);
1803 X_domainMap = rcpFromRef (X);
1810 if (zeroInitialGuess) {
1811 X_colMap->putScalar (ZERO);
1821 X_colMap = rcp (
new MV (colMap, X.getNumVectors ()));
1825 X_domainMap = X_colMap;
1826 if (! zeroInitialGuess) {
1828 deep_copy (*X_domainMap , X);
1829 }
catch (std::exception& e) {
1830 std::ostringstream os;
1831 os <<
"Ifpack2::Relaxation::MTGaussSeidel: " 1832 "deep_copy(*X_domainMap, X) threw an exception: " 1833 << e.what () <<
".";
1834 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, e.what ());
1837 copyBackOutput =
true;
1852 X_colMap = rcp (
new MV (colMap, X.getNumVectors ()));
1854 X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1856 #ifdef HAVE_IFPACK2_DEBUG 1857 auto X_colMap_host_view = X_colMap->template getLocalView<Kokkos::HostSpace> ();
1858 auto X_domainMap_host_view = X_domainMap->template getLocalView<Kokkos::HostSpace> ();
1860 if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
1861 TEUCHOS_TEST_FOR_EXCEPTION(
1862 X_colMap_host_view.ptr_on_device () != X_domainMap_host_view.ptr_on_device (),
1863 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: " 1864 "Pointer to start of column Map view of X is not equal to pointer to " 1865 "start of (domain Map view of) X. This may mean that " 1866 "Tpetra::MultiVector::offsetViewNonConst is broken. " 1867 "Please report this bug to the Tpetra developers.");
1870 TEUCHOS_TEST_FOR_EXCEPTION(
1871 X_colMap_host_view.dimension_0 () < X_domainMap_host_view.dimension_0 () ||
1872 X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
1873 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: " 1874 "X_colMap has fewer local rows than X_domainMap. " 1875 "X_colMap_host_view.dimension_0() = " << X_colMap_host_view.dimension_0 ()
1876 <<
", X_domainMap_host_view.dimension_0() = " 1877 << X_domainMap_host_view.dimension_0 ()
1878 <<
", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
1879 <<
", and X_domainMap->getLocalLength() = " 1880 << X_domainMap->getLocalLength ()
1881 <<
". This means that Tpetra::MultiVector::offsetViewNonConst " 1882 "is broken. Please report this bug to the Tpetra developers.");
1884 TEUCHOS_TEST_FOR_EXCEPTION(
1885 X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
1886 std::logic_error,
"Ifpack2::Relaxation::MTGaussSeidel: " 1887 "X_colMap has a different number of columns than X_domainMap. " 1888 "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
1889 <<
" != X_domainMap->getNumVectors() = " 1890 << X_domainMap->getNumVectors ()
1891 <<
". This means that Tpetra::MultiVector::offsetViewNonConst " 1892 "is broken. Please report this bug to the Tpetra developers.");
1893 #endif // HAVE_IFPACK2_DEBUG 1895 if (zeroInitialGuess) {
1897 X_colMap->putScalar (ZERO);
1907 X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
1909 copyBackOutput =
true;
1916 if (B.isConstantStride ()) {
1917 B_in = rcpFromRef (B);
1924 RCP<MV> B_in_nonconst = rcp (
new MV (rowMap, B.getNumVectors()));
1926 deep_copy (*B_in_nonconst, B);
1927 }
catch (std::exception& e) {
1928 std::ostringstream os;
1929 os <<
"Ifpack2::Relaxation::MTGaussSeidel: " 1930 "deep_copy(*B_in_nonconst, B) threw an exception: " 1931 << e.what () <<
".";
1932 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, e.what ());
1934 B_in = rcp_const_cast<
const MV> (B_in_nonconst);
1948 local_matrix_type kcsr = crsMat->getLocalMatrix ();
1949 const size_t NumVectors = X.getNumVectors ();
1951 bool update_y_vector =
true;
1953 bool zero_x_vector =
false;
1954 for (
int sweep = 0; sweep < numSweeps; ++sweep) {
1955 if (! importer.is_null () && sweep > 0) {
1958 X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
1961 for (
size_t indVec = 0; indVec < NumVectors; ++indVec) {
1962 if (direction == Tpetra::Symmetric) {
1963 KokkosKernels::Experimental::Graph::symmetric_gauss_seidel_apply
1964 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
1965 kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
1966 Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
1967 Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
1968 zero_x_vector, update_y_vector);
1970 else if (direction == Tpetra::Forward) {
1971 KokkosKernels::Experimental::Graph::forward_sweep_gauss_seidel_apply
1972 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
1973 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
1974 Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
1975 Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
1976 zero_x_vector, update_y_vector);
1978 else if (direction == Tpetra::Backward) {
1979 KokkosKernels::Experimental::Graph::backward_sweep_gauss_seidel_apply
1980 (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
1981 kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
1982 Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
1983 Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
1984 zero_x_vector, update_y_vector);
1987 TEUCHOS_TEST_FOR_EXCEPTION(
1988 true, std::invalid_argument,
1989 prefix <<
"The 'direction' enum does not have any of its valid " 1990 "values: Forward, Backward, or Symmetric.");
1994 if (NumVectors > 1){
1995 update_y_vector =
true;
1998 update_y_vector =
false;
2002 if (copyBackOutput) {
2004 deep_copy (X , *X_domainMap);
2005 }
catch (std::exception& e) {
2006 TEUCHOS_TEST_FOR_EXCEPTION(
2007 true, std::runtime_error, prefix <<
"deep_copy(X, *X_domainMap) " 2008 "threw an exception: " << e.what ());
2013 TEUCHOS_TEST_FOR_EXCEPTION
2014 (
true, std::logic_error,
"The multithreaded implementation of Gauss-Seidel " 2015 "is not enabled. Please talk to the Ifpack2 developers about how to " 2016 "enable this method.");
2017 #endif // HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES 2020 template<
class MatrixType>
2024 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X)
const 2026 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (&(*A_));
2027 TEUCHOS_TEST_FOR_EXCEPTION
2028 (crsMat == NULL, std::logic_error,
"Ifpack2::Relaxation::apply: " 2029 "Multithreaded Gauss-Seidel methods currently only work when the input " 2030 "matrix is a Tpetra::CrsMatrix.");
2033 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2036 TEUCHOS_TEST_FOR_EXCEPTION
2037 (! localSmoothingIndices_.is_null (), std::logic_error,
2038 "Our implementation of Multithreaded Gauss-Seidel does not implement the " 2039 "use case where the user supplies an iteration order. " 2040 "This error used to appear as \"MT GaussSeidel ignores the given " 2042 "I tried to add more explanation, but I didn't implement \"MT " 2043 "GaussSeidel\" [sic]. " 2044 "You'll have to ask the person who did.");
2045 this->MTGaussSeidel (crsMat, X, B, *Diagonal_, DampingFactor_, direction,
2046 NumSweeps_, ZeroStartingSolution_);
2048 const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2049 const double numVectors = as<double> (X.getNumVectors ());
2050 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2051 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2052 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2053 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2057 template<
class MatrixType>
2059 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2060 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X)
const {
2062 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (&(*A_));
2063 TEUCHOS_TEST_FOR_EXCEPTION(
2064 crsMat == NULL, std::runtime_error,
"Ifpack2::Relaxation::compute: " 2065 "MT methods works for CRSMatrix Only.");
2068 const Tpetra::ESweepDirection direction =
2069 DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
2072 TEUCHOS_TEST_FOR_EXCEPTION
2073 (! localSmoothingIndices_.is_null (), std::logic_error,
2074 "Our implementation of Multithreaded Gauss-Seidel does not implement the " 2075 "use case where the user supplies an iteration order. " 2076 "This error used to appear as \"MT GaussSeidel ignores the given " 2078 "I tried to add more explanation, but I didn't implement \"MT " 2079 "GaussSeidel\" [sic]. " 2080 "You'll have to ask the person who did.");
2081 this->MTGaussSeidel (crsMat, X, B, *Diagonal_, DampingFactor_, direction,
2082 NumSweeps_, ZeroStartingSolution_);
2084 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2085 const double numVectors = as<double> (X.getNumVectors ());
2086 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2087 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2088 ApplyFlops_ += NumSweeps_ * numVectors *
2089 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2092 template<
class MatrixType>
2095 ApplyInverseSGS (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2096 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 2108 const block_crs_matrix_type* blockCrsMat =
dynamic_cast<const block_crs_matrix_type*
> (A_.getRawPtr());
2109 const crs_matrix_type* crsMat =
dynamic_cast<const crs_matrix_type*
> (&(*A_));
2110 if (blockCrsMat != NULL) {
2111 const_cast<this_type*
> (
this)->ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
2113 else if (crsMat != NULL) {
2114 ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
2116 ApplyInverseSGS_RowMatrix (X, Y);
2121 template<
class MatrixType>
2125 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 2127 using Teuchos::Array;
2128 using Teuchos::ArrayRCP;
2129 using Teuchos::ArrayView;
2133 using Teuchos::rcpFromRef;
2134 typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
2140 if (ZeroStartingSolution_) {
2141 Y.putScalar (STS::zero ());
2144 const size_t NumVectors = X.getNumVectors ();
2145 const size_t maxLength = A_->getNodeMaxNumRowEntries ();
2146 Array<local_ordinal_type> Indices (maxLength);
2147 Array<scalar_type> Values (maxLength);
2150 const size_t numMyRows = A_->getNodeNumRows();
2152 size_t numActive = numMyRows;
2153 bool do_local = !localSmoothingIndices_.is_null();
2155 rowInd = localSmoothingIndices_.getRawPtr();
2156 numActive = localSmoothingIndices_.size();
2162 if (Importer_.is_null ()) {
2164 Y2 = rcp (
new MV (Y.getMap (), NumVectors,
false));
2169 Y2 = rcp (
new MV (Importer_->getTargetMap (), NumVectors));
2173 Y2 = rcpFromRef (Y);
2177 ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView();
2178 ArrayView<const scalar_type> d_ptr = d_rcp();
2181 bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
2183 if(constant_stride) {
2185 size_t x_stride = X.getStride();
2186 size_t y2_stride = Y2->getStride();
2187 ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
2188 ArrayRCP<const scalar_type> x_rcp = X.get1dView();
2189 ArrayView<scalar_type> y2_ptr = y2_rcp();
2190 ArrayView<const scalar_type> x_ptr = x_rcp();
2191 Array<scalar_type> dtemp(NumVectors,STS::zero());
2193 for (
int j = 0; j < NumSweeps_; j++) {
2196 if (Importer_.is_null ()) {
2198 Tpetra::deep_copy (*Y2, Y);
2200 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2203 for (
size_t ii = 0; ii < numActive; ++ii) {
2206 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2207 dtemp.assign(NumVectors,STS::zero());
2209 for (
size_t k = 0; k < NumEntries; ++k) {
2211 for (
size_t m = 0; m < NumVectors; ++m) {
2212 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2216 for (
size_t m = 0; m < NumVectors; ++m) {
2217 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2223 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2226 A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2227 dtemp.assign(NumVectors,STS::zero());
2229 for (
size_t k = 0; k < NumEntries; ++k) {
2231 for (
size_t m = 0; m < NumVectors; ++m) {
2232 dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2236 for (
size_t m = 0; m < NumVectors; ++m) {
2237 y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2243 Tpetra::deep_copy (Y, *Y2);
2249 ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
2250 ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
2252 for (
int iter = 0; iter < NumSweeps_; ++iter) {
2255 if (Importer_.is_null ()) {
2257 Tpetra::deep_copy (*Y2, Y);
2259 Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2263 for (
size_t ii = 0; ii < numActive; ++ii) {
2267 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2269 for (
size_t m = 0; m < NumVectors; ++m) {
2271 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2272 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2274 for (
size_t k = 0; k < NumEntries; ++k) {
2276 dtemp += Values[k] * y2_local[col];
2278 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2284 for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2288 A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2290 for (
size_t m = 0; m < NumVectors; ++m) {
2292 ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2293 ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2295 for (
size_t k = 0; k < NumEntries; ++k) {
2297 dtemp += Values[k] * y2_local[col];
2299 y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2305 Tpetra::deep_copy (Y, *Y2);
2311 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2312 const double numVectors = as<double> (X.getNumVectors ());
2313 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2314 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2315 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2316 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2320 template<
class MatrixType>
2324 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2325 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
const 2328 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2329 if (localSmoothingIndices_.is_null ()) {
2330 A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
2331 NumSweeps_, ZeroStartingSolution_);
2334 A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
2335 DampingFactor_, direction,
2336 NumSweeps_, ZeroStartingSolution_);
2353 const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2354 const double numVectors = as<double> (X.getNumVectors ());
2355 const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2356 const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2357 ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2358 (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2361 template<
class MatrixType>
2365 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2366 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
2370 using Teuchos::rcpFromRef;
2371 typedef Tpetra::Experimental::BlockMultiVector<
scalar_type,
2383 BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
2384 const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
2386 bool performImport =
false;
2388 if (Importer_.is_null ()) {
2389 yBlockCol = Teuchos::rcpFromRef (yBlock);
2392 if (yBlockColumnPointMap_.is_null () ||
2393 yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
2394 yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
2395 yBlockColumnPointMap_ =
2396 rcp (
new BMV (* (A.getColMap ()), A.getBlockSize (),
2397 static_cast<local_ordinal_type
> (yBlock.getNumVectors ())));
2399 yBlockCol = yBlockColumnPointMap_;
2400 performImport =
true;
2403 if (ZeroStartingSolution_) {
2404 yBlockCol->putScalar (STS::zero ());
2406 else if (performImport) {
2407 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2411 const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2413 for (
int sweep = 0; sweep < NumSweeps_; ++sweep) {
2414 if (performImport && sweep > 0) {
2415 yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2417 A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
2418 DampingFactor_, direction);
2419 if (performImport) {
2420 RCP<const MV> yBlockColPointDomain =
2421 yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
2422 MV yBlockView = yBlock.getMultiVectorView ();
2423 Tpetra::deep_copy (yBlockView, *yBlockColPointDomain);
2429 template<
class MatrixType>
2432 std::ostringstream os;
2437 os <<
"\"Ifpack2::Relaxation\": {";
2439 os <<
"Initialized: " << (
isInitialized () ?
"true" :
"false") <<
", " 2440 <<
"Computed: " << (
isComputed () ?
"true" :
"false") <<
", ";
2446 if (PrecType_ == Ifpack2::Details::JACOBI) {
2448 }
else if (PrecType_ == Ifpack2::Details::GS) {
2449 os <<
"Gauss-Seidel";
2450 }
else if (PrecType_ == Ifpack2::Details::SGS) {
2451 os <<
"Symmetric Gauss-Seidel";
2452 }
else if (PrecType_ == Ifpack2::Details::MTGS) {
2453 os <<
"MT Gauss-Seidel";
2454 }
else if (PrecType_ == Ifpack2::Details::MTSGS) {
2455 os <<
"MT Symmetric Gauss-Seidel";
2461 os <<
", " <<
"sweeps: " << NumSweeps_ <<
", " 2462 <<
"damping factor: " << DampingFactor_ <<
", ";
2464 os <<
"use l1: " << DoL1Method_ <<
", " 2465 <<
"l1 eta: " << L1Eta_ <<
", ";
2468 if (A_.is_null ()) {
2469 os <<
"Matrix: null";
2472 os <<
"Global matrix dimensions: [" 2473 << A_->getGlobalNumRows () <<
", " << A_->getGlobalNumCols () <<
"]" 2474 <<
", Global nnz: " << A_->getGlobalNumEntries();
2482 template<
class MatrixType>
2486 const Teuchos::EVerbosityLevel verbLevel)
const 2488 using Teuchos::OSTab;
2489 using Teuchos::TypeNameTraits;
2490 using Teuchos::VERB_DEFAULT;
2491 using Teuchos::VERB_NONE;
2492 using Teuchos::VERB_LOW;
2493 using Teuchos::VERB_MEDIUM;
2494 using Teuchos::VERB_HIGH;
2495 using Teuchos::VERB_EXTREME;
2498 const Teuchos::EVerbosityLevel vl =
2499 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2501 const int myRank = this->
getComm ()->getRank ();
2509 if (vl != VERB_NONE && myRank == 0) {
2513 out <<
"\"Ifpack2::Relaxation\":" << endl;
2515 out <<
"MatrixType: \"" << TypeNameTraits<MatrixType>::name () <<
"\"" 2517 if (this->getObjectLabel () !=
"") {
2518 out <<
"Label: " << this->getObjectLabel () << endl;
2520 out <<
"Initialized: " << (
isInitialized () ?
"true" :
"false") << endl
2521 <<
"Computed: " << (
isComputed () ?
"true" :
"false") << endl
2522 <<
"Parameters: " << endl;
2525 out <<
"\"relaxation: type\": ";
2526 if (PrecType_ == Ifpack2::Details::JACOBI) {
2528 }
else if (PrecType_ == Ifpack2::Details::GS) {
2529 out <<
"Gauss-Seidel";
2530 }
else if (PrecType_ == Ifpack2::Details::SGS) {
2531 out <<
"Symmetric Gauss-Seidel";
2532 }
else if (PrecType_ == Ifpack2::Details::MTGS) {
2533 out <<
"MT Gauss-Seidel";
2534 }
else if (PrecType_ == Ifpack2::Details::MTSGS) {
2535 out <<
"MT Symmetric Gauss-Seidel";
2542 <<
"\"relaxation: sweeps\": " << NumSweeps_ << endl
2543 <<
"\"relaxation: damping factor\": " << DampingFactor_ << endl
2544 <<
"\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2545 <<
"\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2546 <<
"\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2547 <<
"\"relaxation: use l1\": " << DoL1Method_ << endl
2548 <<
"\"relaxation: l1 eta\": " << L1Eta_ << endl;
2550 out <<
"Computed quantities:" << endl;
2553 out <<
"Global number of rows: " << A_->getGlobalNumRows () << endl
2554 <<
"Global number of columns: " << A_->getGlobalNumCols () << endl;
2557 out <<
"Properties of input diagonal entries:" << endl;
2560 out <<
"Magnitude of minimum-magnitude diagonal entry: " 2561 << globalMinMagDiagEntryMag_ << endl
2562 <<
"Magnitude of maximum-magnitude diagonal entry: " 2563 << globalMaxMagDiagEntryMag_ << endl
2564 <<
"Number of diagonal entries with small magnitude: " 2565 << globalNumSmallDiagEntries_ << endl
2566 <<
"Number of zero diagonal entries: " 2567 << globalNumZeroDiagEntries_ << endl
2568 <<
"Number of diagonal entries with negative real part: " 2569 << globalNumNegDiagEntries_ << endl
2570 <<
"Abs 2-norm diff between computed and actual inverse " 2571 <<
"diagonal: " << globalDiagNormDiff_ << endl;
2575 out <<
"Saved diagonal offsets: " 2576 << (savedDiagOffsets_ ?
"true" :
"false") << endl;
2578 out <<
"Call counts and total times (in seconds): " << endl;
2581 out <<
"initialize: " << endl;
2584 out <<
"Call count: " << NumInitialize_ << endl;
2585 out <<
"Total time: " << InitializeTime_ << endl;
2587 out <<
"compute: " << endl;
2590 out <<
"Call count: " << NumCompute_ << endl;
2591 out <<
"Total time: " << ComputeTime_ << endl;
2593 out <<
"apply: " << endl;
2596 out <<
"Call count: " << NumApply_ << endl;
2597 out <<
"Total time: " << ApplyTime_ << endl;
2605 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \ 2606 template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >; 2608 #endif // IFPACK2_RELAXATION_DEF_HPP std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2430
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:377
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:393
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:423
void compute()
Compute the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:845
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:455
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:387
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:405
bool isInitialized() const
Returns true if the preconditioner has been successfully initialized.
Definition: Ifpack2_Relaxation_decl.hpp:401
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:411
Ifpack2 implementation details.
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:364
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:355
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:250
void setParameters(const Teuchos::ParameterList ¶ms)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:334
File for utility functions.
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:442
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:435
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:220
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:247
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:429
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_Relaxation_decl.hpp:416
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:417
Definition: Ifpack2_Container.hpp:761
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:399
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:244
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:344
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:241
virtual ~Relaxation()
Destructor.
Definition: Ifpack2_Relaxation_def.hpp:215
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:553
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:226
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object's attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2485
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
void initialize()
Initialize the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:574
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:253