// Ifpack2 Templated Preconditioning Package  Version 1.0
// Ifpack2_Relaxation_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_RELAXATION_DEF_HPP
44 #define IFPACK2_RELAXATION_DEF_HPP
45 
46 #include "Teuchos_StandardParameterEntryValidators.hpp"
47 #include "Teuchos_TimeMonitor.hpp"
48 #include "Tpetra_CrsMatrix.hpp"
49 #include "Tpetra_Experimental_BlockCrsMatrix.hpp"
50 #include "Ifpack2_Utilities.hpp"
51 #include "Ifpack2_Relaxation_decl.hpp"
52 
53 #ifdef HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
54 # include "KokkosKernels_GaussSeidel.hpp"
55 #endif // HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
56 
57 #ifdef HAVE_IFPACK2_DUMP_MTX_MATRIX
58 # include "MatrixMarket_Tpetra.hpp"
59 #endif
60 
61 // mfh 28 Mar 2013: Uncomment out these three lines to compute
62 // statistics on diagonal entries in compute().
63 // #ifndef IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS
64 // # define IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS 1
65 // #endif // IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS
66 
67 namespace {
68  // Validate that a given int is nonnegative.
69  class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
70  public:
71  // Constructor (does nothing).
72  NonnegativeIntValidator () {}
73 
74  // ParameterEntryValidator wants this method.
75  Teuchos::ParameterEntryValidator::ValidStringsList validStringValues () const {
76  return Teuchos::null;
77  }
78 
79  // Actually validate the parameter's value.
80  void
81  validate (const Teuchos::ParameterEntry& entry,
82  const std::string& paramName,
83  const std::string& sublistName) const
84  {
85  using std::endl;
86  Teuchos::any anyVal = entry.getAny (true);
87  const std::string entryName = entry.getAny (false).typeName ();
88 
89  TEUCHOS_TEST_FOR_EXCEPTION(
90  anyVal.type () != typeid (int),
91  Teuchos::Exceptions::InvalidParameterType,
92  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
93  << "\" has the wrong type." << endl << "Parameter: " << paramName
94  << endl << "Type specified: " << entryName << endl
95  << "Type required: int" << endl);
96 
97  const int val = Teuchos::any_cast<int> (anyVal);
98  TEUCHOS_TEST_FOR_EXCEPTION(
99  val < 0, Teuchos::Exceptions::InvalidParameterValue,
100  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
101  << "\" is negative." << endl << "Parameter: " << paramName
102  << endl << "Value specified: " << val << endl
103  << "Required range: [0, INT_MAX]" << endl);
104  }
105 
106  // ParameterEntryValidator wants this method.
107  const std::string getXMLTypeName () const {
108  return "NonnegativeIntValidator";
109  }
110 
111  // ParameterEntryValidator wants this method.
112  void
113  printDoc (const std::string& docString,
114  std::ostream &out) const
115  {
116  Teuchos::StrUtils::printLines (out, "# ", docString);
117  out << "#\tValidator Used: " << std::endl;
118  out << "#\t\tNonnegativeIntValidator" << std::endl;
119  }
120  };
121 
122  // A way to get a small positive number (eps() for floating-point
123  // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
124  // define it (for example, for integer values).
125  template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
126  class SmallTraits {
127  public:
128  // Return eps if Scalar is a floating-point type, else return 1.
129  static const Scalar eps ();
130  };
131 
132  // Partial specialization for when Scalar is not a floating-point type.
133  template<class Scalar>
134  class SmallTraits<Scalar, true> {
135  public:
136  static const Scalar eps () {
137  return Teuchos::ScalarTraits<Scalar>::one ();
138  }
139  };
140 
141  // Partial specialization for when Scalar is a floating-point type.
142  template<class Scalar>
143  class SmallTraits<Scalar, false> {
144  public:
145  static const Scalar eps () {
146  return Teuchos::ScalarTraits<Scalar>::eps ();
147  }
148  };
149 } // namespace (anonymous)
150 
151 namespace Ifpack2 {
152 
153 template<class MatrixType>
154 void Relaxation<MatrixType>::
155 setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
156 {
157  if (A.getRawPtr () != A_.getRawPtr ()) { // it's a different matrix
158  Importer_ = Teuchos::null;
159  Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
160  isInitialized_ = false;
161  IsComputed_ = false;
162  diagOffsets_ = Kokkos::View<size_t*, typename node_type::device_type> ();
163  savedDiagOffsets_ = false;
164  hasBlockCrsMatrix_ = false;
165  if (! A.is_null ()) {
166  IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
167  }
168  A_ = A;
169  }
170 }
171 
172 
173 template<class MatrixType>
175 Relaxation (const Teuchos::RCP<const row_matrix_type>& A)
176 : A_ (A),
177  Time_ (Teuchos::rcp (new Teuchos::Time ("Ifpack2::Relaxation"))),
178  NumSweeps_ (1),
179  PrecType_ (Ifpack2::Details::JACOBI),
180  DampingFactor_ (STS::one ()),
181  IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
182  false : // a reasonable default if there's no communicator
183  A->getRowMap ()->getComm ()->getSize () > 1),
184  ZeroStartingSolution_ (true),
185  DoBackwardGS_ (false),
186  DoL1Method_ (false),
187  L1Eta_ (Teuchos::as<magnitude_type> (1.5)),
188  MinDiagonalValue_ (STS::zero ()),
189  fixTinyDiagEntries_ (false),
190  checkDiagEntries_ (false),
191  isInitialized_ (false),
192  IsComputed_ (false),
193  NumInitialize_ (0),
194  NumCompute_ (0),
195  NumApply_ (0),
196  InitializeTime_ (0.0), // Times are double anyway, so no need for ScalarTraits.
197  ComputeTime_ (0.0),
198  ApplyTime_ (0.0),
199  ComputeFlops_ (0.0),
200  ApplyFlops_ (0.0),
201  globalMinMagDiagEntryMag_ (STM::zero ()),
202  globalMaxMagDiagEntryMag_ (STM::zero ()),
203  globalNumSmallDiagEntries_ (0),
204  globalNumZeroDiagEntries_ (0),
205  globalNumNegDiagEntries_ (0),
206  globalDiagNormDiff_(Teuchos::ScalarTraits<magnitude_type>::zero()),
207  savedDiagOffsets_ (false),
208  hasBlockCrsMatrix_ (false)
209 {
210  this->setObjectLabel ("Ifpack2::Relaxation");
211 }
212 
213 //==========================================================================
214 template<class MatrixType>
216 }
217 
218 template<class MatrixType>
219 Teuchos::RCP<const Teuchos::ParameterList>
221 {
222  using Teuchos::Array;
223  using Teuchos::ParameterList;
224  using Teuchos::parameterList;
225  using Teuchos::RCP;
226  using Teuchos::rcp;
227  using Teuchos::rcp_const_cast;
228  using Teuchos::rcp_implicit_cast;
229  using Teuchos::setStringToIntegralParameter;
230  typedef Teuchos::ParameterEntryValidator PEV;
231 
232  if (validParams_.is_null ()) {
233  RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
234 
235  // Set a validator that automatically converts from the valid
236  // string options to their enum values.
237  Array<std::string> precTypes (5);
238  precTypes[0] = "Jacobi";
239  precTypes[1] = "Gauss-Seidel";
240  precTypes[2] = "Symmetric Gauss-Seidel";
241  precTypes[3] = "MT Gauss-Seidel";
242  precTypes[4] = "MT Symmetric Gauss-Seidel";
243  Array<Details::RelaxationType> precTypeEnums (5);
244  precTypeEnums[0] = Details::JACOBI;
245  precTypeEnums[1] = Details::GS;
246  precTypeEnums[2] = Details::SGS;
247  precTypeEnums[3] = Details::MTGS;
248  precTypeEnums[4] = Details::MTSGS;
249  const std::string defaultPrecType ("Jacobi");
250  setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
251  defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
252  pl.getRawPtr ());
253 
254  const int numSweeps = 1;
255  RCP<PEV> numSweepsValidator =
256  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
257  pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
258  rcp_const_cast<const PEV> (numSweepsValidator));
259 
260  const scalar_type dampingFactor = STS::one ();
261  pl->set ("relaxation: damping factor", dampingFactor);
262 
263  const bool zeroStartingSolution = true;
264  pl->set ("relaxation: zero starting solution", zeroStartingSolution);
265 
266  const bool doBackwardGS = false;
267  pl->set ("relaxation: backward mode", doBackwardGS);
268 
269  const bool doL1Method = false;
270  pl->set ("relaxation: use l1", doL1Method);
271 
272  const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
273  (STM::one() + STM::one()); // 1.5
274  pl->set ("relaxation: l1 eta", l1eta);
275 
276  const scalar_type minDiagonalValue = STS::zero ();
277  pl->set ("relaxation: min diagonal value", minDiagonalValue);
278 
279  const bool fixTinyDiagEntries = false;
280  pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
281 
282  const bool checkDiagEntries = false;
283  pl->set ("relaxation: check diagonal entries", checkDiagEntries);
284 
285  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
286  pl->set("relaxation: local smoothing indices", localSmoothingIndices);
287 
288  validParams_ = rcp_const_cast<const ParameterList> (pl);
289  }
290  return validParams_;
291 }
292 
293 
294 template<class MatrixType>
295 void Relaxation<MatrixType>::setParametersImpl (Teuchos::ParameterList& pl)
296 {
297  using Teuchos::getIntegralValue;
298  using Teuchos::ParameterList;
299  using Teuchos::RCP;
300  typedef scalar_type ST; // just to make code below shorter
301 
302  pl.validateParametersAndSetDefaults (* getValidParameters ());
303 
304  const Details::RelaxationType precType =
305  getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
306  const int numSweeps = pl.get<int> ("relaxation: sweeps");
307  const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
308  const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
309  const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
310  const bool doL1Method = pl.get<bool> ("relaxation: use l1");
311  const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
312  const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
313  const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
314  const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
315  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
316 
317 
318  // "Commit" the changes, now that we've validated everything.
319  PrecType_ = precType;
320  NumSweeps_ = numSweeps;
321  DampingFactor_ = dampingFactor;
322  ZeroStartingSolution_ = zeroStartSol;
323  DoBackwardGS_ = doBackwardGS;
324  DoL1Method_ = doL1Method;
325  L1Eta_ = l1Eta;
326  MinDiagonalValue_ = minDiagonalValue;
327  fixTinyDiagEntries_ = fixTinyDiagEntries;
328  checkDiagEntries_ = checkDiagEntries;
329  localSmoothingIndices_ = localSmoothingIndices;
330 }
331 
332 
333 template<class MatrixType>
334 void Relaxation<MatrixType>::setParameters (const Teuchos::ParameterList& pl)
335 {
336  // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
337  // but otherwise, we will get [unused] in pl
338  this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
339 }
340 
341 
342 template<class MatrixType>
343 Teuchos::RCP<const Teuchos::Comm<int> >
345  TEUCHOS_TEST_FOR_EXCEPTION(
346  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
347  "The input matrix A is null. Please call setMatrix() with a nonnull "
348  "input matrix before calling this method.");
349  return A_->getRowMap ()->getComm ();
350 }
351 
352 
353 template<class MatrixType>
354 Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
356  return A_;
357 }
358 
359 
360 template<class MatrixType>
361 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
362  typename MatrixType::global_ordinal_type,
363  typename MatrixType::node_type> >
365  TEUCHOS_TEST_FOR_EXCEPTION(
366  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
367  "The input matrix A is null. Please call setMatrix() with a nonnull "
368  "input matrix before calling this method.");
369  return A_->getDomainMap ();
370 }
371 
372 
373 template<class MatrixType>
374 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
375  typename MatrixType::global_ordinal_type,
376  typename MatrixType::node_type> >
378  TEUCHOS_TEST_FOR_EXCEPTION(
379  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
380  "The input matrix A is null. Please call setMatrix() with a nonnull "
381  "input matrix before calling this method.");
382  return A_->getRangeMap ();
383 }
384 
385 
386 template<class MatrixType>
388  return true;
389 }
390 
391 
392 template<class MatrixType>
394  return(NumInitialize_);
395 }
396 
397 
398 template<class MatrixType>
400  return(NumCompute_);
401 }
402 
403 
404 template<class MatrixType>
406  return(NumApply_);
407 }
408 
409 
410 template<class MatrixType>
412  return(InitializeTime_);
413 }
414 
415 
416 template<class MatrixType>
418  return(ComputeTime_);
419 }
420 
421 
422 template<class MatrixType>
424  return(ApplyTime_);
425 }
426 
427 
428 template<class MatrixType>
430  return(ComputeFlops_);
431 }
432 
433 
434 template<class MatrixType>
436  return(ApplyFlops_);
437 }
438 
439 
440 
441 template<class MatrixType>
443  TEUCHOS_TEST_FOR_EXCEPTION(
444  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getNodeSmootherComplexity: "
445  "The input matrix A is null. Please call setMatrix() with a nonnull "
446  "input matrix, then call compute(), before calling this method.");
447  // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
448  return A_->getNodeNumRows() + A_->getNodeNumEntries();
449 }
450 
451 
452 template<class MatrixType>
453 void
455 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
456  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
457  Teuchos::ETransp mode,
458  scalar_type alpha,
459  scalar_type beta) const
460 {
461  using Teuchos::as;
462  using Teuchos::RCP;
463  using Teuchos::rcp;
464  using Teuchos::rcpFromRef;
465  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
467  TEUCHOS_TEST_FOR_EXCEPTION(
468  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
469  "The input matrix A is null. Please call setMatrix() with a nonnull "
470  "input matrix, then call compute(), before calling this method.");
471  TEUCHOS_TEST_FOR_EXCEPTION(
472  ! isComputed (),
473  std::runtime_error,
474  "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
475  "preconditioner instance before you may call apply(). You may call "
476  "isComputed() to find out if compute() has been called already.");
477  TEUCHOS_TEST_FOR_EXCEPTION(
478  X.getNumVectors() != Y.getNumVectors(),
479  std::runtime_error,
480  "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
481  "X has " << X.getNumVectors() << " columns, but Y has "
482  << Y.getNumVectors() << " columns.");
483  TEUCHOS_TEST_FOR_EXCEPTION(
484  beta != STS::zero (), std::logic_error,
485  "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
486  "implemented.");
487  {
488  // Reset the timer each time, since Relaxation uses the same Time
489  // object to track times for different methods.
490  Teuchos::TimeMonitor timeMon (*Time_, true);
491 
492  // Special case: alpha == 0.
493  if (alpha == STS::zero ()) {
494  // No floating-point operations, so no need to update a count.
495  Y.putScalar (STS::zero ());
496  }
497  else {
498  // If X and Y alias one another, then we need to create an
499  // auxiliary vector, Xcopy (a deep copy of X).
500  RCP<const MV> Xcopy;
501  // FIXME (mfh 12 Sep 2014) This test for aliasing is incomplete.
502  {
503  auto X_lcl_host = X.template getLocalView<Kokkos::HostSpace> ();
504  auto Y_lcl_host = Y.template getLocalView<Kokkos::HostSpace> ();
505  if (X_lcl_host.ptr_on_device () == Y_lcl_host.ptr_on_device ()) {
506  Xcopy = rcp (new MV (X, Teuchos::Copy));
507  } else {
508  Xcopy = rcpFromRef (X);
509  }
510  }
511 
512  // Each of the following methods updates the flop count itself.
513  // All implementations handle zeroing out the starting solution
514  // (if necessary) themselves.
515  switch (PrecType_) {
516  case Ifpack2::Details::JACOBI:
517  ApplyInverseJacobi(*Xcopy,Y);
518  break;
519  case Ifpack2::Details::GS:
520  ApplyInverseGS(*Xcopy,Y);
521  break;
522  case Ifpack2::Details::SGS:
523  ApplyInverseSGS(*Xcopy,Y);
524  break;
525  case Ifpack2::Details::MTSGS:
526  ApplyInverseMTSGS_CrsMatrix(*Xcopy,Y);
527  break;
528  case Ifpack2::Details::MTGS:
529  ApplyInverseMTGS_CrsMatrix(*Xcopy,Y);
530  break;
531 
532  default:
533  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
534  "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
535  << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
536  }
537  if (alpha != STS::one ()) {
538  Y.scale (alpha);
539  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
540  const double numVectors = as<double> (Y.getNumVectors ());
541  ApplyFlops_ += numGlobalRows * numVectors;
542  }
543  }
544  }
545  ApplyTime_ += Time_->totalElapsedTime ();
546  ++NumApply_;
547 }
548 
549 
550 template<class MatrixType>
551 void
553 applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
554  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
555  Teuchos::ETransp mode) const
556 {
557  TEUCHOS_TEST_FOR_EXCEPTION(
558  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
559  "The input matrix A is null. Please call setMatrix() with a nonnull "
560  "input matrix, then call compute(), before calling this method.");
561  TEUCHOS_TEST_FOR_EXCEPTION(
562  ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
563  "isComputed() must be true before you may call applyMat(). "
564  "Please call compute() before calling this method.");
565  TEUCHOS_TEST_FOR_EXCEPTION(
566  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
567  "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
568  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
569  A_->apply (X, Y, mode);
570 }
571 
572 
573 template<class MatrixType>
575 {
576  TEUCHOS_TEST_FOR_EXCEPTION
577  (A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::initialize: "
578  "The input matrix A is null. Please call setMatrix() with a nonnull "
579  "input matrix before calling this method.");
580  Teuchos::TimeMonitor timeMon (*Time_, true);
581 
582  if (A_.is_null ()) {
583  hasBlockCrsMatrix_ = false;
584  }
585  else { // A_ is not null
586  Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
587  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
588  if (A_bcrs.is_null ()) {
589  hasBlockCrsMatrix_ = false;
590  }
591  else { // A_ is a block_crs_matrix_type
592  hasBlockCrsMatrix_ = true;
593  }
594  }
595 
596  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
597 #ifdef HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
598  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
599  TEUCHOS_TEST_FOR_EXCEPTION
600  (crsMat == NULL, std::logic_error, "Ifpack2::Relaxation::initialize: "
601  "Multithreaded Gauss-Seidel methods currently only work when the input "
602  "matrix is a Tpetra::CrsMatrix.");
603 
604 #ifdef HAVE_IFPACK2_DUMP_MTX_MATRIX
605  Tpetra::MatrixMarket::Writer<crs_matrix_type> crs_writer;
606  std::string file_name = "Ifpack2_MT_GS.mtx";
607  Teuchos::RCP<const crs_matrix_type> rcp_crs_mat = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
608  crs_writer.writeSparseFile(file_name, rcp_crs_mat);
609 #endif
610 
611  this->mtKernelHandle_ = Teuchos::rcp (new mt_kernel_handle_type ());
612  if (mtKernelHandle_->get_gs_handle () == NULL) {
613  mtKernelHandle_->create_gs_handle ();
614  }
615  local_matrix_type kcsr = crsMat->getLocalMatrix ();
616 
617  const bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
618  using KokkosKernels::Experimental::Graph::gauss_seidel_symbolic;
619  gauss_seidel_symbolic<mt_kernel_handle_type,
620  lno_row_view_t,
621  lno_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
622  A_->getNodeNumRows (),
623  A_->getNodeNumCols (),
624  kcsr.graph.row_map,
625  kcsr.graph.entries,
626  is_symmetric);
627 #else // HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
628  TEUCHOS_TEST_FOR_EXCEPTION
629  (true, std::logic_error, "The multithreaded implementation of Gauss-Seidel "
630  "is not enabled. Please talk to the Ifpack2 developers about how to "
631  "enable this method.");
632 #endif // HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
633  }
634 
635 
636  InitializeTime_ += Time_->totalElapsedTime ();
637  ++NumInitialize_;
638  isInitialized_ = true;
639 }
640 
641 namespace Impl {
642 template <typename BlockDiagView>
643 struct InvertDiagBlocks {
644  typedef int value_type;
645  typedef typename BlockDiagView::size_type Size;
646 
647 private:
648  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
649  template <typename View>
650  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
651  typename View::device_type, Unmanaged>;
652 
653  typedef typename BlockDiagView::non_const_value_type Scalar;
654  typedef typename BlockDiagView::device_type Device;
655  typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
656  typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
657 
658  UnmanagedView<BlockDiagView> block_diag_;
659  // TODO Use thread team and scratch memory space. In this first
660  // pass, provide workspace for each block.
661  RWrk rwrk_buf_;
662  UnmanagedView<RWrk> rwrk_;
663  IWrk iwrk_buf_;
664  UnmanagedView<IWrk> iwrk_;
665 
666 public:
667  InvertDiagBlocks (BlockDiagView& block_diag)
668  : block_diag_(block_diag)
669  {
670  const auto blksz = block_diag.dimension_1();
671  Kokkos::resize(rwrk_buf_, block_diag_.dimension_0(), blksz);
672  rwrk_ = rwrk_buf_;
673  Kokkos::resize(iwrk_buf_, block_diag_.dimension_0(), blksz);
674  iwrk_ = iwrk_buf_;
675  }
676 
677  KOKKOS_INLINE_FUNCTION
678  void operator() (const Size i, int& jinfo) const {
679  auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
680  auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
681  auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
682  int info = 0;
683  Tpetra::Experimental::GETF2(D_cur, ipiv, info);
684  if (info) {
685  ++jinfo;
686  return;
687  }
688  Tpetra::Experimental::GETRI(D_cur, ipiv, work, info);
689  if (info) ++jinfo;
690  }
691 
692  // Report the number of blocks with errors.
693  KOKKOS_INLINE_FUNCTION
694  void join (volatile value_type& dst, volatile value_type const& src) const { dst += src; }
695 };
696 }
697 
698 template<class MatrixType>
700 {
701  using Kokkos::ALL;
702  using Teuchos::Array;
703  using Teuchos::ArrayRCP;
704  using Teuchos::ArrayView;
705  using Teuchos::as;
706  using Teuchos::Comm;
707  using Teuchos::RCP;
708  using Teuchos::rcp;
709  using Teuchos::REDUCE_MAX;
710  using Teuchos::REDUCE_MIN;
711  using Teuchos::REDUCE_SUM;
712  using Teuchos::rcp_dynamic_cast;
713  using Teuchos::reduceAll;
714  typedef local_ordinal_type LO;
715  typedef typename node_type::device_type device_type;
716 
717  {
718  // Reset the timer each time, since Relaxation uses the same Time
719  // object to track times for different methods.
720  Teuchos::TimeMonitor timeMon (*Time_, true);
721 
722  TEUCHOS_TEST_FOR_EXCEPTION(
723  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
724  "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
725  "with a nonnull input matrix, then call initialize(), before calling "
726  "this method.");
727  const block_crs_matrix_type* blockCrsA =
728  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
729  TEUCHOS_TEST_FOR_EXCEPTION(
730  blockCrsA == NULL, std::logic_error, "Ifpack2::Relaxation::"
731  "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
732  "got this far. Please report this bug to the Ifpack2 developers.");
733 
734  const scalar_type one = STS::one ();
735 
736  // Reset state.
737  IsComputed_ = false;
738 
739  const LO lclNumMeshRows =
740  blockCrsA->getCrsGraph ().getNodeNumRows ();
741  const LO blockSize = blockCrsA->getBlockSize ();
742 
743  if (! savedDiagOffsets_) {
744  blockDiag_ = block_diag_type (); // clear it before reallocating
745  blockDiag_ = block_diag_type ("Ifpack2::Relaxation::blockDiag_",
746  lclNumMeshRows, blockSize, blockSize);
747  if (Teuchos::as<LO>(diagOffsets_.dimension_0 () ) < lclNumMeshRows) {
748  // Clear diagOffsets_ first (by assigning an empty View to it)
749  // to save memory, before reallocating.
750  diagOffsets_ = Kokkos::View<size_t*, device_type> ();
751  diagOffsets_ = Kokkos::View<size_t*, device_type> ("offsets", lclNumMeshRows);
752  }
753  blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
754  TEUCHOS_TEST_FOR_EXCEPTION
755  (static_cast<size_t> (diagOffsets_.dimension_0 ()) !=
756  static_cast<size_t> (blockDiag_.dimension_0 ()),
757  std::logic_error, "diagOffsets_.dimension_0() = " <<
758  diagOffsets_.dimension_0 () << " != blockDiag_.dimension_0() = "
759  << blockDiag_.dimension_0 () <<
760  ". Please report this bug to the Ifpack2 developers.");
761  savedDiagOffsets_ = true;
762  }
763  blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
764 
765  // Use an unmanaged View in this method, so that when we take
766  // subviews of it (to get each diagonal block), we don't have to
767  // touch the reference count. Reference count updates are a
768  // thread scalability bottleneck and have a performance cost even
769  // without using threads.
770  unmanaged_block_diag_type blockDiag = blockDiag_;
771 
772  if (DoL1Method_ && IsParallel_) {
773  const scalar_type two = one + one;
774  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
775  Array<LO> indices (maxLength);
776  Array<scalar_type> values (maxLength * blockSize * blockSize);
777  size_t numEntries = 0;
778 
779  for (LO i = 0; i < lclNumMeshRows; ++i) {
780  // FIXME (mfh 16 Dec 2015) Get views instead of copies.
781  blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries);
782 
783  auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
784  for (LO subRow = 0; subRow < blockSize; ++subRow) {
785  magnitude_type diagonal_boost = STM::zero ();
786  for (size_t k = 0 ; k < numEntries ; ++k) {
787  if (indices[k] > lclNumMeshRows) {
788  const size_t offset = blockSize*blockSize*k + subRow*blockSize;
789  for (LO subCol = 0; subCol < blockSize; ++subCol) {
790  diagonal_boost += STS::magnitude (values[offset+subCol] / two);
791  }
792  }
793  }
794  if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
795  diagBlock(subRow, subRow) += diagonal_boost;
796  }
797  }
798  }
799  }
800 
801  int info = 0;
802  {
803  Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
804  typedef typename unmanaged_block_diag_type::execution_space exec_space;
805  typedef Kokkos::RangePolicy<exec_space, LO> range_type;
806 
807  Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
808  // FIXME (mfh 19 May 2017) Different processes may not
809  // necessarily have this error all at the same time, so it would
810  // be better just to preserve a local error state and let users
811  // check.
812  TEUCHOS_TEST_FOR_EXCEPTION
813  (info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info
814  << " diagonal blocks.");
815  }
816 
817  // In a debug build, do an extra test to make sure that all the
818  // factorizations were computed correctly.
819 #ifdef HAVE_IFPACK2_DEBUG
820  const int numResults = 2;
821  // Use "max = -min" trick to get min and max in a single all-reduce.
822  int lclResults[2], gblResults[2];
823  lclResults[0] = info;
824  lclResults[1] = -info;
825  gblResults[0] = 0;
826  gblResults[1] = 0;
827  reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
828  numResults, lclResults, gblResults);
829  TEUCHOS_TEST_FOR_EXCEPTION
830  (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
831  "Ifpack2::Relaxation::compute: When processing the input "
832  "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
833  "failed on one or more (MPI) processes.");
834 #endif // HAVE_IFPACK2_DEBUG
835 
836  Importer_ = A_->getGraph ()->getImporter ();
837  } // end TimeMonitor scope
838 
839  ComputeTime_ += Time_->totalElapsedTime ();
840  ++NumCompute_;
841  IsComputed_ = true;
842 }
843 
844 template<class MatrixType>
846 {
847  using Teuchos::Array;
848  using Teuchos::ArrayRCP;
849  using Teuchos::ArrayView;
850  using Teuchos::as;
851  using Teuchos::Comm;
852  using Teuchos::RCP;
853  using Teuchos::rcp;
854  using Teuchos::REDUCE_MAX;
855  using Teuchos::REDUCE_MIN;
856  using Teuchos::REDUCE_SUM;
857  using Teuchos::rcp_dynamic_cast;
858  using Teuchos::reduceAll;
859  typedef Tpetra::Vector<scalar_type, local_ordinal_type,
860  global_ordinal_type, node_type> vector_type;
861  typedef typename vector_type::device_type device_type;
862  const scalar_type zero = STS::zero ();
863  const scalar_type one = STS::one ();
864 
865  // We don't count initialization in compute() time.
866  if (! isInitialized ()) {
867  initialize ();
868  }
869 
870  if (hasBlockCrsMatrix_) {
871  computeBlockCrs ();
872  return;
873  }
874 
875 
876 
877  {
878  // Reset the timer each time, since Relaxation uses the same Time
879  // object to track times for different methods.
880  Teuchos::TimeMonitor timeMon (*Time_, true);
881 
882  TEUCHOS_TEST_FOR_EXCEPTION(
883  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::compute: "
884  "The input matrix A is null. Please call setMatrix() with a nonnull "
885  "input matrix, then call initialize(), before calling this method.");
886 
887  // Reset state.
888  IsComputed_ = false;
889 
890  TEUCHOS_TEST_FOR_EXCEPTION(
891  NumSweeps_ < 0, std::logic_error,
892  "Ifpack2::Relaxation::compute: NumSweeps_ = " << NumSweeps_ << " < 0. "
893  "Please report this bug to the Ifpack2 developers.");
894 
895  Diagonal_ = rcp (new vector_type (A_->getRowMap ()));
896 
897  // Extract the diagonal entries. The CrsMatrix static graph
898  // version is faster for subsequent calls to compute(), since it
899  // caches the diagonal offsets.
900  //
901  // isStaticGraph() == true means that the matrix was created with
902  // a const graph. The only requirement is that the structure of
903  // the matrix never changes, so isStaticGraph() == true is a bit
904  // more conservative than we need. However, Tpetra doesn't (as of
905  // 05 Apr 2013) have a way to tell if the graph hasn't changed
906  // since the last time we used it.
907  {
908  // NOTE (mfh 07 Jul 2013): We must cast here to CrsMatrix
909  // instead of MatrixType, because isStaticGraph is a CrsMatrix
910  // method (not inherited from RowMatrix's interface). It's
911  // perfectly valid to do relaxation on a RowMatrix which is not
912  // a CrsMatrix.
913  const crs_matrix_type* crsMat =
914  dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
915  if (crsMat == NULL || ! crsMat->isStaticGraph ()) {
916  A_->getLocalDiagCopy (*Diagonal_); // slow path
917  } else {
918  if (! savedDiagOffsets_) { // we haven't precomputed offsets
919  const size_t lclNumRows = A_->getRowMap ()->getNodeNumElements ();
920  if (diagOffsets_.dimension_0 () < lclNumRows) {
921  typedef typename node_type::device_type DT;
922  diagOffsets_ = Kokkos::View<size_t*, DT> (); // clear 1st to save mem
923  diagOffsets_ = Kokkos::View<size_t*, DT> ("offsets", lclNumRows);
924  }
925  crsMat->getCrsGraph ()->getLocalDiagOffsets (diagOffsets_);
926  savedDiagOffsets_ = true;
927  }
928  crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_);
929 #ifdef HAVE_IFPACK2_DEBUG
930  // Validate the fast-path diagonal against the slow-path diagonal.
931  vector_type D_copy (A_->getRowMap ());
932  A_->getLocalDiagCopy (D_copy);
933  D_copy.update (STS::one (), *Diagonal_, -STS::one ());
934  const magnitude_type err = D_copy.normInf ();
935  // The two diagonals should be exactly the same, so their
936  // difference should be exactly zero.
937  TEUCHOS_TEST_FOR_EXCEPTION(
938  err != STM::zero(), std::logic_error, "Ifpack2::Relaxation::compute: "
939  "\"fast-path\" diagonal computation failed. \\|D1 - D2\\|_inf = "
940  << err << ".");
941 #endif // HAVE_IFPACK2_DEBUG
942  }
943  }
944 
945  // If we're checking the computed inverse diagonal, then keep a
946  // copy of the original diagonal entries for later comparison.
947  RCP<vector_type> origDiag;
948  if (checkDiagEntries_) {
949  origDiag = rcp (new vector_type (A_->getRowMap ()));
950  Tpetra::deep_copy (*origDiag, *Diagonal_);
951  }
952 
953  const size_t numMyRows = A_->getNodeNumRows ();
954 
955  // We're about to read and write diagonal entries on the host.
956  Diagonal_->template sync<Kokkos::HostSpace> ();
957  Diagonal_->template modify<Kokkos::HostSpace> ();
958  auto diag_2d = Diagonal_->template getLocalView<Kokkos::HostSpace> ();
959  auto diag_1d = Kokkos::subview (diag_2d, Kokkos::ALL (), 0);
960  // FIXME (mfh 12 Jan 2016) temp fix for Kokkos::complex vs. std::complex.
961  scalar_type* const diag = reinterpret_cast<scalar_type*> (diag_1d.ptr_on_device ());
962 
963  // Setup for L1 Methods.
964  // Here we add half the value of the off-processor entries in the row,
965  // but only if diagonal isn't sufficiently large.
966  //
967  // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
968  // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
969  // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
970  if (DoL1Method_ && IsParallel_) {
971  const scalar_type two = one + one;
972  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
973  Array<local_ordinal_type> indices (maxLength);
974  Array<scalar_type> values (maxLength);
975  size_t numEntries;
976 
977  for (size_t i = 0; i < numMyRows; ++i) {
978  A_->getLocalRowCopy (i, indices (), values (), numEntries);
979  magnitude_type diagonal_boost = STM::zero ();
980  for (size_t k = 0 ; k < numEntries ; ++k) {
981  if (static_cast<size_t> (indices[k]) > numMyRows) {
982  diagonal_boost += STS::magnitude (values[k] / two);
983  }
984  }
985  if (STS::magnitude (diag[i]) < L1Eta_ * diagonal_boost) {
986  diag[i] += diagonal_boost;
987  }
988  }
989  }
990 
991  //
992  // Invert the diagonal entries of the matrix (not in place).
993  //
994 
995  // Precompute some quantities for "fixing" zero or tiny diagonal
996  // entries. We'll only use them if this "fixing" is enabled.
997  //
998  // SmallTraits covers for the lack of eps() in
999  // Teuchos::ScalarTraits when its template parameter is not a
1000  // floating-point type. (Ifpack2 sometimes gets instantiated for
1001  // integer Scalar types.)
1002  const scalar_type oneOverMinDiagVal = (MinDiagonalValue_ == zero) ?
1003  one / SmallTraits<scalar_type>::eps () :
1004  one / MinDiagonalValue_;
1005  // It's helpful not to have to recompute this magnitude each time.
1006  const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1007 
1008  if (checkDiagEntries_) {
1009  //
1010  // Check diagonal elements, replace zero elements with the minimum
1011  // diagonal value, and store their inverses. Count the number of
1012  // "small" and zero diagonal entries while we're at it.
1013  //
1014  size_t numSmallDiagEntries = 0; // "small" includes zero
1015  size_t numZeroDiagEntries = 0; // # zero diagonal entries
1016  size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1017 
1018  // As we go, keep track of the diagonal entries with the least and
1019  // greatest magnitude. We could use the trick of starting the min
1020  // with +Inf and the max with -Inf, but that doesn't work if
1021  // scalar_type is a built-in integer type. Thus, we have to start
1022  // by reading the first diagonal entry redundantly.
1023  // scalar_type minMagDiagEntry = zero;
1024  // scalar_type maxMagDiagEntry = zero;
1025  magnitude_type minMagDiagEntryMag = STM::zero ();
1026  magnitude_type maxMagDiagEntryMag = STM::zero ();
1027  if (numMyRows > 0) {
1028  const scalar_type d_0 = diag[0];
1029  const magnitude_type d_0_mag = STS::magnitude (d_0);
1030  // minMagDiagEntry = d_0;
1031  // maxMagDiagEntry = d_0;
1032  minMagDiagEntryMag = d_0_mag;
1033  maxMagDiagEntryMag = d_0_mag;
1034  }
1035 
1036  // Go through all the diagonal entries. Compute counts of
1037  // small-magnitude, zero, and negative-real-part entries. Invert
1038  // the diagonal entries that aren't too small. For those that are
1039  // too small in magnitude, replace them with 1/MinDiagonalValue_
1040  // (or 1/eps if MinDiagonalValue_ happens to be zero).
1041  for (size_t i = 0 ; i < numMyRows; ++i) {
1042  const scalar_type d_i = diag[i];
1043  const magnitude_type d_i_mag = STS::magnitude (d_i);
1044  const magnitude_type d_i_real = STS::real (d_i);
1045 
1046  // We can't compare complex numbers, but we can compare their
1047  // real parts.
1048  if (d_i_real < STM::zero ()) {
1049  ++numNegDiagEntries;
1050  }
1051  if (d_i_mag < minMagDiagEntryMag) {
1052  // minMagDiagEntry = d_i;
1053  minMagDiagEntryMag = d_i_mag;
1054  }
1055  if (d_i_mag > maxMagDiagEntryMag) {
1056  // maxMagDiagEntry = d_i;
1057  maxMagDiagEntryMag = d_i_mag;
1058  }
1059 
1060  if (fixTinyDiagEntries_) {
1061  // <= not <, in case minDiagValMag is zero.
1062  if (d_i_mag <= minDiagValMag) {
1063  ++numSmallDiagEntries;
1064  if (d_i_mag == STM::zero ()) {
1065  ++numZeroDiagEntries;
1066  }
1067  diag[i] = oneOverMinDiagVal;
1068  } else {
1069  diag[i] = one / d_i;
1070  }
1071  }
1072  else { // Don't fix zero or tiny diagonal entries.
1073  // <= not <, in case minDiagValMag is zero.
1074  if (d_i_mag <= minDiagValMag) {
1075  ++numSmallDiagEntries;
1076  if (d_i_mag == STM::zero ()) {
1077  ++numZeroDiagEntries;
1078  }
1079  }
1080  diag[i] = one / d_i;
1081  }
1082  }
1083 
1084  // Count floating-point operations of computing the inverse diagonal.
1085  //
1086  // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1087  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1088  ComputeFlops_ += 4.0 * numMyRows;
1089  } else {
1090  ComputeFlops_ += numMyRows;
1091  }
1092 
1093  // Collect global data about the diagonal entries. Only do this
1094  // (which involves O(1) all-reduces) if the user asked us to do
1095  // the extra work.
1096  //
1097  // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1098  // zero rows. Fixing this requires one of two options:
1099  //
1100  // 1. Use a custom reduction operation that ignores processes
1101  // with zero diagonal entries.
1102  // 2. Split the communicator, compute all-reduces using the
1103  // subcommunicator over processes that have a nonzero number
1104  // of diagonal entries, and then broadcast from one of those
1105  // processes (if there is one) to the processes in the other
1106  // subcommunicator.
1107  const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1108 
1109  // Compute global min and max magnitude of entries.
1110  Array<magnitude_type> localVals (2);
1111  localVals[0] = minMagDiagEntryMag;
1112  // (- (min (- x))) is the same as (max x).
1113  localVals[1] = -maxMagDiagEntryMag;
1114  Array<magnitude_type> globalVals (2);
1115  reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1116  localVals.getRawPtr (),
1117  globalVals.getRawPtr ());
1118  globalMinMagDiagEntryMag_ = globalVals[0];
1119  globalMaxMagDiagEntryMag_ = -globalVals[1];
1120 
1121  Array<size_t> localCounts (3);
1122  localCounts[0] = numSmallDiagEntries;
1123  localCounts[1] = numZeroDiagEntries;
1124  localCounts[2] = numNegDiagEntries;
1125  Array<size_t> globalCounts (3);
1126  reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1127  localCounts.getRawPtr (),
1128  globalCounts.getRawPtr ());
1129  globalNumSmallDiagEntries_ = globalCounts[0];
1130  globalNumZeroDiagEntries_ = globalCounts[1];
1131  globalNumNegDiagEntries_ = globalCounts[2];
1132 
1133  // Forestall "set but not used" compiler warnings.
1134  // (void) minMagDiagEntry;
1135  // (void) maxMagDiagEntry;
1136 
1137  // Compute and save the difference between the computed inverse
1138  // diagonal, and the original diagonal's inverse.
1139  //
1140  // NOTE (mfh 11 Jan 2016) We need to sync Diagonal_ back from
1141  // host to device for the update kernel below, and we don't need
1142  // to modify it or sync it back again here.
1143  vector_type diff (A_->getRowMap ());
1144  diff.reciprocal (*origDiag);
1145  Diagonal_->template sync<device_type> ();
1146  diff.update (-one, *Diagonal_, one);
1147  globalDiagNormDiff_ = diff.norm2 ();
1148  }
1149  else { // don't check diagonal elements
1150  if (fixTinyDiagEntries_) {
1151  // Go through all the diagonal entries. Invert those that
1152  // aren't too small in magnitude. For those that are too
1153  // small in magnitude, replace them with oneOverMinDiagVal.
1154  for (size_t i = 0 ; i < numMyRows; ++i) {
1155  const scalar_type d_i = diag[i];
1156  const magnitude_type d_i_mag = STS::magnitude (d_i);
1157 
1158  // <= not <, in case minDiagValMag is zero.
1159  if (d_i_mag <= minDiagValMag) {
1160  diag[i] = oneOverMinDiagVal;
1161  } else {
1162  diag[i] = one / d_i;
1163  }
1164  }
1165  }
1166  else { // don't fix tiny or zero diagonal entries
1167  for (size_t i = 0 ; i < numMyRows; ++i) {
1168  diag[i] = one / diag[i];
1169  }
1170  }
1171  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1172  ComputeFlops_ += 4.0 * numMyRows;
1173  } else {
1174  ComputeFlops_ += numMyRows;
1175  }
1176  }
1177 
1178  if (IsParallel_ && (PrecType_ == Ifpack2::Details::GS ||
1179  PrecType_ == Ifpack2::Details::SGS)) {
1180  // mfh 21 Mar 2013: The Import object may be null, but in that
1181  // case, the domain and column Maps are the same and we don't
1182  // need to Import anyway.
1183  Importer_ = A_->getGraph ()->getImporter ();
1184  Diagonal_->template sync<device_type> ();
1185  }
1186 #ifdef HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
1187  //KokkosKernels GaussSiedel Initialization.
1188  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
1189  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
1190  TEUCHOS_TEST_FOR_EXCEPTION
1191  (crsMat == NULL, std::logic_error, "Ifpack2::Relaxation::compute: "
1192  "Multithreaded Gauss-Seidel methods currently only work when the input "
1193  "matrix is a Tpetra::CrsMatrix.");
1194  local_matrix_type kcsr = crsMat->getLocalMatrix ();
1195 
1196  const bool is_symmetric = (PrecType_ == Ifpack2::Details::MTSGS);
1197  using KokkosKernels::Experimental::Graph::gauss_seidel_numeric;
1198  gauss_seidel_numeric<mt_kernel_handle_type,
1199  lno_row_view_t,
1200  lno_nonzero_view_t,
1201  scalar_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
1202  A_->getNodeNumRows (), A_->getNodeNumCols (),
1203  kcsr.graph.row_map,
1204  kcsr.graph.entries,
1205  kcsr.values,
1206  is_symmetric);
1207  }
1208 #endif // HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
1209  } // end TimeMonitor scope
1210 
1211  ComputeTime_ += Time_->totalElapsedTime ();
1212  ++NumCompute_;
1213  IsComputed_ = true;
1214 }
1215 
1216 
1217 template<class MatrixType>
1218 void
1220 ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1221  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1222 {
1223  using Teuchos::as;
1224  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
1226 
1227  if (hasBlockCrsMatrix_) {
1228  ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1229  return;
1230  }
1231 
1232  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1233  const double numVectors = as<double> (X.getNumVectors ());
1234  if (ZeroStartingSolution_) {
1235  // For the first Jacobi sweep, if we are allowed to assume that
1236  // the initial guess is zero, then Jacobi is just diagonal
1237  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1238  // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1239  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1240 
1241  // Count (global) floating-point operations. Ifpack2 represents
1242  // this as a floating-point number rather than an integer, so that
1243  // overflow (for a very large number of calls, or a very large
1244  // problem) is approximate instead of catastrophic.
1245  double flopUpdate = 0.0;
1246  if (DampingFactor_ == STS::one ()) {
1247  // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1248  flopUpdate = numGlobalRows * numVectors;
1249  } else {
1250  // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1251  // Two multiplies per entry of Y.
1252  flopUpdate = 2.0 * numGlobalRows * numVectors;
1253  }
1254  ApplyFlops_ += flopUpdate;
1255  if (NumSweeps_ == 1) {
1256  return;
1257  }
1258  }
1259  // If we were allowed to assume that the starting guess was zero,
1260  // then we have already done the first sweep above.
1261  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1262  // We don't need to initialize the result MV, since the sparse
1263  // mat-vec will clobber its contents anyway.
1264  MV A_times_Y (Y.getMap (), as<size_t>(numVectors), false);
1265  for (int j = startSweep; j < NumSweeps_; ++j) {
1266  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1267  applyMat (Y, A_times_Y);
1268  A_times_Y.update (STS::one (), X, -STS::one ());
1269  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, A_times_Y, STS::one ());
1270  }
1271 
1272  // For each column of output, for each pass over the matrix:
1273  //
1274  // - One + and one * for each matrix entry
1275  // - One / and one + for each row of the matrix
1276  // - If the damping factor is not one: one * for each row of the
1277  // matrix. (It's not fair to count this if the damping factor is
1278  // one, since the implementation could skip it. Whether it does
1279  // or not is the implementation's choice.)
1280 
1281  // Floating-point operations due to the damping factor, per matrix
1282  // row, per direction, per columm of output.
1283  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1284  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1285  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1286  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1287 }
1288 
1289 template<class MatrixType>
1290 void
1292 ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type,
1295  node_type>& X,
1296  Tpetra::MultiVector<scalar_type,
1299  node_type>& Y) const
1300 {
1301  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
1303 
1304  const block_crs_matrix_type* blockMatConst =
1305  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1306  TEUCHOS_TEST_FOR_EXCEPTION
1307  (blockMatConst == NULL, std::logic_error, "This method should never be "
1308  "called if the matrix A_ is not a BlockCrsMatrix. Please report this "
1309  "bug to the Ifpack2 developers.");
1310  // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1311  // This is because applyBlock() is nonconst (more accurate), while
1312  // apply() is const (required by Tpetra::Operator interface, but a
1313  // lie, because it possibly allocates temporary buffers).
1314  block_crs_matrix_type* blockMat =
1315  const_cast<block_crs_matrix_type*> (blockMatConst);
1316 
1317  auto meshRowMap = blockMat->getRowMap ();
1318  auto meshColMap = blockMat->getColMap ();
1319  const local_ordinal_type blockSize = blockMat->getBlockSize ();
1320 
1321  BMV X_blk (X, *meshColMap, blockSize);
1322  BMV Y_blk (Y, *meshRowMap, blockSize);
1323 
1324  if (ZeroStartingSolution_) {
1325  // For the first sweep, if we are allowed to assume that the
1326  // initial guess is zero, then block Jacobi is just block diagonal
1327  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1328  Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1329  if (NumSweeps_ == 1) {
1330  return;
1331  }
1332  }
1333 
1334  auto pointRowMap = Y.getMap ();
1335  const size_t numVecs = X.getNumVectors ();
1336 
1337  // We don't need to initialize the result MV, since the sparse
1338  // mat-vec will clobber its contents anyway.
1339  BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1340 
1341  // If we were allowed to assume that the starting guess was zero,
1342  // then we have already done the first sweep above.
1343  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1344 
1345  for (int j = startSweep; j < NumSweeps_; ++j) {
1346  blockMat->applyBlock (Y_blk, A_times_Y);
1347  // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1348  Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1349  X_blk, A_times_Y, STS::one ());
1350  }
1351 }
1352 
1353 template<class MatrixType>
1354 void
1356 ApplyInverseGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1357  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1358 {
1359  typedef Relaxation<MatrixType> this_type;
1360  // The CrsMatrix version is faster, because it can access the sparse
1361  // matrix data directly, rather than by copying out each row's data
1362  // in turn. Thus, we check whether the RowMatrix is really a
1363  // CrsMatrix.
1364  //
1365  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1366  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1367  // will still be correct if the cast fails, but it will use an
1368  // unoptimized kernel.
1369  const block_crs_matrix_type* blockCrsMat =
1370  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1371  const crs_matrix_type* crsMat =
1372  dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
1373  if (blockCrsMat != NULL) {
1374  const_cast<this_type*> (this)->ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
1375  } else if (crsMat != NULL) {
1376  ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
1377  } else {
1378  ApplyInverseGS_RowMatrix (X, Y);
1379  }
1380 }
1381 
1382 
1383 template<class MatrixType>
1384 void
1386 ApplyInverseGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1387  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1388 {
1389  using Teuchos::Array;
1390  using Teuchos::ArrayRCP;
1391  using Teuchos::ArrayView;
1392  using Teuchos::as;
1393  using Teuchos::RCP;
1394  using Teuchos::rcp;
1395  using Teuchos::rcpFromRef;
1396  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1397 
1398  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1399  // starting multivector itself. The generic RowMatrix version here
1400  // does not, so we have to zero out Y here.
1401  if (ZeroStartingSolution_) {
1402  Y.putScalar (STS::zero ());
1403  }
1404 
1405  const size_t NumVectors = X.getNumVectors ();
1406  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1407  Array<local_ordinal_type> Indices (maxLength);
1408  Array<scalar_type> Values (maxLength);
1409 
1410  // Local smoothing stuff
1411  const size_t numMyRows = A_->getNodeNumRows();
1412  const local_ordinal_type* rowInd = 0;
1413  size_t numActive = numMyRows;
1414  bool do_local = ! localSmoothingIndices_.is_null ();
1415  if (do_local) {
1416  rowInd = localSmoothingIndices_.getRawPtr ();
1417  numActive = localSmoothingIndices_.size ();
1418  }
1419 
1420  RCP<MV> Y2;
1421  if (IsParallel_) {
1422  if (Importer_.is_null ()) { // domain and column Maps are the same.
1423  // We will copy Y into Y2 below, so no need to fill with zeros here.
1424  Y2 = rcp (new MV (Y.getMap (), NumVectors, false));
1425  } else {
1426  // FIXME (mfh 21 Mar 2013) We probably don't need to fill with
1427  // zeros here, since we are doing an Import into Y2 below
1428  // anyway. However, it doesn't hurt correctness.
1429  Y2 = rcp (new MV (Importer_->getTargetMap (), NumVectors));
1430  }
1431  }
1432  else {
1433  Y2 = rcpFromRef (Y);
1434  }
1435 
1436  // Diagonal
1437  ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView ();
1438  ArrayView<const scalar_type> d_ptr = d_rcp();
1439 
1440  // Constant stride check
1441  bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
1442 
1443  if (constant_stride) {
1444  // extract 1D RCPs
1445  size_t x_stride = X.getStride();
1446  size_t y2_stride = Y2->getStride();
1447  ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
1448  ArrayRCP<const scalar_type> x_rcp = X.get1dView();
1449  ArrayView<scalar_type> y2_ptr = y2_rcp();
1450  ArrayView<const scalar_type> x_ptr = x_rcp();
1451  Array<scalar_type> dtemp(NumVectors,STS::zero());
1452 
1453  for (int j = 0; j < NumSweeps_; ++j) {
1454  // data exchange is here, once per sweep
1455  if (IsParallel_) {
1456  if (Importer_.is_null ()) {
1457  *Y2 = Y; // just copy, since domain and column Maps are the same
1458  } else {
1459  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1460  }
1461  }
1462 
1463  if (! DoBackwardGS_) { // Forward sweep
1464  for (size_t ii = 0; ii < numActive; ++ii) {
1465  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1466  size_t NumEntries;
1467  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1468  dtemp.assign(NumVectors,STS::zero());
1469 
1470  for (size_t k = 0; k < NumEntries; ++k) {
1471  const local_ordinal_type col = Indices[k];
1472  for (size_t m = 0; m < NumVectors; ++m) {
1473  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1474  }
1475  }
1476 
1477  for (size_t m = 0; m < NumVectors; ++m) {
1478  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1479  }
1480  }
1481  }
1482  else { // Backward sweep
1483  // ptrdiff_t is the same size as size_t, but is signed. Being
1484  // signed is important so that i >= 0 is not trivially true.
1485  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1486  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1487  size_t NumEntries;
1488  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1489  dtemp.assign (NumVectors, STS::zero ());
1490 
1491  for (size_t k = 0; k < NumEntries; ++k) {
1492  const local_ordinal_type col = Indices[k];
1493  for (size_t m = 0; m < NumVectors; ++m) {
1494  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1495  }
1496  }
1497 
1498  for (size_t m = 0; m < NumVectors; ++m) {
1499  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1500  }
1501  }
1502  }
1503  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1504  if (IsParallel_) {
1505  Tpetra::deep_copy (Y, *Y2);
1506  }
1507  }
1508  }
1509  else {
1510  // extract 2D RCPS
1511  ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
1512  ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
1513 
1514  for (int j = 0; j < NumSweeps_; ++j) {
1515  // data exchange is here, once per sweep
1516  if (IsParallel_) {
1517  if (Importer_.is_null ()) {
1518  *Y2 = Y; // just copy, since domain and column Maps are the same
1519  } else {
1520  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1521  }
1522  }
1523 
1524  if (! DoBackwardGS_) { // Forward sweep
1525  for (size_t ii = 0; ii < numActive; ++ii) {
1526  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1527  size_t NumEntries;
1528  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1529 
1530  for (size_t m = 0; m < NumVectors; ++m) {
1531  scalar_type dtemp = STS::zero ();
1532  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1533  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1534 
1535  for (size_t k = 0; k < NumEntries; ++k) {
1536  const local_ordinal_type col = Indices[k];
1537  dtemp += Values[k] * y2_local[col];
1538  }
1539  y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1540  }
1541  }
1542  }
1543  else { // Backward sweep
1544  // ptrdiff_t is the same size as size_t, but is signed. Being
1545  // signed is important so that i >= 0 is not trivially true.
1546  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1547  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1548 
1549  size_t NumEntries;
1550  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1551 
1552  for (size_t m = 0; m < NumVectors; ++m) {
1553  scalar_type dtemp = STS::zero ();
1554  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1555  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1556 
1557  for (size_t k = 0; k < NumEntries; ++k) {
1558  const local_ordinal_type col = Indices[k];
1559  dtemp += Values[k] * y2_local[col];
1560  }
1561  y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1562  }
1563  }
1564  }
1565 
1566  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1567  if (IsParallel_) {
1568  Tpetra::deep_copy (Y, *Y2);
1569  }
1570  }
1571  }
1572 
1573 
1574  // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1575  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1576  const double numVectors = as<double> (X.getNumVectors ());
1577  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1578  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1579  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1580  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1581 }
1582 
1583 
1584 template<class MatrixType>
1585 void
1587 ApplyInverseGS_CrsMatrix (const crs_matrix_type& A,
1588  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1589  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1590 {
1591  using Teuchos::as;
1592  const Tpetra::ESweepDirection direction =
1593  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1594  if (localSmoothingIndices_.is_null ()) {
1595  A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
1596  NumSweeps_, ZeroStartingSolution_);
1597  }
1598  else {
1599  A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
1600  DampingFactor_, direction,
1601  NumSweeps_, ZeroStartingSolution_);
1602  }
1603 
1604  // For each column of output, for each sweep over the matrix:
1605  //
1606  // - One + and one * for each matrix entry
1607  // - One / and one + for each row of the matrix
1608  // - If the damping factor is not one: one * for each row of the
1609  // matrix. (It's not fair to count this if the damping factor is
1610  // one, since the implementation could skip it. Whether it does
1611  // or not is the implementation's choice.)
1612 
1613  // Floating-point operations due to the damping factor, per matrix
1614  // row, per direction, per columm of output.
1615  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1616  const double numVectors = as<double> (X.getNumVectors ());
1617  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1618  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1619  ApplyFlops_ += NumSweeps_ * numVectors *
1620  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1621 }
1622 
// Apply NumSweeps_ damped Gauss-Seidel sweeps using the optimized
// Tpetra::Experimental::BlockCrsMatrix kernel (localGaussSeidel).
// X is the input (right-hand side, viewed in the matrix's column Map);
// Y is the solution / initial guess (viewed in the domain Map).
// Sweep direction comes from DoBackwardGS_.
1623 template<class MatrixType>
1624 void
1626 ApplyInverseGS_BlockCrsMatrix (const block_crs_matrix_type& A,
1627  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1628  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
1629 {
1630  using Teuchos::RCP;
1631  using Teuchos::rcp;
1632  using Teuchos::rcpFromRef;
1633  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
1635  typedef Tpetra::MultiVector<scalar_type,
1637 
1638  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1639  //
1640  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1641  // multiple right-hand sides, unless the input or output MultiVector
1642  // does not have constant stride. We should check for that case
1643  // here, in case it doesn't work in localGaussSeidel (which is
1644  // entirely possible).
1645  BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
1646  const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
1647 
// If the matrix has a nontrivial Importer (off-process column entries),
// the kernel needs Y's data in the column Map; import into a cached
// column-Map block multivector.  Otherwise operate on Y directly.
1648  bool performImport = false;
1649  RCP<BMV> yBlockCol;
1650  if (Importer_.is_null ()) {
1651  yBlockCol = rcpFromRef (yBlock);
1652  }
1653  else {
// (Re)allocate the cached column-Map multivector only if its shape
// (number of vectors or block size) no longer matches Y.
1654  if (yBlockColumnPointMap_.is_null () ||
1655  yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
1656  yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
1657  yBlockColumnPointMap_ =
1658  rcp (new BMV (* (A.getColMap ()), A.getBlockSize (),
1659  static_cast<local_ordinal_type> (yBlock.getNumVectors ())));
1660  }
1661  yBlockCol = yBlockColumnPointMap_;
1662  performImport = true;
1663  }
1664 
// A zero starting solution makes the first import unnecessary; just
// fill with zeros.  Otherwise, bring in Y's current values first.
1665  if (ZeroStartingSolution_) {
1666  yBlockCol->putScalar (STS::zero ());
1667  }
1668  else if (performImport) {
1669  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
1670  }
1671 
1672  const Tpetra::ESweepDirection direction =
1673  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1674 
// Each sweep: refresh off-process entries (skipped on sweep 0, done
// above), run the local block Gauss-Seidel kernel, then copy the
// domain-Map view of the column-Map result back into Y.
1675  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1676  if (performImport && sweep > 0) {
1677  yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
1678  }
1679  A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
1680  DampingFactor_, direction);
1681  if (performImport) {
1682  RCP<const MV> yBlockColPointDomain =
1683  yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
1684  Tpetra::deep_copy (Y, *yBlockColPointDomain);
1685  }
1686  }
1687 }
1688 
1689 
1690 
1691 
// Multithreaded Gauss-Seidel / SOR apply, implemented via KokkosKernels.
// Only available when HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES is
// defined; otherwise this method throws std::logic_error.
//
// X: solution / initial guess (domain Map).  B: right-hand side (range
// Map).  D: diagonal (row Map).  direction selects Forward, Backward,
// or Symmetric sweeps.
//
// NOTE(review): dampingFactor and D are accepted as parameters but are
// not referenced in the sweep loop below; presumably the damping and
// diagonal are baked into mtKernelHandle_ during compute() -- confirm
// against the kernel-handle setup code (not visible here).
1692 template<class MatrixType>
1693 void
1695 MTGaussSeidel (const crs_matrix_type* crsMat,
1696  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1697  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1698  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& D,
1699  const scalar_type& dampingFactor,
1700  const Tpetra::ESweepDirection direction,
1701  const int numSweeps,
1702  const bool zeroInitialGuess) const
1703 {
1704 #ifdef HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
1705  using Teuchos::null;
1706  using Teuchos::RCP;
1707  using Teuchos::rcp;
1708  using Teuchos::rcpFromRef;
1709  using Teuchos::rcp_const_cast;
1710 
1711  typedef scalar_type Scalar;
1712  typedef local_ordinal_type LocalOrdinal;
1713  typedef global_ordinal_type GlobalOrdinal;
1714  typedef node_type Node;
1715 
1716  const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1717  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1718 
// Precondition checks: non-null, fill-complete matrix and a nonnegative
// sweep count.  numSweeps == 0 is a valid no-op.
1719  TEUCHOS_TEST_FOR_EXCEPTION
1720  (crsMat == NULL, std::logic_error, prefix << "The matrix is NULL. This "
1721  "should never happen. Please report this bug to the Ifpack2 developers.");
1722  TEUCHOS_TEST_FOR_EXCEPTION
1723  (! crsMat->isFillComplete (), std::runtime_error, prefix << "The input "
1724  "CrsMatrix is not fill complete. Please call fillComplete on the matrix,"
1725  " then do setup again, before calling apply(). \"Do setup\" means that "
1726  "if only the matrix's values have changed since last setup, you need only"
1727  " call compute(). If the matrix's structure may also have changed, you "
1728  "must first call initialize(), then call compute(). If you have not set"
1729  " up this preconditioner for this matrix before, you must first call "
1730  "initialize(), then call compute().");
1731  TEUCHOS_TEST_FOR_EXCEPTION
1732  (numSweeps < 0, std::invalid_argument, prefix << "The number of sweeps "
1733  "must be nonnegative, but you provided numSweeps = " << numSweeps <<
1734  " < 0.");
1735  if (numSweeps == 0) {
1736  return;
1737  }
1738 
1739  typedef typename Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> MV;
1740  typedef typename crs_matrix_type::import_type import_type;
1741  typedef typename crs_matrix_type::export_type export_type;
1742  typedef typename crs_matrix_type::map_type map_type;
1743 
// This kernel requires row Map == domain Map == range Map; a nontrivial
// Export would contradict that, so reject it up front.
1744  RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1745  RCP<const export_type> exporter = crsMat->getGraph ()->getExporter ();
1746  TEUCHOS_TEST_FOR_EXCEPTION(
1747  ! exporter.is_null (), std::runtime_error,
1748  "This method's implementation currently requires that the matrix's row, "
1749  "domain, and range Maps be the same. This cannot be the case, because "
1750  "the matrix has a nontrivial Export object.");
1751 
1752  RCP<const map_type> domainMap = crsMat->getDomainMap ();
1753  RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1754  RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1755  RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1756 
1757 #ifdef HAVE_IFPACK2_DEBUG
1758  {
1759  // The relation 'isSameAs' is transitive. It's also a
1760  // collective, so we don't have to do a "shared" test for
1761  // exception (i.e., a global reduction on the test value).
1762  TEUCHOS_TEST_FOR_EXCEPTION(
1763  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1764  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1765  "multivector X be in the domain Map of the matrix.");
1766  TEUCHOS_TEST_FOR_EXCEPTION(
1767  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1768  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1769  "B be in the range Map of the matrix.");
1770  TEUCHOS_TEST_FOR_EXCEPTION(
1771  ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
1772  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1773  "D be in the row Map of the matrix.");
1774  TEUCHOS_TEST_FOR_EXCEPTION(
1775  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1776  "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1777  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1778  TEUCHOS_TEST_FOR_EXCEPTION(
1779  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1780  "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1781  "the range Map of the matrix be the same.");
1782  }
1783 #else
1784  // Forestall any compiler warnings for unused variables.
1785  (void) rangeMap;
1786  (void) rowMap;
1787 #endif // HAVE_IFPACK2_DEBUG
1788 
1789  // Fetch a (possibly cached) temporary column Map multivector
1790  // X_colMap, and a domain Map view X_domainMap of it. Both have
1791  // constant stride by construction. We know that the domain Map
1792  // must include the column Map, because our Gauss-Seidel kernel
1793  // requires that the row Map, domain Map, and range Map are all
1794  // the same, and that each process owns all of its own diagonal
1795  // entries of the matrix.
1796 
1797  RCP<MV> X_colMap;
1798  RCP<MV> X_domainMap;
1799  bool copyBackOutput = false;
1800  if (importer.is_null ()) {
1801  if (X.isConstantStride ()) {
1802  X_colMap = rcpFromRef (X);
1803  X_domainMap = rcpFromRef (X);
1804 
1805  // Column Map and domain Map are the same, so there are no
1806  // remote entries. Thus, if we are not setting the initial
1807  // guess to zero, we don't have to worry about setting remote
1808  // entries to zero, even though we are not doing an Import in
1809  // this case.
1810  if (zeroInitialGuess) {
1811  X_colMap->putScalar (ZERO);
1812  }
1813  // No need to copy back to X at end.
1814  }
1815  else {
1816  // We must copy X into a constant stride multivector.
1817  // Just use the cached column Map multivector for that.
1818  // force=true means fill with zeros, so no need to fill
1819  // remote entries (not in domain Map) with zeros.
1820  //X_colMap = crsMat->getColumnMapMultiVector (X, true);
1821  X_colMap = rcp (new MV (colMap, X.getNumVectors ()));
1822  // X_domainMap is always a domain Map view of the column Map
1823  // multivector. In this case, the domain and column Maps are
1824  // the same, so X_domainMap _is_ X_colMap.
1825  X_domainMap = X_colMap;
1826  if (! zeroInitialGuess) { // Don't copy if zero initial guess
1827  try {
1828  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
1829  } catch (std::exception& e) {
1830  std::ostringstream os;
1831  os << "Ifpack2::Relaxation::MTGaussSeidel: "
1832  "deep_copy(*X_domainMap, X) threw an exception: "
1833  << e.what () << ".";
1834  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
1835  }
1836  }
1837  copyBackOutput = true; // Don't forget to copy back at end.
1838  /*
1839  TPETRA_EFFICIENCY_WARNING(
1840  ! X.isConstantStride (),
1841  std::runtime_error,
1842  "MTGaussSeidel: The current implementation of the Gauss-Seidel "
1843  "kernel requires that X and B both have constant stride. Since X "
1844  "does not have constant stride, we had to make a copy. This is a "
1845  "limitation of the current implementation and not your fault, but we "
1846  "still report it as an efficiency warning for your information.");
1847  */
1848  }
1849  }
1850  else { // Column Map and domain Map are _not_ the same.
1851  //X_colMap = crsMat->getColumnMapMultiVector (X);
1852  X_colMap = rcp (new MV (colMap, X.getNumVectors ()));
1853 
1854  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1855 
// In debug builds, sanity-check that the domain-Map view really aliases
// the column-Map multivector's storage (same base pointer, compatible
// local lengths and vector counts).
1856 #ifdef HAVE_IFPACK2_DEBUG
1857  auto X_colMap_host_view = X_colMap->template getLocalView<Kokkos::HostSpace> ();
1858  auto X_domainMap_host_view = X_domainMap->template getLocalView<Kokkos::HostSpace> ();
1859 
1860  if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
1861  TEUCHOS_TEST_FOR_EXCEPTION(
1862  X_colMap_host_view.ptr_on_device () != X_domainMap_host_view.ptr_on_device (),
1863  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
1864  "Pointer to start of column Map view of X is not equal to pointer to "
1865  "start of (domain Map view of) X. This may mean that "
1866  "Tpetra::MultiVector::offsetViewNonConst is broken. "
1867  "Please report this bug to the Tpetra developers.");
1868  }
1869 
1870  TEUCHOS_TEST_FOR_EXCEPTION(
1871  X_colMap_host_view.dimension_0 () < X_domainMap_host_view.dimension_0 () ||
1872  X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
1873  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
1874  "X_colMap has fewer local rows than X_domainMap. "
1875  "X_colMap_host_view.dimension_0() = " << X_colMap_host_view.dimension_0 ()
1876  << ", X_domainMap_host_view.dimension_0() = "
1877  << X_domainMap_host_view.dimension_0 ()
1878  << ", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
1879  << ", and X_domainMap->getLocalLength() = "
1880  << X_domainMap->getLocalLength ()
1881  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
1882  "is broken. Please report this bug to the Tpetra developers.");
1883 
1884  TEUCHOS_TEST_FOR_EXCEPTION(
1885  X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
1886  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
1887  "X_colMap has a different number of columns than X_domainMap. "
1888  "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
1889  << " != X_domainMap->getNumVectors() = "
1890  << X_domainMap->getNumVectors ()
1891  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
1892  "is broken. Please report this bug to the Tpetra developers.");
1893 #endif // HAVE_IFPACK2_DEBUG
1894 
1895  if (zeroInitialGuess) {
1896  // No need for an Import, since we're filling with zeros.
1897  X_colMap->putScalar (ZERO);
1898  } else {
1899  // We could just copy X into X_domainMap. However, that
1900  // wastes a copy, because the Import also does a copy (plus
1901  // communication). Since the typical use case for
1902  // Gauss-Seidel is a small number of sweeps (2 is typical), we
1903  // don't want to waste that copy. Thus, we do the Import
1904  // here, and skip the first Import in the first sweep.
1905  // Importing directly from X effects the copy into X_domainMap
1906  // (which is a view of X_colMap).
1907  X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
1908  }
1909  copyBackOutput = true; // Don't forget to copy back at end.
1910  } // if column and domain Maps are (not) the same
1911 
1912  // The Gauss-Seidel / SOR kernel expects multivectors of constant
1913  // stride. X_colMap is by construction, but B might not be. If
1914  // it's not, we have to make a copy.
1915  RCP<const MV> B_in;
1916  if (B.isConstantStride ()) {
1917  B_in = rcpFromRef (B);
1918  }
1919  else {
1920  // Range Map and row Map are the same in this case, so we can
1921  // use the cached row Map multivector to store a constant stride
1922  // copy of B.
1923  //RCP<MV> B_in_nonconst = crsMat->getRowMapMultiVector (B, true);
1924  RCP<MV> B_in_nonconst = rcp (new MV (rowMap, B.getNumVectors()));
1925  try {
1926  deep_copy (*B_in_nonconst, B);
1927  } catch (std::exception& e) {
1928  std::ostringstream os;
1929  os << "Ifpack2::Relaxation::MTGaussSeidel: "
1930  "deep_copy(*B_in_nonconst, B) threw an exception: "
1931  << e.what () << ".";
1932  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
1933  }
1934  B_in = rcp_const_cast<const MV> (B_in_nonconst);
1935 
1936  /*
1937  TPETRA_EFFICIENCY_WARNING(
1938  ! B.isConstantStride (),
1939  std::runtime_error,
1940  "MTGaussSeidel: The current implementation requires that B have "
1941  "constant stride. Since B does not have constant stride, we had to "
1942  "copy it into a separate constant-stride multivector. This is a "
1943  "limitation of the current implementation and not your fault, but we "
1944  "still report it as an efficiency warning for your information.");
1945  */
1946  }
1947 
1948  local_matrix_type kcsr = crsMat->getLocalMatrix ();
1949  const size_t NumVectors = X.getNumVectors ();
1950 
// update_y_vector tells the KokkosKernels apply whether the right-hand
// side ("y" in the kernel's naming) changed since the previous call;
// zero_x_vector stays false because any zeroing was already done above.
1951  bool update_y_vector = true;
1952  //false as it was done up already, and we dont want to zero it in each sweep.
1953  bool zero_x_vector = false;
1954  for (int sweep = 0; sweep < numSweeps; ++sweep) {
1955  if (! importer.is_null () && sweep > 0) {
1956  // We already did the first Import for the zeroth sweep above,
1957  // if it was necessary.
1958  X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
1959  }
1960 
// Apply the kernel one right-hand-side column at a time, selecting the
// forward, backward, or symmetric variant per the requested direction.
1961  for (size_t indVec = 0; indVec < NumVectors; ++indVec) {
1962  if (direction == Tpetra::Symmetric) {
1963  KokkosKernels::Experimental::Graph::symmetric_gauss_seidel_apply
1964  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
1965  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
1966  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
1967  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
1968  zero_x_vector, update_y_vector);
1969  }
1970  else if (direction == Tpetra::Forward) {
1971  KokkosKernels::Experimental::Graph::forward_sweep_gauss_seidel_apply
1972  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
1973  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
1974  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
1975  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
1976  zero_x_vector, update_y_vector);
1977  }
1978  else if (direction == Tpetra::Backward) {
1979  KokkosKernels::Experimental::Graph::backward_sweep_gauss_seidel_apply
1980  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
1981  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
1982  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
1983  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
1984  zero_x_vector, update_y_vector);
1985  }
1986  else {
1987  TEUCHOS_TEST_FOR_EXCEPTION(
1988  true, std::invalid_argument,
1989  prefix << "The 'direction' enum does not have any of its valid "
1990  "values: Forward, Backward, or Symmetric.");
1991  }
1992  }
1993 
// With a single RHS the right-hand side never changes between sweeps,
// so the kernel may reuse it; with multiple RHS it must be refreshed.
1994  if (NumVectors > 1){
1995  update_y_vector = true;
1996  }
1997  else {
1998  update_y_vector = false;
1999  }
2000  }
2001 
2002  if (copyBackOutput) {
2003  try {
2004  deep_copy (X , *X_domainMap); // Copy result back into X.
2005  } catch (std::exception& e) {
2006  TEUCHOS_TEST_FOR_EXCEPTION(
2007  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2008  "threw an exception: " << e.what ());
2009  }
2010  }
2011 
2012 #else
2013  TEUCHOS_TEST_FOR_EXCEPTION
2014  (true, std::logic_error, "The multithreaded implementation of Gauss-Seidel "
2015  "is not enabled. Please talk to the Ifpack2 developers about how to "
2016  "enable this method.");
2017 #endif // HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
2018 }
2019 
// Multithreaded symmetric Gauss-Seidel apply.  Requires the stored
// matrix A_ to be a Tpetra::CrsMatrix; delegates the actual sweeps to
// MTGaussSeidel with the Symmetric direction, then accumulates a flop
// count.  The leading factor 2.0 in the flop count accounts for the
// forward-plus-backward pass of each symmetric sweep.
// User-supplied iteration orders (localSmoothingIndices_) are not
// supported by this path and are rejected with an exception.
2020 template<class MatrixType>
2021 void
2023 ApplyInverseMTSGS_CrsMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2024  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X) const
2025 {
2026  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
2027  TEUCHOS_TEST_FOR_EXCEPTION
2028  (crsMat == NULL, std::logic_error, "Ifpack2::Relaxation::apply: "
2029  "Multithreaded Gauss-Seidel methods currently only work when the input "
2030  "matrix is a Tpetra::CrsMatrix.");
2031 
2032  using Teuchos::as;
2033  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2034 
2035  //Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
2036  TEUCHOS_TEST_FOR_EXCEPTION
2037  (! localSmoothingIndices_.is_null (), std::logic_error,
2038  "Our implementation of Multithreaded Gauss-Seidel does not implement the "
2039  "use case where the user supplies an iteration order. "
2040  "This error used to appear as \"MT GaussSeidel ignores the given "
2041  "order\". "
2042  "I tried to add more explanation, but I didn't implement \"MT "
2043  "GaussSeidel\" [sic]. "
2044  "You'll have to ask the person who did.");
2045  this->MTGaussSeidel (crsMat, X, B, *Diagonal_, DampingFactor_, direction,
2046  NumSweeps_, ZeroStartingSolution_);
2047 
// Flop accounting: per sweep per direction per RHS column, one +/* per
// matrix entry and one +/* per row, plus one * per row if damping != 1.
2048  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2049  const double numVectors = as<double> (X.getNumVectors ());
2050  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2051  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2052  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2053  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2054 }
2055 
2056 
// Multithreaded (single-direction) Gauss-Seidel apply: forward or
// backward per DoBackwardGS_, delegated to MTGaussSeidel.  (The method
// name line is not visible in this extract; from the body this appears
// to be the non-symmetric MT Gauss-Seidel apply -- confirm against the
// declaration in Ifpack2_Relaxation_decl.hpp.)
// Unlike the symmetric variant, the flop count here has no extra
// factor of 2, since each sweep is a single pass.
2057 template<class MatrixType>
2059  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2060  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X) const {
2061 
2062  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
2063  TEUCHOS_TEST_FOR_EXCEPTION(
2064  crsMat == NULL, std::runtime_error, "Ifpack2::Relaxation::compute: "
2065  "MT methods works for CRSMatrix Only.");
2066 
2067  using Teuchos::as;
2068  const Tpetra::ESweepDirection direction =
2069  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
2070 
2071  //Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
// User-supplied iteration orders are not supported by the MT path.
2072  TEUCHOS_TEST_FOR_EXCEPTION
2073  (! localSmoothingIndices_.is_null (), std::logic_error,
2074  "Our implementation of Multithreaded Gauss-Seidel does not implement the "
2075  "use case where the user supplies an iteration order. "
2076  "This error used to appear as \"MT GaussSeidel ignores the given "
2077  "order\". "
2078  "I tried to add more explanation, but I didn't implement \"MT "
2079  "GaussSeidel\" [sic]. "
2080  "You'll have to ask the person who did.");
2081  this->MTGaussSeidel (crsMat, X, B, *Diagonal_, DampingFactor_, direction,
2082  NumSweeps_, ZeroStartingSolution_);
2083 
// Flop accounting, one pass per sweep (see symmetric variant for the
// per-entry / per-row breakdown).
2084  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2085  const double numVectors = as<double> (X.getNumVectors ());
2086  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2087  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2088  ApplyFlops_ += NumSweeps_ * numVectors *
2089  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2090 }
2091 
// Symmetric Gauss-Seidel dispatch: pick the fastest implementation
// based on the dynamic type of the stored matrix A_.
//   BlockCrsMatrix -> ApplyInverseSGS_BlockCrsMatrix (non-const, hence
//                     the const_cast; that overload may update cached
//                     members such as yBlockColumnPointMap_)
//   CrsMatrix      -> ApplyInverseSGS_CrsMatrix (direct data access)
//   anything else  -> ApplyInverseSGS_RowMatrix (generic, row-copy based)
2092 template<class MatrixType>
2093 void
2095 ApplyInverseSGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2096  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2097 {
2098  typedef Relaxation<MatrixType> this_type;
2099  // The CrsMatrix version is faster, because it can access the sparse
2100  // matrix data directly, rather than by copying out each row's data
2101  // in turn. Thus, we check whether the RowMatrix is really a
2102  // CrsMatrix.
2103  //
2104  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
2105  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
2106  // will still be correct if the cast fails, but it will use an
2107  // unoptimized kernel.
2108  const block_crs_matrix_type* blockCrsMat = dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr());
2109  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
2110  if (blockCrsMat != NULL) {
2111  const_cast<this_type*> (this)->ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
2112  }
2113  else if (crsMat != NULL) {
2114  ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
2115  } else {
2116  ApplyInverseSGS_RowMatrix (X, Y);
2117  }
2118 }
2119 
2120 
// Generic symmetric Gauss-Seidel for an arbitrary Tpetra::RowMatrix.
// Each sweep performs a forward pass then a backward pass over the
// (possibly user-reordered) local rows, reading each row's entries via
// getLocalRowCopy.  Two code paths: a constant-stride fast path using
// raw 1-D views, and a general 2-D-view fallback.  In parallel runs,
// off-process values of Y are refreshed once per sweep via Y2.
2121 template<class MatrixType>
2122 void
2124 ApplyInverseSGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2125  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2126 {
2127  using Teuchos::Array;
2128  using Teuchos::ArrayRCP;
2129  using Teuchos::ArrayView;
2130  using Teuchos::as;
2131  using Teuchos::RCP;
2132  using Teuchos::rcp;
2133  using Teuchos::rcpFromRef;
2134  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
2135 
2136 
2137  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
2138  // starting multivector itself. The generic RowMatrix version here
2139  // does not, so we have to zero out Y here.
2140  if (ZeroStartingSolution_) {
2141  Y.putScalar (STS::zero ());
2142  }
2143 
// Scratch row buffers, sized for the widest local row.
2144  const size_t NumVectors = X.getNumVectors ();
2145  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
2146  Array<local_ordinal_type> Indices (maxLength);
2147  Array<scalar_type> Values (maxLength);
2148 
2149  // Local smoothing stuff
// If localSmoothingIndices_ is set, sweep only over (and in the order
// of) those rows; otherwise sweep over all local rows in order.
2150  const size_t numMyRows = A_->getNodeNumRows();
2151  const local_ordinal_type * rowInd = 0;
2152  size_t numActive = numMyRows;
2153  bool do_local = !localSmoothingIndices_.is_null();
2154  if(do_local) {
2155  rowInd = localSmoothingIndices_.getRawPtr();
2156  numActive = localSmoothingIndices_.size();
2157  }
2158 
2159 
// Y2 holds Y's values in the column Map (import target) so that
// off-process entries are available when reading y2 during the sweep.
2160  RCP<MV> Y2;
2161  if (IsParallel_) {
2162  if (Importer_.is_null ()) { // domain and column Maps are the same.
2163  // We will copy Y into Y2 below, so no need to fill with zeros here.
2164  Y2 = rcp (new MV (Y.getMap (), NumVectors, false));
2165  } else {
2166  // FIXME (mfh 21 Mar 2013) We probably don't need to fill with
2167  // zeros here, since we are doing an Import into Y2 below
2168  // anyway. However, it doesn't hurt correctness.
2169  Y2 = rcp (new MV (Importer_->getTargetMap (), NumVectors));
2170  }
2171  }
2172  else {
2173  Y2 = rcpFromRef (Y);
2174  }
2175 
2176  // Diagonal
// Diagonal_ holds the (possibly L1-modified) inverse diagonal computed
// in compute(); d_ptr[i] is multiplied, not divided, below.
2177  ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView();
2178  ArrayView<const scalar_type> d_ptr = d_rcp();
2179 
2180  // Constant stride check
2181  bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
2182 
2183  if(constant_stride) {
2184  // extract 1D RCPs
2185  size_t x_stride = X.getStride();
2186  size_t y2_stride = Y2->getStride();
2187  ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
2188  ArrayRCP<const scalar_type> x_rcp = X.get1dView();
2189  ArrayView<scalar_type> y2_ptr = y2_rcp();
2190  ArrayView<const scalar_type> x_ptr = x_rcp();
2191  Array<scalar_type> dtemp(NumVectors,STS::zero());
2192 
2193  for (int j = 0; j < NumSweeps_; j++) {
2194  // data exchange is here, once per sweep
2195  if (IsParallel_) {
2196  if (Importer_.is_null ()) {
2197  // just copy, since domain and column Maps are the same
2198  Tpetra::deep_copy (*Y2, Y);
2199  } else {
2200  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2201  }
2202  }
// Forward pass: for each active row i, accumulate the row-times-y2
// product into dtemp (all RHS columns at once), then apply the damped
// update y2[i] += omega * Dinv[i] * (x[i] - dtemp).
2203  for (size_t ii = 0; ii < numActive; ++ii) {
2204  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2205  size_t NumEntries;
2206  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2207  dtemp.assign(NumVectors,STS::zero());
2208 
2209  for (size_t k = 0; k < NumEntries; ++k) {
2210  const local_ordinal_type col = Indices[k];
2211  for (size_t m = 0; m < NumVectors; ++m) {
2212  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2213  }
2214  }
2215 
2216  for (size_t m = 0; m < NumVectors; ++m) {
2217  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2218  }
2219  }
2220 
// Backward pass: same update, rows visited in reverse order.
2221  // ptrdiff_t is the same size as size_t, but is signed. Being
2222  // signed is important so that i >= 0 is not trivially true.
2223  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2224  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2225  size_t NumEntries;
2226  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2227  dtemp.assign(NumVectors,STS::zero());
2228 
2229  for (size_t k = 0; k < NumEntries; ++k) {
2230  const local_ordinal_type col = Indices[k];
2231  for (size_t m = 0; m < NumVectors; ++m) {
2232  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2233  }
2234  }
2235 
2236  for (size_t m = 0; m < NumVectors; ++m) {
2237  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2238  }
2239  }
2240 
2241  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
2242  if (IsParallel_) {
2243  Tpetra::deep_copy (Y, *Y2);
2244  }
2245  }
2246  }
2247  else {
// General (non-constant-stride) path: same forward/backward sweeps,
// but accessing X and Y2 through per-column 2-D views, one RHS column
// at a time.
2248  // extract 2D RCPs
2249  ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
2250  ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
2251 
2252  for (int iter = 0; iter < NumSweeps_; ++iter) {
2253  // only one data exchange per sweep
2254  if (IsParallel_) {
2255  if (Importer_.is_null ()) {
2256  // just copy, since domain and column Maps are the same
2257  Tpetra::deep_copy (*Y2, Y);
2258  } else {
2259  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2260  }
2261  }
2262 
2263  for (size_t ii = 0; ii < numActive; ++ii) {
2264  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2265  const scalar_type diag = d_ptr[i];
2266  size_t NumEntries;
2267  A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2268 
2269  for (size_t m = 0; m < NumVectors; ++m) {
2270  scalar_type dtemp = STS::zero ();
2271  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2272  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2273 
2274  for (size_t k = 0; k < NumEntries; ++k) {
2275  const local_ordinal_type col = Indices[k];
2276  dtemp += Values[k] * y2_local[col];
2277  }
2278  y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2279  }
2280  }
2281 
2282  // ptrdiff_t is the same size as size_t, but is signed. Being
2283  // signed is important so that i >= 0 is not trivially true.
2284  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2285  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2286  const scalar_type diag = d_ptr[i];
2287  size_t NumEntries;
2288  A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2289 
2290  for (size_t m = 0; m < NumVectors; ++m) {
2291  scalar_type dtemp = STS::zero ();
2292  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2293  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2294 
2295  for (size_t k = 0; k < NumEntries; ++k) {
2296  const local_ordinal_type col = Indices[k];
2297  dtemp += Values[k] * y2_local[col];
2298  }
2299  y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2300  }
2301  }
2302 
2303  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
2304  if (IsParallel_) {
2305  Tpetra::deep_copy (Y, *Y2);
2306  }
2307  }
2308  }
2309 
2310  // See flop count discussion in implementation of ApplyInverseSGS_CrsMatrix().
2311  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2312  const double numVectors = as<double> (X.getNumVectors ());
2313  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2314  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2315  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2316  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2317 }
2318 
2319 
// Symmetric Gauss-Seidel specialized for Tpetra::CrsMatrix: delegates
// entirely to Tpetra's gaussSeidelCopy kernel (or the reordered variant
// when the user supplied localSmoothingIndices_), then accumulates a
// flop count.  X is the right-hand side; Y is the solution / initial
// guess.
2320 template<class MatrixType>
2321 void
2323 ApplyInverseSGS_CrsMatrix (const crs_matrix_type& A,
2324  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2325  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2326 {
2327  using Teuchos::as;
2328  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2329  if (localSmoothingIndices_.is_null ()) {
2330  A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
2331  NumSweeps_, ZeroStartingSolution_);
2332  }
2333  else {
2334  A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
2335  DampingFactor_, direction,
2336  NumSweeps_, ZeroStartingSolution_);
2337  }
2338 
2339  // For each column of output, for each sweep over the matrix:
2340  //
2341  // - One + and one * for each matrix entry
2342  // - One / and one + for each row of the matrix
2343  // - If the damping factor is not one: one * for each row of the
2344  // matrix. (It's not fair to count this if the damping factor is
2345  // one, since the implementation could skip it. Whether it does
2346  // or not is the implementation's choice.)
2347  //
2348  // Each sweep of symmetric Gauss-Seidel / SOR counts as two sweeps,
2349  // one forward and one backward.
2350 
2351  // Floating-point operations due to the damping factor, per matrix
2352  // row, per direction, per columm of output.
2353  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2354  const double numVectors = as<double> (X.getNumVectors ());
2355  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2356  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2357  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2358  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2359 }
2360 
// Symmetric Gauss-Seidel specialized for BlockCrsMatrix, using the
// localGaussSeidel kernel with the Symmetric direction.  Structure
// mirrors the forward/backward GS BlockCrs variant: view Y in the
// domain Map and X in the column Map as block multivectors, import
// off-process Y entries into a cached column-Map multivector before
// each sweep, run the local kernel, and copy the domain-Map portion of
// the result back.  Note the copy-back here targets yBlockView (a
// multivector view of yBlock, i.e. of Y's data) rather than Y itself.
2361 template<class MatrixType>
2362 void
2364 ApplyInverseSGS_BlockCrsMatrix (const block_crs_matrix_type& A,
2365  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2366  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
2367 {
2368  using Teuchos::RCP;
2369  using Teuchos::rcp;
2370  using Teuchos::rcpFromRef;
2371  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
2373  typedef Tpetra::MultiVector<scalar_type,
2375 
2376  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
2377  //
2378  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
2379  // multiple right-hand sides, unless the input or output MultiVector
2380  // does not have constant stride. We should check for that case
2381  // here, in case it doesn't work in localGaussSeidel (which is
2382  // entirely possible).
2383  BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
2384  const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
2385 
// With a nontrivial Importer, the kernel needs Y's data in the column
// Map; reuse (or rebuild) the cached column-Map block multivector.
2386  bool performImport = false;
2387  RCP<BMV> yBlockCol;
2388  if (Importer_.is_null ()) {
2389  yBlockCol = Teuchos::rcpFromRef (yBlock);
2390  }
2391  else {
2392  if (yBlockColumnPointMap_.is_null () ||
2393  yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
2394  yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
2395  yBlockColumnPointMap_ =
2396  rcp (new BMV (* (A.getColMap ()), A.getBlockSize (),
2397  static_cast<local_ordinal_type> (yBlock.getNumVectors ())));
2398  }
2399  yBlockCol = yBlockColumnPointMap_;
2400  performImport = true;
2401  }
2402 
// A zero starting solution makes the initial import unnecessary.
2403  if (ZeroStartingSolution_) {
2404  yBlockCol->putScalar (STS::zero ());
2405  }
2406  else if (performImport) {
2407  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2408  }
2409 
2410  // FIXME (mfh 12 Sep 2014) Shouldn't this come from the user's parameter?
2411  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2412 
2413  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2414  if (performImport && sweep > 0) {
2415  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2416  }
2417  A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
2418  DampingFactor_, direction);
2419  if (performImport) {
2420  RCP<const MV> yBlockColPointDomain =
2421  yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
2422  MV yBlockView = yBlock.getMultiVectorView ();
2423  Tpetra::deep_copy (yBlockView, *yBlockColPointDomain);
2424  }
2425  }
2426 }
2427 
2428 
2429 template<class MatrixType>
2431 {
2432  std::ostringstream os;
2433 
2434  // Output is a valid YAML dictionary in flow style. If you don't
2435  // like everything on a single line, you should call describe()
2436  // instead.
2437  os << "\"Ifpack2::Relaxation\": {";
2438 
2439  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
2440  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
2441 
2442  // It's useful to print this instance's relaxation method (Jacobi,
2443  // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2444  // than that, call describe() instead.
2445  os << "Type: ";
2446  if (PrecType_ == Ifpack2::Details::JACOBI) {
2447  os << "Jacobi";
2448  } else if (PrecType_ == Ifpack2::Details::GS) {
2449  os << "Gauss-Seidel";
2450  } else if (PrecType_ == Ifpack2::Details::SGS) {
2451  os << "Symmetric Gauss-Seidel";
2452  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2453  os << "MT Gauss-Seidel";
2454  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2455  os << "MT Symmetric Gauss-Seidel";
2456  }
2457  else {
2458  os << "INVALID";
2459  }
2460 
2461  os << ", " << "sweeps: " << NumSweeps_ << ", "
2462  << "damping factor: " << DampingFactor_ << ", ";
2463  if (DoL1Method_) {
2464  os << "use l1: " << DoL1Method_ << ", "
2465  << "l1 eta: " << L1Eta_ << ", ";
2466  }
2467 
2468  if (A_.is_null ()) {
2469  os << "Matrix: null";
2470  }
2471  else {
2472  os << "Global matrix dimensions: ["
2473  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
2474  << ", Global nnz: " << A_->getGlobalNumEntries();
2475  }
2476 
2477  os << "}";
2478  return os.str ();
2479 }
2480 
2481 
2482 template<class MatrixType>
2483 void
2485 describe (Teuchos::FancyOStream &out,
2486  const Teuchos::EVerbosityLevel verbLevel) const
2487 {
2488  using Teuchos::OSTab;
2489  using Teuchos::TypeNameTraits;
2490  using Teuchos::VERB_DEFAULT;
2491  using Teuchos::VERB_NONE;
2492  using Teuchos::VERB_LOW;
2493  using Teuchos::VERB_MEDIUM;
2494  using Teuchos::VERB_HIGH;
2495  using Teuchos::VERB_EXTREME;
2496  using std::endl;
2497 
2498  const Teuchos::EVerbosityLevel vl =
2499  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2500 
2501  const int myRank = this->getComm ()->getRank ();
2502 
2503  // none: print nothing
2504  // low: print O(1) info from Proc 0
2505  // medium:
2506  // high:
2507  // extreme:
2508 
2509  if (vl != VERB_NONE && myRank == 0) {
2510  // Describable interface asks each implementation to start with a tab.
2511  OSTab tab1 (out);
2512  // Output is valid YAML; hence the quotes, to protect the colons.
2513  out << "\"Ifpack2::Relaxation\":" << endl;
2514  OSTab tab2 (out);
2515  out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
2516  << endl;
2517  if (this->getObjectLabel () != "") {
2518  out << "Label: " << this->getObjectLabel () << endl;
2519  }
2520  out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
2521  << "Computed: " << (isComputed () ? "true" : "false") << endl
2522  << "Parameters: " << endl;
2523  {
2524  OSTab tab3 (out);
2525  out << "\"relaxation: type\": ";
2526  if (PrecType_ == Ifpack2::Details::JACOBI) {
2527  out << "Jacobi";
2528  } else if (PrecType_ == Ifpack2::Details::GS) {
2529  out << "Gauss-Seidel";
2530  } else if (PrecType_ == Ifpack2::Details::SGS) {
2531  out << "Symmetric Gauss-Seidel";
2532  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2533  out << "MT Gauss-Seidel";
2534  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2535  out << "MT Symmetric Gauss-Seidel";
2536  } else {
2537  out << "INVALID";
2538  }
2539  // We quote these parameter names because they contain colons.
2540  // YAML uses the colon to distinguish key from value.
2541  out << endl
2542  << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2543  << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2544  << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2545  << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2546  << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2547  << "\"relaxation: use l1\": " << DoL1Method_ << endl
2548  << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2549  }
2550  out << "Computed quantities:" << endl;
2551  {
2552  OSTab tab3 (out);
2553  out << "Global number of rows: " << A_->getGlobalNumRows () << endl
2554  << "Global number of columns: " << A_->getGlobalNumCols () << endl;
2555  }
2556  if (checkDiagEntries_ && isComputed ()) {
2557  out << "Properties of input diagonal entries:" << endl;
2558  {
2559  OSTab tab3 (out);
2560  out << "Magnitude of minimum-magnitude diagonal entry: "
2561  << globalMinMagDiagEntryMag_ << endl
2562  << "Magnitude of maximum-magnitude diagonal entry: "
2563  << globalMaxMagDiagEntryMag_ << endl
2564  << "Number of diagonal entries with small magnitude: "
2565  << globalNumSmallDiagEntries_ << endl
2566  << "Number of zero diagonal entries: "
2567  << globalNumZeroDiagEntries_ << endl
2568  << "Number of diagonal entries with negative real part: "
2569  << globalNumNegDiagEntries_ << endl
2570  << "Abs 2-norm diff between computed and actual inverse "
2571  << "diagonal: " << globalDiagNormDiff_ << endl;
2572  }
2573  }
2574  if (isComputed ()) {
2575  out << "Saved diagonal offsets: "
2576  << (savedDiagOffsets_ ? "true" : "false") << endl;
2577  }
2578  out << "Call counts and total times (in seconds): " << endl;
2579  {
2580  OSTab tab3 (out);
2581  out << "initialize: " << endl;
2582  {
2583  OSTab tab4 (out);
2584  out << "Call count: " << NumInitialize_ << endl;
2585  out << "Total time: " << InitializeTime_ << endl;
2586  }
2587  out << "compute: " << endl;
2588  {
2589  OSTab tab4 (out);
2590  out << "Call count: " << NumCompute_ << endl;
2591  out << "Total time: " << ComputeTime_ << endl;
2592  }
2593  out << "apply: " << endl;
2594  {
2595  OSTab tab4 (out);
2596  out << "Call count: " << NumApply_ << endl;
2597  out << "Total time: " << ApplyTime_ << endl;
2598  }
2599  }
2600  }
2601 }
2602 
2603 } // namespace Ifpack2
2604 
2605 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2606  template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2607 
2608 #endif // IFPACK2_RELAXATION_DEF_HPP
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2430
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:377
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:393
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:423
void compute()
Compute the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:845
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:455
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:387
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:405
bool isInitialized() const
Returns true if the preconditioner has been successfully initialized.
Definition: Ifpack2_Relaxation_decl.hpp:401
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:411
Ifpack2 implementation details.
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:364
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:355
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:250
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:334
File for utility functions.
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:442
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:435
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:220
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:247
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:429
bool isComputed() const
Return true if compute() has been called.
Definition: Ifpack2_Relaxation_decl.hpp:416
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:417
Definition: Ifpack2_Container.hpp:761
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:399
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:244
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:344
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:241
virtual ~Relaxation()
Destructor.
Definition: Ifpack2_Relaxation_def.hpp:215
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:553
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:226
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object's attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2485
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
void initialize()
Initialize the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:574
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:253