Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_ILUT_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_ILUT_DEF_HPP
44 #define IFPACK2_ILUT_DEF_HPP
45 
46 // disable clang warnings
47 #if defined (__clang__) && !defined (__INTEL_COMPILER)
48 #pragma clang system_header
49 #endif
50 
51 #include "Ifpack2_Heap.hpp"
52 #include "Ifpack2_LocalFilter.hpp"
53 #include "Ifpack2_LocalSparseTriangularSolver_decl.hpp"
54 #include "Ifpack2_Parameters.hpp"
55 #include "Tpetra_CrsMatrix.hpp"
56 #include "Teuchos_Time.hpp"
57 #include "Teuchos_TypeNameTraits.hpp"
58 
59 namespace Ifpack2 {
60 
61  namespace {
62 
87  template<class ScalarType>
88  inline typename Teuchos::ScalarTraits<ScalarType>::magnitudeType
89  ilutDefaultDropTolerance () {
90  using Teuchos::as;
91  typedef Teuchos::ScalarTraits<ScalarType> STS;
92  typedef typename STS::magnitudeType magnitude_type;
93  typedef Teuchos::ScalarTraits<magnitude_type> STM;
94 
95  // 1/2. Hopefully this can be represented in magnitude_type.
96  const magnitude_type oneHalf = STM::one() / (STM::one() + STM::one());
97 
98  // The min ensures that in case magnitude_type has very low
99  // precision, we'll at least get some value strictly less than
100  // one.
101  return std::min (as<magnitude_type> (1000) * STS::magnitude (STS::eps ()), oneHalf);
102  }
103 
104  // Full specialization for ScalarType = double.
105  // This specialization preserves ILUT's previous default behavior.
106  template<>
107  inline Teuchos::ScalarTraits<double>::magnitudeType
108  ilutDefaultDropTolerance<double> () {
109  return 1e-12;
110  }
111 
112  } // namespace (anonymous)
113 
114 
115 template <class MatrixType>
116 ILUT<MatrixType>::ILUT (const Teuchos::RCP<const row_matrix_type>& A) :
117  A_ (A),
118  Athresh_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
119  Rthresh_ (Teuchos::ScalarTraits<magnitude_type>::one ()),
120  RelaxValue_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
121  LevelOfFill_ (1),
122  DropTolerance_ (ilutDefaultDropTolerance<scalar_type> ()),
123  InitializeTime_ (0.0),
124  ComputeTime_ (0.0),
125  ApplyTime_ (0.0),
126  NumInitialize_ (0),
127  NumCompute_ (0),
128  NumApply_ (0),
129  IsInitialized_ (false),
130  IsComputed_ (false)
131 {}
132 
133 template <class MatrixType>
135 {}
136 
137 template <class MatrixType>
138 void ILUT<MatrixType>::setParameters (const Teuchos::ParameterList& params)
139 {
140  using Teuchos::as;
141  using Teuchos::Exceptions::InvalidParameterName;
142  using Teuchos::Exceptions::InvalidParameterType;
143 
144  // Default values of the various parameters.
145  int fillLevel = 1;
146  magnitude_type absThresh = STM::zero ();
147  magnitude_type relThresh = STM::one ();
148  magnitude_type relaxValue = STM::zero ();
149  magnitude_type dropTol = ilutDefaultDropTolerance<scalar_type> ();
150 
151  bool gotFillLevel = false;
152  try {
153  // Try getting the fill level as an int.
154  fillLevel = params.get<int> ("fact: ilut level-of-fill");
155  gotFillLevel = true;
156  }
157  catch (InvalidParameterName&) {
158  // We didn't really get it, but this tells us to stop looking.
159  gotFillLevel = true;
160  }
161  catch (InvalidParameterType&) {
162  // The name is right, but the type is wrong; try different types.
163  // We don't have to check InvalidParameterName again, since we
164  // checked it above, and it has nothing to do with the type.
165  }
166 
167  if (! gotFillLevel) {
168  // Try magnitude_type, for backwards compatibility.
169  try {
170  fillLevel = as<int> (params.get<magnitude_type> ("fact: ilut level-of-fill"));
171  }
172  catch (InvalidParameterType&) {}
173  }
174  if (! gotFillLevel) {
175  // Try double, for backwards compatibility.
176  try {
177  fillLevel = as<int> (params.get<double> ("fact: ilut level-of-fill"));
178  }
179  catch (InvalidParameterType&) {}
180  }
181  // If none of the above attempts succeed, accept the default value.
182 
183  TEUCHOS_TEST_FOR_EXCEPTION(
184  fillLevel <= 0, std::runtime_error,
185  "Ifpack2::ILUT: The \"fact: ilut level-of-fill\" parameter must be "
186  "strictly greater than zero, but you specified a value of " << fillLevel
187  << ". Remember that for ILUT, the fill level p means something different "
188  "than it does for ILU(k). ILU(0) produces factors with the same sparsity "
189  "structure as the input matrix A; ILUT with p = 0 always produces a "
190  "diagonal matrix, and is thus probably not what you want.");
191 
192  try {
193  absThresh = params.get<magnitude_type> ("fact: absolute threshold");
194  }
195  catch (InvalidParameterType&) {
196  // Try double, for backwards compatibility.
197  // The cast from double to magnitude_type must succeed.
198  absThresh = as<magnitude_type> (params.get<double> ("fact: absolute threshold"));
199  }
200  catch (InvalidParameterName&) {
201  // Accept the default value.
202  }
203 
204  try {
205  relThresh = params.get<magnitude_type> ("fact: relative threshold");
206  }
207  catch (InvalidParameterType&) {
208  // Try double, for backwards compatibility.
209  // The cast from double to magnitude_type must succeed.
210  relThresh = as<magnitude_type> (params.get<double> ("fact: relative threshold"));
211  }
212  catch (InvalidParameterName&) {
213  // Accept the default value.
214  }
215 
216  try {
217  relaxValue = params.get<magnitude_type> ("fact: relax value");
218  }
219  catch (InvalidParameterType&) {
220  // Try double, for backwards compatibility.
221  // The cast from double to magnitude_type must succeed.
222  relaxValue = as<magnitude_type> (params.get<double> ("fact: relax value"));
223  }
224  catch (InvalidParameterName&) {
225  // Accept the default value.
226  }
227 
228  try {
229  dropTol = params.get<magnitude_type> ("fact: drop tolerance");
230  }
231  catch (InvalidParameterType&) {
232  // Try double, for backwards compatibility.
233  // The cast from double to magnitude_type must succeed.
234  dropTol = as<magnitude_type> (params.get<double> ("fact: drop tolerance"));
235  }
236  catch (InvalidParameterName&) {
237  // Accept the default value.
238  }
239 
240  // "Commit" the values only after validating all of them. This
241  // ensures that there are no side effects if this routine throws an
242  // exception.
243 
244  // mfh 28 Nov 2012: The previous code would not assign Athresh_,
245  // Rthresh_, RelaxValue_, or DropTolerance_ if the read-in value was
246  // -1. I don't know if keeping this behavior is correct, but I'll
247  // keep it just so as not to change previous behavior.
248 
249  LevelOfFill_ = fillLevel;
250  if (absThresh != -STM::one ()) {
251  Athresh_ = absThresh;
252  }
253  if (relThresh != -STM::one ()) {
254  Rthresh_ = relThresh;
255  }
256  if (relaxValue != -STM::one ()) {
257  RelaxValue_ = relaxValue;
258  }
259  if (dropTol != -STM::one ()) {
260  DropTolerance_ = dropTol;
261  }
262 }
263 
264 
265 template <class MatrixType>
266 Teuchos::RCP<const Teuchos::Comm<int> >
268  TEUCHOS_TEST_FOR_EXCEPTION(
269  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getComm: "
270  "The matrix is null. Please call setMatrix() with a nonnull input "
271  "before calling this method.");
272  return A_->getComm ();
273 }
274 
275 
276 template <class MatrixType>
277 Teuchos::RCP<const typename ILUT<MatrixType>::row_matrix_type>
279  return A_;
280 }
281 
282 
283 template <class MatrixType>
284 Teuchos::RCP<const typename ILUT<MatrixType>::map_type>
286 {
287  TEUCHOS_TEST_FOR_EXCEPTION(
288  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getDomainMap: "
289  "The matrix is null. Please call setMatrix() with a nonnull input "
290  "before calling this method.");
291  return A_->getDomainMap ();
292 }
293 
294 
295 template <class MatrixType>
296 Teuchos::RCP<const typename ILUT<MatrixType>::map_type>
298 {
299  TEUCHOS_TEST_FOR_EXCEPTION(
300  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getRangeMap: "
301  "The matrix is null. Please call setMatrix() with a nonnull input "
302  "before calling this method.");
303  return A_->getRangeMap ();
304 }
305 
306 
307 template <class MatrixType>
309  return true;
310 }
311 
312 
313 template <class MatrixType>
315  return NumInitialize_;
316 }
317 
318 
319 template <class MatrixType>
321  return NumCompute_;
322 }
323 
324 
325 template <class MatrixType>
327  return NumApply_;
328 }
329 
330 
331 template <class MatrixType>
333  return InitializeTime_;
334 }
335 
336 
337 template<class MatrixType>
339  return ComputeTime_;
340 }
341 
342 
343 template<class MatrixType>
345  return ApplyTime_;
346 }
347 
348 
349 template<class MatrixType>
351  TEUCHOS_TEST_FOR_EXCEPTION(
352  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::getNodeSmootherComplexity: "
353  "The input matrix A is null. Please call setMatrix() with a nonnull "
354  "input matrix, then call compute(), before calling this method.");
355  // ILUT methods cost roughly one apply + the nnz in the upper+lower triangles
356  return A_->getNodeNumEntries() + getNodeNumEntries();
357 }
358 
359 
360 template<class MatrixType>
362  return L_->getGlobalNumEntries () + U_->getGlobalNumEntries ();
363 }
364 
365 
366 template<class MatrixType>
368  return L_->getNodeNumEntries () + U_->getNodeNumEntries ();
369 }
370 
371 
372 template<class MatrixType>
373 void ILUT<MatrixType>::setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
374 {
375  if (A.getRawPtr () != A_.getRawPtr ()) {
376  // Check in serial or one-process mode if the matrix is square.
377  TEUCHOS_TEST_FOR_EXCEPTION(
378  ! A.is_null () && A->getComm ()->getSize () == 1 &&
379  A->getNodeNumRows () != A->getNodeNumCols (),
380  std::runtime_error, "Ifpack2::ILUT::setMatrix: If A's communicator only "
381  "contains one process, then A must be square. Instead, you provided a "
382  "matrix A with " << A->getNodeNumRows () << " rows and "
383  << A->getNodeNumCols () << " columns.");
384 
385  // It's legal for A to be null; in that case, you may not call
386  // initialize() until calling setMatrix() with a nonnull input.
387  // Regardless, setting the matrix invalidates any previous
388  // factorization.
389  IsInitialized_ = false;
390  IsComputed_ = false;
391  A_local_ = Teuchos::null;
392 
393  // The sparse triangular solvers get a triangular factor as their
394  // input matrix. The triangular factors L_ and U_ are getting
395  // reset, so we reset the solvers' matrices to null. Do that
396  // before setting L_ and U_ to null, so that latter step actually
397  // frees the factors.
398  if (! L_solver_.is_null ()) {
399  L_solver_->setMatrix (Teuchos::null);
400  }
401  if (! U_solver_.is_null ()) {
402  U_solver_->setMatrix (Teuchos::null);
403  }
404 
405  L_ = Teuchos::null;
406  U_ = Teuchos::null;
407  A_ = A;
408  }
409 }
410 
411 
412 template<class MatrixType>
414 {
415  Teuchos::Time timer ("ILUT::initialize");
416  {
417  Teuchos::TimeMonitor timeMon (timer);
418 
419  // Check that the matrix is nonnull.
420  TEUCHOS_TEST_FOR_EXCEPTION(
421  A_.is_null (), std::runtime_error, "Ifpack2::ILUT::initialize: "
422  "The matrix to precondition is null. Please call setMatrix() with a "
423  "nonnull input before calling this method.");
424 
425  // Clear any previous computations.
426  IsInitialized_ = false;
427  IsComputed_ = false;
428  A_local_ = Teuchos::null;
429  L_ = Teuchos::null;
430  U_ = Teuchos::null;
431 
432  A_local_ = makeLocalFilter (A_); // Compute the local filter.
433 
434  IsInitialized_ = true;
435  ++NumInitialize_;
436  }
437  InitializeTime_ += timer.totalElapsedTime ();
438 }
439 
440 
441 template<typename ScalarType>
442 typename Teuchos::ScalarTraits<ScalarType>::magnitudeType
443 scalar_mag (const ScalarType& s)
444 {
445  return Teuchos::ScalarTraits<ScalarType>::magnitude(s);
446 }
447 
448 
449 template<class MatrixType>
451 {
452  using Teuchos::Array;
453  using Teuchos::ArrayRCP;
454  using Teuchos::ArrayView;
455  using Teuchos::as;
456  using Teuchos::rcp;
457  using Teuchos::reduceAll;
458 
459  //--------------------------------------------------------------------------
460  // Ifpack2::ILUT is a translation of the Aztec ILUT implementation. The Aztec
461  // ILUT implementation was written by Ray Tuminaro.
462  //
463  // This isn't an exact translation of the Aztec ILUT algorithm, for the
464  // following reasons:
465  // 1. Minor differences result from the fact that Aztec factors a MSR format
466  // matrix in place, while the code below factors an input CrsMatrix which
467  // remains untouched and stores the resulting factors in separate L and U
468  // CrsMatrix objects.
469  // Also, the Aztec code begins by shifting the matrix pointers back
470  // by one, and the pointer contents back by one, and then using 1-based
471  // Fortran-style indexing in the algorithm. This Ifpack2 code uses C-style
472  // 0-based indexing throughout.
473  // 2. Aztec stores the inverse of the diagonal of U. This Ifpack2 code
474  // stores the non-inverted diagonal in U.
475  // The triangular solves (in Ifpack2::ILUT::apply()) are performed by
476  // calling the Tpetra::CrsMatrix::solve method on the L and U objects, and
477  // this requires U to contain the non-inverted diagonal.
478  //
479  // ABW.
480  //--------------------------------------------------------------------------
481 
482  // Don't count initialization in the compute() time.
483  if (! isInitialized ()) {
484  initialize ();
485  }
486 
487  Teuchos::Time timer ("ILUT::compute");
488  { // Timer scope for timing compute()
489  Teuchos::TimeMonitor timeMon (timer, true);
490  const scalar_type zero = STS::zero ();
491  const scalar_type one = STS::one ();
492 
493  const local_ordinal_type myNumRows = A_local_->getNodeNumRows ();
494  L_ = rcp (new crs_matrix_type (A_local_->getRowMap (), A_local_->getColMap (), 0));
495  U_ = rcp (new crs_matrix_type (A_local_->getRowMap (), A_local_->getColMap (), 0));
496 
497  // CGB: note, this caching approach may not be necessary anymore
498  // We will store ArrayView objects that are views of the rows of U, so that
499  // we don't have to repeatedly retrieve the view for each row. These will
500  // be populated row by row as the factorization proceeds.
501  Array<ArrayView<const local_ordinal_type> > Uindices (myNumRows);
502  Array<ArrayView<const scalar_type> > Ucoefs (myNumRows);
503 
504  // If this macro is defined, files containing the L and U factors
505  // will be written. DON'T CHECK IN THE CODE WITH THIS MACRO ENABLED!!!
506  // #define IFPACK2_WRITE_FACTORS
507 #ifdef IFPACK2_WRITE_FACTORS
508  std::ofstream ofsL("L.tif.mtx", std::ios::out);
509  std::ofstream ofsU("U.tif.mtx", std::ios::out);
510 #endif
511 
512  // The code here uses double for fill calculations, even though
513  // the fill level is actually an integer. The point is to avoid
514  // rounding and overflow for integer calculations. If int is <=
515  // 32 bits, it can never overflow double, so this cast is always
516  // OK as long as int has <= 32 bits.
517 
518  // Calculate how much fill will be allowed in addition to the
519  // space that corresponds to the input matrix entries.
520  double local_nnz = static_cast<double> (A_local_->getNodeNumEntries ());
521  double fill;
522  {
523  const double fillLevel = as<double> (getLevelOfFill ());
524  fill = ((fillLevel - 1) * local_nnz) / (2 * myNumRows);
525  }
526 
527  // std::ceil gives the smallest integer larger than the argument.
528  // this may give a slightly different result than Aztec's fill value in
529  // some cases.
530  double fill_ceil=std::ceil(fill);
531 
532  // Similarly to Aztec, we will allow the same amount of fill for each
533  // row, half in L and half in U.
534  size_type fillL = static_cast<size_type>(fill_ceil);
535  size_type fillU = static_cast<size_type>(fill_ceil);
536 
537  Array<scalar_type> InvDiagU (myNumRows, zero);
538 
539  Array<local_ordinal_type> tmp_idx;
540  Array<scalar_type> tmpv;
541 
542  enum { UNUSED, ORIG, FILL };
543  local_ordinal_type max_col = myNumRows;
544 
545  Array<int> pattern(max_col, UNUSED);
546  Array<scalar_type> cur_row(max_col, zero);
547  Array<magnitude_type> unorm(max_col);
548  magnitude_type rownorm;
549  Array<local_ordinal_type> L_cols_heap;
550  Array<local_ordinal_type> U_cols;
551  Array<local_ordinal_type> L_vals_heap;
552  Array<local_ordinal_type> U_vals_heap;
553 
554  // A comparison object which will be used to create 'heaps' of indices
555  // that are ordered according to the corresponding values in the
556  // 'cur_row' array.
557  greater_indirect<scalar_type,local_ordinal_type> vals_comp(cur_row);
558 
559  // =================== //
560  // start factorization //
561  // =================== //
562 
563  ArrayRCP<local_ordinal_type> ColIndicesARCP;
564  ArrayRCP<scalar_type> ColValuesARCP;
565  if (! A_local_->supportsRowViews ()) {
566  const size_t maxnz = A_local_->getNodeMaxNumRowEntries ();
567  ColIndicesARCP.resize (maxnz);
568  ColValuesARCP.resize (maxnz);
569  }
570 
571  for (local_ordinal_type row_i = 0 ; row_i < myNumRows ; ++row_i) {
572  ArrayView<const local_ordinal_type> ColIndicesA;
573  ArrayView<const scalar_type> ColValuesA;
574  size_t RowNnz;
575 
576  if (A_local_->supportsRowViews ()) {
577  A_local_->getLocalRowView (row_i, ColIndicesA, ColValuesA);
578  RowNnz = ColIndicesA.size ();
579  }
580  else {
581  A_local_->getLocalRowCopy (row_i, ColIndicesARCP (), ColValuesARCP (), RowNnz);
582  ColIndicesA = ColIndicesARCP (0, RowNnz);
583  ColValuesA = ColValuesARCP (0, RowNnz);
584  }
585 
586  // Always include the diagonal in the U factor. The value should get
587  // set in the next loop below.
588  U_cols.push_back(row_i);
589  cur_row[row_i] = zero;
590  pattern[row_i] = ORIG;
591 
592  size_type L_cols_heaplen = 0;
593  rownorm = STM::zero ();
594  for (size_t i = 0; i < RowNnz; ++i) {
595  if (ColIndicesA[i] < myNumRows) {
596  if (ColIndicesA[i] < row_i) {
597  add_to_heap(ColIndicesA[i], L_cols_heap, L_cols_heaplen);
598  }
599  else if (ColIndicesA[i] > row_i) {
600  U_cols.push_back(ColIndicesA[i]);
601  }
602 
603  cur_row[ColIndicesA[i]] = ColValuesA[i];
604  pattern[ColIndicesA[i]] = ORIG;
605  rownorm += scalar_mag(ColValuesA[i]);
606  }
607  }
608 
609  // Alter the diagonal according to the absolute-threshold and
610  // relative-threshold values. If not set, those values default
611  // to zero and one respectively.
612  const magnitude_type rthresh = getRelativeThreshold();
613  const scalar_type& v = cur_row[row_i];
614  cur_row[row_i] = as<scalar_type> (getAbsoluteThreshold() * IFPACK2_SGN(v)) + rthresh*v;
615 
616  size_type orig_U_len = U_cols.size();
617  RowNnz = L_cols_heap.size() + orig_U_len;
618  rownorm = getDropTolerance() * rownorm/RowNnz;
619 
620  // The following while loop corresponds to the 'L30' goto's in Aztec.
621  size_type L_vals_heaplen = 0;
622  while (L_cols_heaplen > 0) {
623  local_ordinal_type row_k = L_cols_heap.front();
624 
625  scalar_type multiplier = cur_row[row_k] * InvDiagU[row_k];
626  cur_row[row_k] = multiplier;
627  magnitude_type mag_mult = scalar_mag(multiplier);
628  if (mag_mult*unorm[row_k] < rownorm) {
629  pattern[row_k] = UNUSED;
630  rm_heap_root(L_cols_heap, L_cols_heaplen);
631  continue;
632  }
633  if (pattern[row_k] != ORIG) {
634  if (L_vals_heaplen < fillL) {
635  add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
636  }
637  else if (L_vals_heaplen==0 ||
638  mag_mult < scalar_mag(cur_row[L_vals_heap.front()])) {
639  pattern[row_k] = UNUSED;
640  rm_heap_root(L_cols_heap, L_cols_heaplen);
641  continue;
642  }
643  else {
644  pattern[L_vals_heap.front()] = UNUSED;
645  rm_heap_root(L_vals_heap, L_vals_heaplen, vals_comp);
646  add_to_heap(row_k, L_vals_heap, L_vals_heaplen, vals_comp);
647  }
648  }
649 
650  /* Reduce current row */
651 
652  ArrayView<const local_ordinal_type>& ColIndicesU = Uindices[row_k];
653  ArrayView<const scalar_type>& ColValuesU = Ucoefs[row_k];
654  size_type ColNnzU = ColIndicesU.size();
655 
656  for(size_type j=0; j<ColNnzU; ++j) {
657  if (ColIndicesU[j] > row_k) {
658  scalar_type tmp = multiplier * ColValuesU[j];
659  local_ordinal_type col_j = ColIndicesU[j];
660  if (pattern[col_j] != UNUSED) {
661  cur_row[col_j] -= tmp;
662  }
663  else if (scalar_mag(tmp) > rownorm) {
664  cur_row[col_j] = -tmp;
665  pattern[col_j] = FILL;
666  if (col_j > row_i) {
667  U_cols.push_back(col_j);
668  }
669  else {
670  add_to_heap(col_j, L_cols_heap, L_cols_heaplen);
671  }
672  }
673  }
674  }
675 
676  rm_heap_root(L_cols_heap, L_cols_heaplen);
677  }//end of while(L_cols_heaplen) loop
678 
679 
680  // Put indices and values for L into arrays and then into the L_ matrix.
681 
682  // first, the original entries from the L section of A:
683  for (size_type i = 0; i < ColIndicesA.size (); ++i) {
684  if (ColIndicesA[i] < row_i) {
685  tmp_idx.push_back(ColIndicesA[i]);
686  tmpv.push_back(cur_row[ColIndicesA[i]]);
687  pattern[ColIndicesA[i]] = UNUSED;
688  }
689  }
690 
691  // next, the L entries resulting from fill:
692  for (size_type j = 0; j < L_vals_heaplen; ++j) {
693  tmp_idx.push_back(L_vals_heap[j]);
694  tmpv.push_back(cur_row[L_vals_heap[j]]);
695  pattern[L_vals_heap[j]] = UNUSED;
696  }
697 
698  // L has a one on the diagonal, but we don't explicitly store
699  // it. If we don't store it, then the kernel which performs the
700  // triangular solve can assume a unit diagonal, take a short-cut
701  // and perform faster.
702 
703  L_->insertLocalValues (row_i, tmp_idx (), tmpv ());
704 #ifdef IFPACK2_WRITE_FACTORS
705  for (size_type ii = 0; ii < tmp_idx.size (); ++ii) {
706  ofsL << row_i << " " << tmp_idx[ii] << " " << tmpv[ii] << std::endl;
707  }
708 #endif
709 
710  tmp_idx.clear();
711  tmpv.clear();
712 
713  // Pick out the diagonal element, store its reciprocal.
714  if (cur_row[row_i] == zero) {
715  std::cerr << "Ifpack2::ILUT::Compute: zero pivot encountered! Replacing with rownorm and continuing...(You may need to set the parameter 'fact: absolute threshold'.)" << std::endl;
716  cur_row[row_i] = rownorm;
717  }
718  InvDiagU[row_i] = one / cur_row[row_i];
719 
720  // Non-inverted diagonal is stored for U:
721  tmp_idx.push_back(row_i);
722  tmpv.push_back(cur_row[row_i]);
723  unorm[row_i] = scalar_mag(cur_row[row_i]);
724  pattern[row_i] = UNUSED;
725 
726  // Now put indices and values for U into arrays and then into the U_ matrix.
727  // The first entry in U_cols is the diagonal, which we just handled, so we'll
728  // start our loop at j=1.
729 
730  size_type U_vals_heaplen = 0;
731  for(size_type j=1; j<U_cols.size(); ++j) {
732  local_ordinal_type col = U_cols[j];
733  if (pattern[col] != ORIG) {
734  if (U_vals_heaplen < fillU) {
735  add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
736  }
737  else if (U_vals_heaplen!=0 && scalar_mag(cur_row[col]) >
738  scalar_mag(cur_row[U_vals_heap.front()])) {
739  rm_heap_root(U_vals_heap, U_vals_heaplen, vals_comp);
740  add_to_heap(col, U_vals_heap, U_vals_heaplen, vals_comp);
741  }
742  }
743  else {
744  tmp_idx.push_back(col);
745  tmpv.push_back(cur_row[col]);
746  unorm[row_i] += scalar_mag(cur_row[col]);
747  }
748  pattern[col] = UNUSED;
749  }
750 
751  for(size_type j=0; j<U_vals_heaplen; ++j) {
752  tmp_idx.push_back(U_vals_heap[j]);
753  tmpv.push_back(cur_row[U_vals_heap[j]]);
754  unorm[row_i] += scalar_mag(cur_row[U_vals_heap[j]]);
755  }
756 
757  unorm[row_i] /= (orig_U_len + U_vals_heaplen);
758 
759  U_->insertLocalValues(row_i, tmp_idx(), tmpv() );
760 #ifdef IFPACK2_WRITE_FACTORS
761  for(int ii=0; ii<tmp_idx.size(); ++ii) {
762  ofsU <<row_i<< " " <<tmp_idx[ii]<< " " <<tmpv[ii]<< std::endl;
763  }
764 #endif
765  tmp_idx.clear();
766  tmpv.clear();
767 
768  U_->getLocalRowView(row_i, Uindices[row_i], Ucoefs[row_i] );
769 
770  L_cols_heap.clear();
771  U_cols.clear();
772  L_vals_heap.clear();
773  U_vals_heap.clear();
774  } // end of for(row_i) loop
775 
776  // FIXME (mfh 03 Apr 2013) Do we need to supply a domain and range Map?
777  L_->fillComplete();
778  U_->fillComplete();
779 
780  L_solver_ = Teuchos::rcp (new LocalSparseTriangularSolver<row_matrix_type> (L_));
781  L_solver_->initialize ();
782  L_solver_->compute ();
783 
784  U_solver_ = Teuchos::rcp (new LocalSparseTriangularSolver<row_matrix_type> (U_));
785  U_solver_->initialize ();
786  U_solver_->compute ();
787  }
788  ComputeTime_ += timer.totalElapsedTime ();
789  IsComputed_ = true;
790  ++NumCompute_;
791 }
792 
793 
794 template <class MatrixType>
796 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
797  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
798  Teuchos::ETransp mode,
799  scalar_type alpha,
800  scalar_type beta) const
801 {
802  using Teuchos::RCP;
803  using Teuchos::rcp;
804  using Teuchos::rcpFromRef;
805  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type> MV;
806 
807  Teuchos::Time timer ("ILUT::apply");
808  { // Timer scope for timing apply()
809  Teuchos::TimeMonitor timeMon (timer, true);
810 
811  TEUCHOS_TEST_FOR_EXCEPTION(
812  ! isComputed (), std::runtime_error,
813  "Ifpack2::ILUT::apply: You must call compute() to compute the incomplete "
814  "factorization, before calling apply().");
815 
816  TEUCHOS_TEST_FOR_EXCEPTION(
817  X.getNumVectors() != Y.getNumVectors(), std::runtime_error,
818  "Ifpack2::ILUT::apply: X and Y must have the same number of columns. "
819  "X has " << X.getNumVectors () << " columns, but Y has "
820  << Y.getNumVectors () << " columns.");
821 
822  if (alpha == Teuchos::ScalarTraits<scalar_type>::zero ()) {
823  // alpha == 0, so we don't need to apply the operator.
824  //
825  // The special case for beta == 0 ensures that if Y contains Inf
826  // or NaN values, we replace them with 0 (following BLAS
827  // convention), rather than multiplying them by 0 to get NaN.
828  if (beta == Teuchos::ScalarTraits<scalar_type>::zero ()) {
829  Y.putScalar (beta);
830  } else {
831  Y.scale (beta);
832  }
833  return;
834  }
835 
836  // If beta != 0, create a temporary multivector Y_temp to hold the
837  // contents of alpha*M^{-1}*X. Otherwise, alias Y_temp to Y.
838  RCP<MV> Y_temp;
839  if (beta == Teuchos::ScalarTraits<scalar_type>::zero ()) {
840  Y_temp = rcpFromRef (Y);
841  } else {
842  Y_temp = rcp (new MV (Y.getMap (), Y.getNumVectors ()));
843  }
844 
845  // If X and Y are pointing to the same memory location, create an
846  // auxiliary vector, X_temp, so that we don't clobber the input
847  // when computing the output. Otherwise, alias X_temp to X.
848  RCP<const MV> X_temp;
849  {
850  auto X_lcl_host = X.template getLocalView<Kokkos::HostSpace> ();
851  auto Y_lcl_host = Y.template getLocalView<Kokkos::HostSpace> ();
852  if (X_lcl_host.ptr_on_device () == Y_lcl_host.ptr_on_device ()) {
853  X_temp = rcp (new MV (X, Teuchos::Copy));
854  } else {
855  X_temp = rcpFromRef (X);
856  }
857  }
858 
859  // Create a temporary multivector Y_mid to hold the intermediate
860  // between the L and U (or U and L, for the transpose or conjugate
861  // transpose case) solves.
862  RCP<MV> Y_mid = rcp (new MV (Y.getMap (), Y.getNumVectors ()));
863 
864  if (mode == Teuchos::NO_TRANS) { // Solve L U Y = X
865  L_solver_->apply (*X_temp, *Y_mid, mode);
866 
867  // FIXME (mfh 20 Aug 2013) Is it OK to use Y_temp for both the
868  // input and the output?
869 
870  U_solver_->apply (*Y_mid, *Y_temp, mode);
871  }
872  else { // Solve U^* L^* Y = X
873  U_solver_->apply (*X_temp, *Y_mid, mode);
874 
875  // FIXME (mfh 20 Aug 2013) Is it OK to use Y_temp for both the
876  // input and the output?
877 
878  L_solver_->apply (*Y_mid, *Y_temp, mode);
879  }
880 
881  if (beta == Teuchos::ScalarTraits<scalar_type>::zero ()) {
882  Y.scale (alpha);
883  } else { // beta != 0
884  Y.update (alpha, *Y_temp, beta);
885  }
886  }
887  ++NumApply_;
888  ApplyTime_ += timer.totalElapsedTime ();
889 }
890 
891 
892 template <class MatrixType>
893 std::string ILUT<MatrixType>::description () const
894 {
895  std::ostringstream os;
896 
897  // Output is a valid YAML dictionary in flow style. If you don't
898  // like everything on a single line, you should call describe()
899  // instead.
900  os << "\"Ifpack2::ILUT\": {";
901  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
902  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
903 
904  os << "Level-of-fill: " << getLevelOfFill() << ", "
905  << "absolute threshold: " << getAbsoluteThreshold() << ", "
906  << "relative threshold: " << getRelativeThreshold() << ", "
907  << "relaxation value: " << getRelaxValue() << ", ";
908 
909  if (A_.is_null ()) {
910  os << "Matrix: null";
911  }
912  else {
913  os << "Global matrix dimensions: ["
914  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
915  << ", Global nnz: " << A_->getGlobalNumEntries();
916  }
917 
918  os << "}";
919  return os.str ();
920 }
921 
922 
923 template <class MatrixType>
924 void
926 describe (Teuchos::FancyOStream& out,
927  const Teuchos::EVerbosityLevel verbLevel) const
928 {
929  using Teuchos::Comm;
930  using Teuchos::OSTab;
931  using Teuchos::RCP;
932  using Teuchos::TypeNameTraits;
933  using std::endl;
934  using Teuchos::VERB_DEFAULT;
935  using Teuchos::VERB_NONE;
936  using Teuchos::VERB_LOW;
937  using Teuchos::VERB_MEDIUM;
938  using Teuchos::VERB_HIGH;
939  using Teuchos::VERB_EXTREME;
940 
941  const Teuchos::EVerbosityLevel vl =
942  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
943  OSTab tab0 (out);
944 
945  if (vl > VERB_NONE) {
946  out << "\"Ifpack2::ILUT\":" << endl;
947  OSTab tab1 (out);
948  out << "MatrixType: " << TypeNameTraits<MatrixType>::name () << endl;
949  if (this->getObjectLabel () != "") {
950  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
951  }
952  out << "Initialized: " << (isInitialized () ? "true" : "false")
953  << endl
954  << "Computed: " << (isComputed () ? "true" : "false")
955  << endl
956  << "Level of fill: " << getLevelOfFill () << endl
957  << "Absolute threshold: " << getAbsoluteThreshold () << endl
958  << "Relative threshold: " << getRelativeThreshold () << endl
959  << "Relax value: " << getRelaxValue () << endl;
960 
961  if (isComputed () && vl >= VERB_HIGH) {
962  const double fillFraction =
963  (double) getGlobalNumEntries () / (double) A_->getGlobalNumEntries ();
964  const double nnzToRows =
965  (double) getGlobalNumEntries () / (double) U_->getGlobalNumRows ();
966 
967  out << "Dimensions of L: [" << L_->getGlobalNumRows () << ", "
968  << L_->getGlobalNumRows () << "]" << endl
969  << "Dimensions of U: [" << U_->getGlobalNumRows () << ", "
970  << U_->getGlobalNumRows () << "]" << endl
971  << "Number of nonzeros in factors: " << getGlobalNumEntries () << endl
972  << "Fill fraction of factors over A: " << fillFraction << endl
973  << "Ratio of nonzeros to rows: " << nnzToRows << endl;
974  }
975 
976  out << "Number of initialize calls: " << getNumInitialize () << endl
977  << "Number of compute calls: " << getNumCompute () << endl
978  << "Number of apply calls: " << getNumApply () << endl
979  << "Total time in seconds for initialize: " << getInitializeTime () << endl
980  << "Total time in seconds for compute: " << getComputeTime () << endl
981  << "Total time in seconds for apply: " << getApplyTime () << endl;
982 
983  out << "Local matrix:" << endl;
984  A_local_->describe (out, vl);
985  }
986 }
987 
988 template <class MatrixType>
989 Teuchos::RCP<const typename ILUT<MatrixType>::row_matrix_type>
990 ILUT<MatrixType>::makeLocalFilter (const Teuchos::RCP<const row_matrix_type>& A)
991 {
992  if (A->getComm ()->getSize () > 1) {
993  return Teuchos::rcp (new LocalFilter<row_matrix_type> (A));
994  } else {
995  return A;
996  }
997 }
998 
999 }//namespace Ifpack2
1000 
1001 
1002 // FIXME (mfh 16 Sep 2014) We should really only use RowMatrix here!
1003 // There's no need to instantiate for CrsMatrix too. All Ifpack2
1004 // preconditioners can and should do dynamic casts if they need a type
1005 // more specific than RowMatrix.
1006 
1007 #define IFPACK2_ILUT_INSTANT(S,LO,GO,N) \
1008  template class Ifpack2::ILUT< Tpetra::RowMatrix<S, LO, GO, N> >;
1009 
1010 #endif /* IFPACK2_ILUT_DEF_HPP */
magnitude_type getRelaxValue() const
Get the relax value.
Definition: Ifpack2_ILUT_decl.hpp:332
int getNumCompute() const
Returns the number of calls to Compute().
Definition: Ifpack2_ILUT_def.hpp:320
ILUT(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_ILUT_def.hpp:116
virtual ~ILUT()
Destructor.
Definition: Ifpack2_ILUT_def.hpp:134
global_size_t getGlobalNumEntries() const
Returns the number of nonzero entries in the global graph.
Definition: Ifpack2_ILUT_def.hpp:361
bool hasTransposeApply() const
Whether this object&#39;s apply() method can apply the transpose (or conjugate transpose, if applicable).
Definition: Ifpack2_ILUT_def.hpp:308
size_t getNodeNumEntries() const
Returns the number of nonzero entries in the local graph.
Definition: Ifpack2_ILUT_def.hpp:367
void initialize()
Clear any previously computed factors.
Definition: Ifpack2_ILUT_def.hpp:413
magnitude_type getDropTolerance() const
Gets the dropping tolerance.
Definition: Ifpack2_ILUT_decl.hpp:337
magnitude_type getAbsoluteThreshold() const
Get absolute threshold value.
Definition: Ifpack2_ILUT_decl.hpp:322
bool isComputed() const
If compute() is completed, this query returns true, otherwise it returns false.
Definition: Ifpack2_ILUT_decl.hpp:215
int getNumApply() const
Returns the number of calls to apply().
Definition: Ifpack2_ILUT_def.hpp:326
int getNumInitialize() const
Returns the number of calls to Initialize().
Definition: Ifpack2_ILUT_def.hpp:314
ILUT (incomplete LU factorization with threshold) of a Tpetra sparse matrix.
Definition: Ifpack2_ILUT_decl.hpp:91
Teuchos::RCP< const map_type > getRangeMap() const
Tpetra::Map representing the range of this operator.
Definition: Ifpack2_ILUT_def.hpp:297
void rm_heap_root(Teuchos::Array< Ordinal > &heap, SizeType &heap_len)
Definition: Ifpack2_Heap.hpp:92
double getInitializeTime() const
Returns the time spent in Initialize().
Definition: Ifpack2_ILUT_def.hpp:332
bool isInitialized() const
Returns true if the preconditioner has been successfully initialized.
Definition: Ifpack2_ILUT_decl.hpp:200
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:83
Teuchos::RCP< const map_type > getDomainMap() const
Tpetra::Map representing the domain of this operator.
Definition: Ifpack2_ILUT_def.hpp:285
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_ILUT_decl.hpp:106
void compute()
Compute factors L and U using the specified diagonal perturbation thresholds and relaxation parameter...
Definition: Ifpack2_ILUT_def.hpp:450
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_ILUT_def.hpp:373
magnitude_type getRelativeThreshold() const
Get relative threshold value.
Definition: Ifpack2_ILUT_decl.hpp:327
Teuchos::RCP< const row_matrix_type > getMatrix() const
Returns a reference to the matrix to be preconditioned.
Definition: Ifpack2_ILUT_def.hpp:278
std::string description() const
Return a simple one-line description of this object.
Definition: Ifpack2_ILUT_def.hpp:893
Definition: Ifpack2_Container.hpp:761
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to an FancyOStream object.
Definition: Ifpack2_ILUT_def.hpp:926
double getComputeTime() const
Returns the time spent in Compute().
Definition: Ifpack2_ILUT_def.hpp:338
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Type of the Tpetra::CrsMatrix specialization that this class uses for the L and U factors...
Definition: Ifpack2_ILUT_decl.hpp:126
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns the input matrix&#39;s communicator.
Definition: Ifpack2_ILUT_def.hpp:267
void add_to_heap(const Ordinal &idx, Teuchos::Array< Ordinal > &heap, SizeType &heap_len)
Definition: Ifpack2_Heap.hpp:70
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_ILUT_def.hpp:350
Access only local rows and columns of a sparse matrix.
Definition: Ifpack2_LocalFilter_decl.hpp:160
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the ILUT preconditioner to X, resulting in Y.
Definition: Ifpack2_ILUT_def.hpp:796
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
double getApplyTime() const
Returns the time spent in apply().
Definition: Ifpack2_ILUT_def.hpp:344
int getLevelOfFill() const
The level of fill.
Definition: Ifpack2_ILUT_decl.hpp:317
void setParameters(const Teuchos::ParameterList &params)
Set preconditioner parameters.
Definition: Ifpack2_ILUT_def.hpp:138
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_ILUT_decl.hpp:109