MueLu  Version of the Day
MueLu_TentativePFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_TENTATIVEPFACTORY_DEF_HPP
47 #define MUELU_TENTATIVEPFACTORY_DEF_HPP
48 
49 #include <Xpetra_MapFactory.hpp>
50 #include <Xpetra_Map.hpp>
51 #include <Xpetra_CrsMatrix.hpp>
52 #include <Xpetra_Matrix.hpp>
53 #include <Xpetra_MultiVector.hpp>
54 #include <Xpetra_MultiVectorFactory.hpp>
55 #include <Xpetra_VectorFactory.hpp>
56 #include <Xpetra_Import.hpp>
57 #include <Xpetra_ImportFactory.hpp>
58 #include <Xpetra_CrsMatrixWrap.hpp>
59 #include <Xpetra_StridedMap.hpp>
60 #include <Xpetra_StridedMapFactory.hpp>
61 
63 
64 #include "MueLu_Aggregates.hpp"
65 #include "MueLu_AmalgamationFactory.hpp"
66 #include "MueLu_AmalgamationInfo.hpp"
67 #include "MueLu_CoarseMapFactory.hpp"
68 #include "MueLu_MasterList.hpp"
69 #include "MueLu_Monitor.hpp"
70 #include "MueLu_NullspaceFactory.hpp"
71 #include "MueLu_PerfUtils.hpp"
72 #include "MueLu_Utilities.hpp"
73 
74 namespace MueLu {
75 
76  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
78  RCP<ParameterList> validParamList = rcp(new ParameterList());
79 
80 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
81  SET_VALID_ENTRY("tentative: calculate qr");
82 #undef SET_VALID_ENTRY
83 
84  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
85  validParamList->set< RCP<const FactoryBase> >("Aggregates", Teuchos::null, "Generating factory of the aggregates");
86  validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
87  validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
88  validParamList->set< RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the coarse map");
89 
90  // Make sure we don't recursively validate options for the matrixmatrix kernels
91  ParameterList norecurse;
92  norecurse.disableRecursiveValidation();
93  validParamList->set<ParameterList> ("matrixmatrix: kernel params", norecurse, "MatrixMatrix kernel parameters");
94 
95  return validParamList;
96  }
97 
98  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
100  Input(fineLevel, "A");
101  Input(fineLevel, "Aggregates");
102  Input(fineLevel, "Nullspace");
103  Input(fineLevel, "UnAmalgamationInfo");
104  Input(fineLevel, "CoarseMap");
105  }
106 
107  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
109  return BuildP(fineLevel, coarseLevel);
110  }
111 
112  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
114  FactoryMonitor m(*this, "Build", coarseLevel);
115 
116  RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
117  RCP<Aggregates> aggregates = Get< RCP<Aggregates> > (fineLevel, "Aggregates");
118  RCP<AmalgamationInfo> amalgInfo = Get< RCP<AmalgamationInfo> >(fineLevel, "UnAmalgamationInfo");
119  RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
120  RCP<const Map> coarseMap = Get< RCP<const Map> > (fineLevel, "CoarseMap");
121 
122  RCP<Matrix> Ptentative;
123  RCP<MultiVector> coarseNullspace;
124  if (!aggregates->AggregatesCrossProcessors())
125  BuildPuncoupled(A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace,coarseLevel.GetLevelID());
126  else
127  BuildPcoupled (A, aggregates, amalgInfo, fineNullspace, coarseMap, Ptentative, coarseNullspace);
128 
129  // If available, use striding information of fine level matrix A for range
130  // map and coarseMap as domain map; otherwise use plain range map of
131  // Ptent = plain range map of A for range map and coarseMap as domain map.
132  // NOTE:
133  // The latter is not really safe, since there is no striding information
134  // for the range map. This is not really a problem, since striding
135  // information is always available on the intermedium levels and the
136  // coarsest levels.
137  if (A->IsView("stridedMaps") == true)
138  Ptentative->CreateView("stridedMaps", A->getRowMap("stridedMaps"), coarseMap);
139  else
140  Ptentative->CreateView("stridedMaps", Ptentative->getRangeMap(), coarseMap);
141 
142  Set(coarseLevel, "Nullspace", coarseNullspace);
143  Set(coarseLevel, "P", Ptentative);
144 
145  if (IsPrint(Statistics1)) {
146  RCP<ParameterList> params = rcp(new ParameterList());
147  params->set("printLoadBalancingInfo", true);
148  GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*Ptentative, "Ptent", params);
149  }
150  }
151 
152  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
154  BuildPuncoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
155  RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace, const int levelID) const {
156  RCP<const Map> rowMap = A->getRowMap();
157  RCP<const Map> colMap = A->getColMap();
158 
159  const size_t numRows = rowMap->getNodeNumElements();
160 
161  typedef Teuchos::ScalarTraits<SC> STS;
162  typedef typename STS::magnitudeType Magnitude;
163  const SC zero = STS::zero();
164  const SC one = STS::one();
165  const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
166 
167  const GO numAggs = aggregates->GetNumAggregates();
168  const size_t NSDim = fineNullspace->getNumVectors();
169 
170  // Aggregates map is based on the amalgamated column map
171  // We can skip global-to-local conversion if LIDs in row map are
172  // same as LIDs in column map
173  bool goodMap = isGoodMap(*rowMap, *colMap);
174 
175  // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
176  // aggStart is a pointer into aggToRowMapLO
177  // aggStart[i]..aggStart[i+1] are indices into aggToRowMapLO
178  // aggToRowMapLO[aggStart[i]]..aggToRowMapLO[aggStart[i+1]-1] are the DOFs in aggregate i
179  ArrayRCP<LO> aggStart;
180  ArrayRCP<LO> aggToRowMapLO;
181  ArrayRCP<GO> aggToRowMapGO;
182  if (goodMap) {
183  amalgInfo->UnamalgamateAggregatesLO(*aggregates, aggStart, aggToRowMapLO);
184  GetOStream(Runtime1) << "Column map is consistent with the row map, good." << std::endl;
185 
186  } else {
187  amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMapGO);
188  GetOStream(Warnings0) << "Column map is not consistent with the row map\n"
189  << "using GO->LO conversion with performance penalty" << std::endl;
190  }
191 
192  coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
193 
194  const ParameterList& pL = GetParameterList();
195  const bool &doQRStep = pL.get<bool>("tentative: calculate qr");
196 
197  // Pull out the nullspace vectors so that we can have random access.
198  ArrayRCP<ArrayRCP<const SC> > fineNS (NSDim);
199  ArrayRCP<ArrayRCP<SC> > coarseNS(NSDim);
200  for (size_t i = 0; i < NSDim; i++) {
201  fineNS[i] = fineNullspace->getData(i);
202  if (coarseMap->getNodeNumElements() > 0)
203  coarseNS[i] = coarseNullspace->getDataNonConst(i);
204  }
205 
206  size_t nnzEstimate = numRows * NSDim;
207 
208  // Time to construct the matrix and fill in the values
209  Ptentative = rcp(new CrsMatrixWrap(rowMap, coarseMap, 0, Xpetra::StaticProfile));
210  RCP<CrsMatrix> PtentCrs = rcp_dynamic_cast<CrsMatrixWrap>(Ptentative)->getCrsMatrix();
211 
212  ArrayRCP<size_t> iaPtent;
213  ArrayRCP<LO> jaPtent;
214  ArrayRCP<SC> valPtent;
215 
216  PtentCrs->allocateAllValues(nnzEstimate, iaPtent, jaPtent, valPtent);
217 
218  ArrayView<size_t> ia = iaPtent();
219  ArrayView<LO> ja = jaPtent();
220  ArrayView<SC> val = valPtent();
221 
222  ia[0] = 0;
223  for (size_t i = 1; i <= numRows; i++)
224  ia[i] = ia[i-1] + NSDim;
225 
226  for (size_t j = 0; j < nnzEstimate; j++) {
227  ja [j] = INVALID;
228  val[j] = zero;
229  }
230 
231 
232  if (doQRStep) {
234  // Standard aggregate-wise QR //
236  for (GO agg = 0; agg < numAggs; agg++) {
237  LO aggSize = aggStart[agg+1] - aggStart[agg];
238 
239  Xpetra::global_size_t offset = agg*NSDim;
240 
241  // Extract the piece of the nullspace corresponding to the aggregate, and
242  // put it in the flat array, "localQR" (in column major format) for the
243  // QR routine.
244  Teuchos::SerialDenseMatrix<LO,SC> localQR(aggSize, NSDim);
245  if (goodMap) {
246  for (size_t j = 0; j < NSDim; j++)
247  for (LO k = 0; k < aggSize; k++)
248  localQR(k,j) = fineNS[j][aggToRowMapLO[aggStart[agg]+k]];
249  } else {
250  for (size_t j = 0; j < NSDim; j++)
251  for (LO k = 0; k < aggSize; k++)
252  localQR(k,j) = fineNS[j][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+k])];
253  }
254 
255  // Test for zero columns
256  for (size_t j = 0; j < NSDim; j++) {
257  bool bIsZeroNSColumn = true;
258 
259  for (LO k = 0; k < aggSize; k++)
260  if (localQR(k,j) != zero)
261  bIsZeroNSColumn = false;
262 
263  TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError,
264  "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column");
265  }
266 
267  // Calculate QR decomposition (standard)
268  // NOTE: Q is stored in localQR and R is stored in coarseNS
269  if (aggSize >= Teuchos::as<LO>(NSDim)) {
270 
271  if (NSDim == 1) {
272  // Only one nullspace vector, calculate Q and R by hand
273  Magnitude norm = STS::magnitude(zero);
274  for (size_t k = 0; k < Teuchos::as<size_t>(aggSize); k++)
275  norm += STS::magnitude(localQR(k,0)*localQR(k,0));
276  norm = Teuchos::ScalarTraits<Magnitude>::squareroot(norm);
277 
278  // R = norm
279  coarseNS[0][offset] = norm;
280 
281  // Q = localQR(:,0)/norm
282  for (LO i = 0; i < aggSize; i++)
283  localQR(i,0) /= norm;
284 
285  } else {
286  Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
287  qrSolver.setMatrix(Teuchos::rcp(&localQR, false));
288  qrSolver.factor();
289 
290  // R = upper triangular part of localQR
291  for (size_t j = 0; j < NSDim; j++)
292  for (size_t k = 0; k <= j; k++)
293  coarseNS[j][offset+k] = localQR(k,j); //TODO is offset+k the correct local ID?!
294 
295  // Calculate Q, the tentative prolongator.
296  // The Lapack GEQRF call only works for myAggsize >= NSDim
297  qrSolver.formQ();
298  Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
299  for (size_t j = 0; j < NSDim; j++)
300  for (size_t i = 0; i < Teuchos::as<size_t>(aggSize); i++)
301  localQR(i,j) = (*qFactor)(i,j);
302  }
303 
304  } else {
305  // Special handling for aggSize < NSDim (i.e. single node aggregates in structural mechanics)
306 
307  // The local QR decomposition is not possible in the "overconstrained"
308  // case (i.e. number of columns in localQR > number of rows), which
309  // corresponds to #DOFs in Aggregate < NSDim. For usual problems this
310  // is only possible for single node aggregates in structural mechanics.
311  // (Similar problems may arise in discontinuous Galerkin problems...)
312  // We bypass the QR decomposition and use an identity block in the
313  // tentative prolongator for the single node aggregate and transfer the
314  // corresponding fine level null space information 1-to-1 to the coarse
315  // level null space part.
316 
317  // NOTE: The resulting tentative prolongation operator has
318  // (aggSize*DofsPerNode-NSDim) zero columns leading to a singular
319  // coarse level operator A. To deal with that one has the following
320  // options:
321  // - Use the "RepairMainDiagonal" flag in the RAPFactory (default:
322  // false) to add some identity block to the diagonal of the zero rows
323  // in the coarse level operator A, such that standard level smoothers
324  // can be used again.
325  // - Use special (projection-based) level smoothers, which can deal
326  // with singular matrices (very application specific)
327  // - Adapt the code below to avoid zero columns. However, we do not
328  // support a variable number of DOFs per node in MueLu/Xpetra which
329  // makes the implementation really hard.
330 
331  // R = extended (by adding identity rows) localQR
332  for (size_t j = 0; j < NSDim; j++)
333  for (size_t k = 0; k < NSDim; k++)
334  if (k < as<size_t>(aggSize))
335  coarseNS[j][offset+k] = localQR(k,j);
336  else
337  coarseNS[j][offset+k] = (k == j ? one : zero);
338 
339  // Q = I (rectangular)
340  for (size_t i = 0; i < as<size_t>(aggSize); i++)
341  for (size_t j = 0; j < NSDim; j++)
342  localQR(i,j) = (j == i ? one : zero);
343  }
344 
345 
346  // Process each row in the local Q factor
347  // FIXME: What happens if maps are blocked?
348  for (LO j = 0; j < aggSize; j++) {
349  LO localRow = (goodMap ? aggToRowMapLO[aggStart[agg]+j] : rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j]));
350 
351  size_t rowStart = ia[localRow];
352  for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
353  // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
354  if (localQR(j,k) != zero) {
355  ja [rowStart+lnnz] = offset + k;
356  val[rowStart+lnnz] = localQR(j,k);
357  lnnz++;
358  }
359  }
360  }
361  }
362 
363  } else {
364  GetOStream(Runtime1) << "TentativePFactory : bypassing local QR phase" << std::endl;
365  if (NSDim>1)
366  GetOStream(Warnings0) << "TentativePFactor : for nontrivial nullspace, this may degrade performance" << std::endl;
368  // "no-QR" option //
370  // Local Q factor is just the fine nullspace support over the current aggregate.
371  // Local R factor is the identity.
372  // TODO I have not implemented any special handling for aggregates that are too
373  // TODO small to locally support the nullspace, as is done in the standard QR
374  // TODO case above.
375  if (goodMap) {
376  for (GO agg = 0; agg < numAggs; agg++) {
377  const LO aggSize = aggStart[agg+1] - aggStart[agg];
378  Xpetra::global_size_t offset = agg*NSDim;
379 
380  // Process each row in the local Q factor
381  // FIXME: What happens if maps are blocked?
382  for (LO j = 0; j < aggSize; j++) {
383 
384  //TODO Here I do not check for a zero nullspace column on the aggregate.
385  // as is done in the standard QR case.
386 
387  const LO localRow = aggToRowMapLO[aggStart[agg]+j];
388 
389  const size_t rowStart = ia[localRow];
390 
391  for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
392  // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
393  const SC qr_jk = fineNS[k][aggToRowMapLO[aggStart[agg]+j]];
394  if (qr_jk != zero) {
395  ja [rowStart+lnnz] = offset + k;
396  val[rowStart+lnnz] = qr_jk;
397  lnnz++;
398  }
399  }
400  }
401  for (size_t j = 0; j < NSDim; j++)
402  coarseNS[j][offset+j] = one;
403  } //for (GO agg = 0; agg < numAggs; agg++)
404 
405  } else {
406  for (GO agg = 0; agg < numAggs; agg++) {
407  const LO aggSize = aggStart[agg+1] - aggStart[agg];
408  Xpetra::global_size_t offset = agg*NSDim;
409  for (LO j = 0; j < aggSize; j++) {
410 
411  const LO localRow = rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j]);
412 
413  const size_t rowStart = ia[localRow];
414 
415  for (size_t k = 0, lnnz = 0; k < NSDim; k++) {
416  // Skip zeros (there may be plenty of them, i.e., NSDim > 1 or boundary conditions)
417  const SC qr_jk = fineNS[k][rowMap->getLocalElement(aggToRowMapGO[aggStart[agg]+j])];
418  if (qr_jk != zero) {
419  ja [rowStart+lnnz] = offset + k;
420  val[rowStart+lnnz] = qr_jk;
421  lnnz++;
422  }
423  }
424  }
425  for (size_t j = 0; j < NSDim; j++)
426  coarseNS[j][offset+j] = one;
427  } //for (GO agg = 0; agg < numAggs; agg++)
428 
429  } //if (goodmap) else ...
430 
431  } //if doQRStep ... else
432 
433  // Compress storage (remove all INVALID, which happen when we skip zeros)
434  // We do that in-place
435  size_t ia_tmp = 0, nnz = 0;
436  for (size_t i = 0; i < numRows; i++) {
437  for (size_t j = ia_tmp; j < ia[i+1]; j++)
438  if (ja[j] != INVALID) {
439  ja [nnz] = ja [j];
440  val[nnz] = val[j];
441  nnz++;
442  }
443  ia_tmp = ia[i+1];
444  ia[i+1] = nnz;
445  }
446  if (rowMap->lib() == Xpetra::UseTpetra) {
447  // - Cannot resize for Epetra, as it checks for same pointers
448  // - Need to resize for Tpetra, as it check ().size() == ia[numRows]
449  // NOTE: these invalidate ja and val views
450  jaPtent .resize(nnz);
451  valPtent.resize(nnz);
452  }
453 
454  GetOStream(Runtime1) << "TentativePFactory : aggregates do not cross process boundaries" << std::endl;
455 
456  PtentCrs->setAllValues(iaPtent, jaPtent, valPtent);
457 
458 
459  // Managing labels & constants for ESFC
460  RCP<ParameterList> FCparams;
461  if(pL.isSublist("matrixmatrix: kernel params"))
462  FCparams=rcp(new ParameterList(pL.sublist("matrixmatrix: kernel params")));
463  else
464  FCparams= rcp(new ParameterList);
465  // By default, we don't need global constants for TentativeP
466  FCparams->set("compute global constants",FCparams->get("compute global constants",false));
467  std::string levelIDs = toString(levelID);
468  FCparams->set("Timer Label",std::string("MueLu::TentativeP-")+levelIDs);
469  RCP<const Export> dummy_e;
470  RCP<const Import> dummy_i;
471 
472  PtentCrs->expertStaticFillComplete(coarseMap, A->getDomainMap(),dummy_i,dummy_e,FCparams);
473  }
474 
475  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
477  BuildPcoupled(RCP<Matrix> A, RCP<Aggregates> aggregates, RCP<AmalgamationInfo> amalgInfo, RCP<MultiVector> fineNullspace,
478  RCP<const Map> coarseMap, RCP<Matrix>& Ptentative, RCP<MultiVector>& coarseNullspace) const {
479  typedef Teuchos::ScalarTraits<SC> STS;
480  typedef typename STS::magnitudeType Magnitude;
481  const SC zero = STS::zero();
482  const SC one = STS::one();
483 
484  // number of aggregates
485  GO numAggs = aggregates->GetNumAggregates();
486 
487  // Create a lookup table to determine the rows (fine DOFs) that belong to a given aggregate.
488  // aggStart is a pointer into aggToRowMap
489  // aggStart[i]..aggStart[i+1] are indices into aggToRowMap
490  // aggToRowMap[aggStart[i]]..aggToRowMap[aggStart[i+1]-1] are the DOFs in aggregate i
491  ArrayRCP<LO> aggStart;
492  ArrayRCP< GO > aggToRowMap;
493  amalgInfo->UnamalgamateAggregates(*aggregates, aggStart, aggToRowMap);
494 
495  // find size of the largest aggregate
496  LO maxAggSize=0;
497  for (GO i=0; i<numAggs; ++i) {
498  LO sizeOfThisAgg = aggStart[i+1] - aggStart[i];
499  if (sizeOfThisAgg > maxAggSize) maxAggSize = sizeOfThisAgg;
500  }
501 
502  // dimension of fine level nullspace
503  const size_t NSDim = fineNullspace->getNumVectors();
504 
505  // index base for coarse Dof map (usually 0)
506  GO indexBase=A->getRowMap()->getIndexBase();
507 
508  const RCP<const Map> nonUniqueMap = amalgInfo->ComputeUnamalgamatedImportDofMap(*aggregates);
509  const RCP<const Map> uniqueMap = A->getDomainMap();
510  RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
511  RCP<MultiVector> fineNullspaceWithOverlap = MultiVectorFactory::Build(nonUniqueMap,NSDim);
512  fineNullspaceWithOverlap->doImport(*fineNullspace,*importer,Xpetra::INSERT);
513 
514  // Pull out the nullspace vectors so that we can have random access.
515  ArrayRCP< ArrayRCP<const SC> > fineNS(NSDim);
516  for (size_t i=0; i<NSDim; ++i)
517  fineNS[i] = fineNullspaceWithOverlap->getData(i);
518 
519  //Allocate storage for the coarse nullspace.
520  coarseNullspace = MultiVectorFactory::Build(coarseMap, NSDim);
521 
522  ArrayRCP< ArrayRCP<SC> > coarseNS(NSDim);
523  for (size_t i=0; i<NSDim; ++i)
524  if (coarseMap->getNodeNumElements() > 0) coarseNS[i] = coarseNullspace->getDataNonConst(i);
525 
526  //This makes the rowmap of Ptent the same as that of A->
527  //This requires moving some parts of some local Q's to other processors
528  //because aggregates can span processors.
529  RCP<const Map > rowMapForPtent = A->getRowMap();
530  const Map& rowMapForPtentRef = *rowMapForPtent;
531 
532  // Set up storage for the rows of the local Qs that belong to other processors.
533  // FIXME This is inefficient and could be done within the main loop below with std::vector's.
534  RCP<const Map> colMap = A->getColMap();
535 
536  RCP<const Map > ghostQMap;
537  RCP<MultiVector> ghostQvalues;
538  Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > ghostQcolumns;
539  RCP<Xpetra::Vector<GO,LO,GO,Node> > ghostQrowNums;
540  ArrayRCP< ArrayRCP<SC> > ghostQvals;
541  ArrayRCP< ArrayRCP<GO> > ghostQcols;
542  ArrayRCP< GO > ghostQrows;
543 
544  Array<GO> ghostGIDs;
545  for (LO j=0; j<numAggs; ++j) {
546  for (LO k=aggStart[j]; k<aggStart[j+1]; ++k) {
547  if (rowMapForPtentRef.isNodeGlobalElement(aggToRowMap[k]) == false) {
548  ghostGIDs.push_back(aggToRowMap[k]);
549  }
550  }
551  }
552  ghostQMap = MapFactory::Build(A->getRowMap()->lib(),
553  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
554  ghostGIDs,
555  indexBase, A->getRowMap()->getComm()); //JG:Xpetra::global_size_t>?
556  //Vector to hold bits of Q that go to other processors.
557  ghostQvalues = MultiVectorFactory::Build(ghostQMap,NSDim);
558  //Note that Epetra does not support MultiVectors templated on Scalar != double.
559  //So to work around this, we allocate an array of Vectors. This shouldn't be too
560  //expensive, as the number of Vectors is NSDim.
561  ghostQcolumns.resize(NSDim);
562  for (size_t i=0; i<NSDim; ++i)
563  ghostQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
564  ghostQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(ghostQMap);
565  if (ghostQvalues->getLocalLength() > 0) {
566  ghostQvals.resize(NSDim);
567  ghostQcols.resize(NSDim);
568  for (size_t i=0; i<NSDim; ++i) {
569  ghostQvals[i] = ghostQvalues->getDataNonConst(i);
570  ghostQcols[i] = ghostQcolumns[i]->getDataNonConst(0);
571  }
572  ghostQrows = ghostQrowNums->getDataNonConst(0);
573  }
574 
575  //importer to handle moving Q
576  importer = ImportFactory::Build(ghostQMap, A->getRowMap());
577 
578  // Dense QR solver
579  Teuchos::SerialQRDenseSolver<LO,SC> qrSolver;
580 
581  //Allocate temporary storage for the tentative prolongator.
582  Array<GO> globalColPtr(maxAggSize*NSDim,0);
583  Array<LO> localColPtr(maxAggSize*NSDim,0);
584  Array<SC> valPtr(maxAggSize*NSDim,0.);
585 
586  //Create column map for Ptent, estimate local #nonzeros in Ptent, and create Ptent itself.
587  const Map& coarseMapRef = *coarseMap;
588 
589  // For the 3-arrays constructor
590  ArrayRCP<size_t> ptent_rowptr;
591  ArrayRCP<LO> ptent_colind;
592  ArrayRCP<Scalar> ptent_values;
593 
594  // Because ArrayRCPs are slow...
595  ArrayView<size_t> rowptr_v;
596  ArrayView<LO> colind_v;
597  ArrayView<Scalar> values_v;
598 
599  // For temporary usage
600  Array<size_t> rowptr_temp;
601  Array<LO> colind_temp;
602  Array<Scalar> values_temp;
603 
604  RCP<CrsMatrix> PtentCrs;
605 
606  RCP<CrsMatrixWrap> PtentCrsWrap = rcp(new CrsMatrixWrap(rowMapForPtent, NSDim, Xpetra::StaticProfile));
607  PtentCrs = PtentCrsWrap->getCrsMatrix();
608  Ptentative = PtentCrsWrap;
609 
610  //*****************************************************************
611  //Loop over all aggregates and calculate local QR decompositions.
612  //*****************************************************************
613  GO qctr=0; //for indexing into Ptent data vectors
614  const Map& nonUniqueMapRef = *nonUniqueMap;
615 
616  size_t total_nnz_count=0;
617 
618  for (GO agg=0; agg<numAggs; ++agg)
619  {
620  LO myAggSize = aggStart[agg+1]-aggStart[agg];
621  // For each aggregate, extract the corresponding piece of the nullspace and put it in the flat array,
622  // "localQR" (in column major format) for the QR routine.
623  Teuchos::SerialDenseMatrix<LO,SC> localQR(myAggSize, NSDim);
624  for (size_t j=0; j<NSDim; ++j) {
625  bool bIsZeroNSColumn = true;
626  for (LO k=0; k<myAggSize; ++k)
627  {
628  // aggToRowMap[aggPtr[i]+k] is the kth DOF in the ith aggregate
629  // fineNS[j][n] is the nth entry in the jth NS vector
630  try{
631  SC nsVal = fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ]; // extract information from fine level NS
632  localQR(k,j) = nsVal;
633  if (nsVal != zero) bIsZeroNSColumn = false;
634  }
635  catch(...) {
636  GetOStream(Runtime1,-1) << "length of fine level nsp: " << fineNullspace->getGlobalLength() << std::endl;
637  GetOStream(Runtime1,-1) << "length of fine level nsp w overlap: " << fineNullspaceWithOverlap->getGlobalLength() << std::endl;
638  GetOStream(Runtime1,-1) << "(local?) aggId=" << agg << std::endl;
639  GetOStream(Runtime1,-1) << "aggSize=" << myAggSize << std::endl;
640  GetOStream(Runtime1,-1) << "agg DOF=" << k << std::endl;
641  GetOStream(Runtime1,-1) << "NS vector j=" << j << std::endl;
642  GetOStream(Runtime1,-1) << "j*myAggSize + k = " << j*myAggSize + k << std::endl;
643  GetOStream(Runtime1,-1) << "aggToRowMap["<<agg<<"][" << k << "] = " << aggToRowMap[aggStart[agg]+k] << std::endl;
644  GetOStream(Runtime1,-1) << "id aggToRowMap[agg][k]=" << aggToRowMap[aggStart[agg]+k] << " is global element in nonUniqueMap = " <<
645  nonUniqueMapRef.isNodeGlobalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
646  GetOStream(Runtime1,-1) << "colMap local id aggToRowMap[agg][k]=" << nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) << std::endl;
647  GetOStream(Runtime1,-1) << "fineNS...=" << fineNS[j][ nonUniqueMapRef.getLocalElement(aggToRowMap[aggStart[agg]+k]) ] << std::endl;
648  GetOStream(Errors,-1) << "caught an error!" << std::endl;
649  }
650  } //for (LO k=0 ...
651  TEUCHOS_TEST_FOR_EXCEPTION(bIsZeroNSColumn == true, Exceptions::RuntimeError, "MueLu::TentativePFactory::MakeTentative: fine level NS part has a zero column. Error.");
652  } //for (LO j=0 ...
653 
654  Xpetra::global_size_t offset=agg*NSDim;
655 
656  if(myAggSize >= Teuchos::as<LocalOrdinal>(NSDim)) {
657  // calculate QR decomposition (standard)
658  // R is stored in localQR (size: myAggSize x NSDim)
659 
660  // Householder multiplier
661  SC tau = localQR(0,0);
662 
663  if (NSDim == 1) {
664  // Only one nullspace vector, so normalize by hand
665  Magnitude dtemp=0;
666  for (size_t k = 0; k < Teuchos::as<size_t>(myAggSize); ++k) {
667  Magnitude tmag = STS::magnitude(localQR(k,0));
668  dtemp += tmag*tmag;
669  }
670  dtemp = Teuchos::ScalarTraits<Magnitude>::squareroot(dtemp);
671  tau = localQR(0,0);
672  localQR(0,0) = dtemp;
673  } else {
674  qrSolver.setMatrix( Teuchos::rcp(&localQR, false) );
675  qrSolver.factor();
676  }
677 
678  // Extract R, the coarse nullspace. This is stored in upper triangular part of localQR.
679  // Note: coarseNS[i][.] is the ith coarse nullspace vector, which may be counter to your intuition.
680  // This stores the (offset+k)th entry only if it is local according to the coarseMap.
681  for (size_t j=0; j<NSDim; ++j) {
682  for (size_t k=0; k<=j; ++k) {
683  try {
684  if (coarseMapRef.isNodeLocalElement(offset+k)) {
685  coarseNS[j][offset+k] = localQR(k, j); //TODO is offset+k the correct local ID?!
686  }
687  }
688  catch(...) {
689  GetOStream(Errors,-1) << "caught error in coarseNS insert, j="<<j<<", offset+k = "<<offset+k<<std::endl;
690  }
691  }
692  }
693 
694  // Calculate Q, the tentative prolongator.
695  // The Lapack GEQRF call only works for myAggsize >= NSDim
696 
697  if (NSDim == 1) {
698  // Only one nullspace vector, so calculate Q by hand
699  Magnitude dtemp = Teuchos::ScalarTraits<SC>::magnitude(localQR(0,0));
700  localQR(0,0) = tau;
701  dtemp = 1 / dtemp;
702  for (LocalOrdinal i=0; i<myAggSize; ++i) {
703  localQR(i,0) *= dtemp ;
704  }
705  } else {
706  qrSolver.formQ();
707  Teuchos::RCP<Teuchos::SerialDenseMatrix<LO,SC> > qFactor = qrSolver.getQ();
708  for (size_t j=0; j<NSDim; j++) {
709  for (size_t i = 0; i < Teuchos::as<size_t>(myAggSize); i++) {
710  localQR(i,j) = (*qFactor)(i,j);
711  }
712  }
713  }
714 
715  // end default case (myAggSize >= NSDim)
716  } else { // special handling for myAggSize < NSDim (i.e. 1pt nodes)
717  // See comments for the uncoupled case
718 
719  // R = extended (by adding identity rows) localQR
720  for (size_t j = 0; j < NSDim; j++)
721  for (size_t k = 0; k < NSDim; k++) {
722  TEUCHOS_TEST_FOR_EXCEPTION(!coarseMapRef.isNodeLocalElement(offset+k), Exceptions::RuntimeError,
723  "Caught error in coarseNS insert, j=" << j << ", offset+k = " << offset+k);
724 
725  if (k < as<size_t>(myAggSize))
726  coarseNS[j][offset+k] = localQR(k,j);
727  else
728  coarseNS[j][offset+k] = (k == j ? one : zero);
729  }
730 
731  // Q = I (rectangular)
732  for (size_t i = 0; i < as<size_t>(myAggSize); i++)
733  for (size_t j = 0; j < NSDim; j++)
734  localQR(i,j) = (j == i ? one : zero);
735  } // end else (special handling for 1pt aggregates)
736 
737  //Process each row in the local Q factor. If the row is local to the current processor
738  //according to the rowmap, insert it into Ptentative. Otherwise, save it in ghostQ
739  //to be communicated later to the owning processor.
740  //FIXME -- what happens if maps are blocked?
741  for (GO j=0; j<myAggSize; ++j) {
742  //This loop checks whether row associated with current DOF is local, according to rowMapForPtent.
743  //If it is, the row is inserted. If not, the row number, columns, and values are saved in
744  //MultiVectors that will be sent to other processors.
745  GO globalRow = aggToRowMap[aggStart[agg]+j];
746 
747  //TODO is the use of Xpetra::global_size_t below correct?
748  if (rowMapForPtentRef.isNodeGlobalElement(globalRow) == false ) {
749  ghostQrows[qctr] = globalRow;
750  for (size_t k=0; k<NSDim; ++k) {
751  ghostQcols[k][qctr] = coarseMapRef.getGlobalElement(agg*NSDim+k);
752  ghostQvals[k][qctr] = localQR(j,k);
753  }
754  ++qctr;
755  } else {
756  size_t nnz=0;
757  for (size_t k=0; k<NSDim; ++k) {
758  try{
759  if (localQR(j,k) != Teuchos::ScalarTraits<SC>::zero()) {
760  localColPtr[nnz] = agg * NSDim + k;
761  globalColPtr[nnz] = coarseMapRef.getGlobalElement(localColPtr[nnz]);
762  valPtr[nnz] = localQR(j,k);
763  ++total_nnz_count;
764  ++nnz;
765  }
766  }
767  catch(...) {
768  GetOStream(Errors,-1) << "caught error in colPtr/valPtr insert, current index="<<nnz<<std::endl;
769  }
770  } //for (size_t k=0; k<NSDim; ++k)
771 
772  try{
773  Ptentative->insertGlobalValues(globalRow,globalColPtr.view(0,nnz),valPtr.view(0,nnz));
774  }
775  catch(...) {
776  GetOStream(Errors,-1) << "pid " << A->getRowMap()->getComm()->getRank()
777  << "caught error during Ptent row insertion, global row "
778  << globalRow << std::endl;
779  }
780  }
781  } //for (GO j=0; j<myAggSize; ++j)
782 
783  } // for (LO agg=0; agg<numAggs; ++agg)
784 
785 
786  // ***********************************************************
787  // ************* end of aggregate-wise QR ********************
788  // ***********************************************************
789  GetOStream(Runtime1) << "TentativePFactory : aggregates may cross process boundaries" << std::endl;
790  // Import ghost parts of Q factors and insert into Ptentative.
791  // First import just the global row numbers.
792  RCP<Xpetra::Vector<GO,LO,GO,Node> > targetQrowNums = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(rowMapForPtent);
793  targetQrowNums->putScalar(-1);
794  targetQrowNums->doImport(*ghostQrowNums,*importer,Xpetra::INSERT);
795  ArrayRCP< GO > targetQrows = targetQrowNums->getDataNonConst(0);
796 
797  // Now create map based on just the row numbers imported.
798  Array<GO> gidsToImport;
799  gidsToImport.reserve(targetQrows.size());
800  for (typename ArrayRCP<GO>::iterator r=targetQrows.begin(); r!=targetQrows.end(); ++r) {
801  if (*r > -1) {
802  gidsToImport.push_back(*r);
803  }
804  }
805  RCP<const Map > reducedMap = MapFactory::Build( A->getRowMap()->lib(),
806  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
807  gidsToImport, indexBase, A->getRowMap()->getComm() );
808 
809  // Import using the row numbers that this processor will receive.
810  importer = ImportFactory::Build(ghostQMap, reducedMap);
811 
812  Array<RCP<Xpetra::Vector<GO,LO,GO,Node> > > targetQcolumns(NSDim);
813  for (size_t i=0; i<NSDim; ++i) {
814  targetQcolumns[i] = Xpetra::VectorFactory<GO,LO,GO,Node>::Build(reducedMap);
815  targetQcolumns[i]->doImport(*(ghostQcolumns[i]),*importer,Xpetra::INSERT);
816  }
817  RCP<MultiVector> targetQvalues = MultiVectorFactory::Build(reducedMap,NSDim);
818  targetQvalues->doImport(*ghostQvalues,*importer,Xpetra::INSERT);
819 
820  ArrayRCP< ArrayRCP<SC> > targetQvals;
821  ArrayRCP<ArrayRCP<GO> > targetQcols;
822  if (targetQvalues->getLocalLength() > 0) {
823  targetQvals.resize(NSDim);
824  targetQcols.resize(NSDim);
825  for (size_t i=0; i<NSDim; ++i) {
826  targetQvals[i] = targetQvalues->getDataNonConst(i);
827  targetQcols[i] = targetQcolumns[i]->getDataNonConst(0);
828  }
829  }
830 
831  valPtr = Array<SC>(NSDim,0.);
832  globalColPtr = Array<GO>(NSDim,0);
833  for (typename Array<GO>::iterator r=gidsToImport.begin(); r!=gidsToImport.end(); ++r) {
834  if (targetQvalues->getLocalLength() > 0) {
835  for (size_t j=0; j<NSDim; ++j) {
836  valPtr[j] = targetQvals[j][reducedMap->getLocalElement(*r)];
837  globalColPtr[j] = targetQcols[j][reducedMap->getLocalElement(*r)];
838  }
839  Ptentative->insertGlobalValues(*r, globalColPtr.view(0,NSDim), valPtr.view(0,NSDim));
840  } //if (targetQvalues->getLocalLength() > 0)
841  }
842 
843  Ptentative->fillComplete(coarseMap, A->getDomainMap());
844  }
845 
846  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
847  bool TentativePFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::isGoodMap(const Map& rowMap, const Map& colMap) const {
848  ArrayView<const GO> rowElements = rowMap.getNodeElementList();
849  ArrayView<const GO> colElements = colMap.getNodeElementList();
850 
851  const size_t numElements = rowElements.size();
852 
853  bool goodMap = true;
854  for (size_t i = 0; i < numElements; i++)
855  if (rowElements[i] != colElements[i]) {
856  goodMap = false;
857  break;
858  }
859 
860  return goodMap;
861  }
862 
863 } //namespace MueLu
864 
865 // TODO ReUse: If only P or Nullspace is missing, TentativePFactory can be smart and skip part of the computation.
866 
867 #define MUELU_TENTATIVEPFACTORY_SHORT
868 #endif // MUELU_TENTATIVEPFACTORY_DEF_HPP
Important warning messages (one line)
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
Timer to be used in factories. Similar to Monitor but with additional timers.
Print more statistics.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
Namespace for MueLu classes and methods.
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
#define SET_VALID_ENTRY(name)
void BuildPcoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace) const
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
void BuildPuncoupled(RCP< Matrix > A, RCP< Aggregates > aggregates, RCP< AmalgamationInfo > amalgInfo, RCP< MultiVector > fineNullspace, RCP< const Map > coarseMap, RCP< Matrix > &Ptentative, RCP< MultiVector > &coarseNullspace, const int levelID) const
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
Exception throws to report errors in the internal logical of the program.
Description of what is happening (more verbose)
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
bool isGoodMap(const Map &rowMap, const Map &colMap) const