MueLu  Version of the Day
MueLu_SmooVecCoalesceDropFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #include "MueLu_SmootherBase.hpp"
47 #ifndef MUELU_SMOOVECCOALESCEDROPFACTORY_DEF_HPP
48 #define MUELU_SMOOVECCOALESCEDROPFACTORY_DEF_HPP
49 
50 #include <Xpetra_CrsGraphFactory.hpp>
51 #include <Xpetra_CrsGraph.hpp>
52 #include <Xpetra_ImportFactory.hpp>
53 #include <Xpetra_MapFactory.hpp>
54 #include <Xpetra_Map.hpp>
55 #include <Xpetra_Matrix.hpp>
56 #include <Xpetra_MultiVectorFactory.hpp>
57 #include <Xpetra_MultiVector.hpp>
58 #include <Xpetra_StridedMap.hpp>
59 #include <Xpetra_VectorFactory.hpp>
60 #include <Xpetra_Vector.hpp>
61 
63 
64 #include "MueLu_AmalgamationFactory.hpp"
65 #include "MueLu_AmalgamationInfo.hpp"
66 #include "MueLu_Exceptions.hpp"
67 #include "MueLu_GraphBase.hpp"
68 #include "MueLu_Graph.hpp"
69 #include "MueLu_Level.hpp"
70 #include "MueLu_LWGraph.hpp"
71 #include "MueLu_MasterList.hpp"
72 #include "MueLu_Monitor.hpp"
73 #include "MueLu_PreDropFunctionBaseClass.hpp"
74 #include "MueLu_PreDropFunctionConstVal.hpp"
75 #include "MueLu_Utilities.hpp"
76 #include "MueLu_TopSmootherFactory.hpp"
78 #include "MueLu_SmootherFactory.hpp"
79 
80 
81 #include <Xpetra_IO.hpp>
82 
83 #include <algorithm>
84 #include <cstdlib>
85 #include <string>
86 
87 // If defined, read environment variables.
88 // Should be removed once we are confident that this works.
89 // #define DJS_READ_ENV_VARIABLES
90 
91 #include <stdio.h>
92 #include <stdlib.h>
93 #include <math.h>
94 
95 
96 #define poly0thOrderCoef 0
97 #define poly1stOrderCoef 1
98 #define poly2ndOrderCoef 2
99 #define poly3rdOrderCoef 3
100 #define poly4thOrderCoef 4
101 
102 namespace MueLu {
103 
104  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
106  RCP<ParameterList> validParamList = rcp(new ParameterList());
107 
108 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
109  SET_VALID_ENTRY("aggregation: drop scheme");
110  {
111  typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
112  validParamList->getEntry("aggregation: drop scheme").setValidator(
113  rcp(new validatorType(Teuchos::tuple<std::string>("unsupported vector smoothing"), "aggregation: drop scheme")));
114  }
115  SET_VALID_ENTRY("aggregation: number of random vectors");
116  SET_VALID_ENTRY("aggregation: number of times to pre or post smooth");
117  SET_VALID_ENTRY("aggregation: penalty parameters");
118 #undef SET_VALID_ENTRY
119 
120  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
121  validParamList->set< RCP<const FactoryBase> >("PreSmoother", Teuchos::null, "Generating factory of the PreSmoother");
122  validParamList->set< RCP<const FactoryBase> >("PostSmoother", Teuchos::null, "Generating factory of the PostSmoother");
123 
124  return validParamList;
125  }
126 
127  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
129 
130  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
132  Input(currentLevel, "A");
133  if (currentLevel.IsAvailable("PreSmoother")) { // rst: totally unsure that this is legal
134  Input(currentLevel, "PreSmoother"); // my guess is that this is not yet available
135  } // so this always comes out false.
136  else if (currentLevel.IsAvailable("PostSmoother")) { // perhaps we can look on the param list?
137  Input(currentLevel, "PostSmoother");
138  }
139  }
140 
141  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
143 
144  FactoryMonitor m(*this, "Build", currentLevel);
145 
146  typedef Teuchos::ScalarTraits<SC> STS;
147 
148  if (predrop_ != Teuchos::null)
149  GetOStream(Parameters0) << predrop_->description();
150 
151  RCP<Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
152 
153  const ParameterList & pL = GetParameterList();
154 
155  LO nPDEs = A->GetFixedBlockSize();
156 
157  RCP< MultiVector > testVecs;
158  RCP< MultiVector > nearNull;
159 
160 #ifdef takeOut
161  testVecs = Xpetra::IO<SC,LO,GO,Node>::ReadMultiVector("TpetraTVecs.mm", A->getRowMap());
162 #endif
163  size_t numRandom= as<size_t>(pL.get<int>("aggregation: number of random vectors"));
164  testVecs = MultiVectorFactory::Build(A->getRowMap(), numRandom, true);
165  // use random test vectors but should be positive in order to not get
166  // crummy results ... so take abs() of randomize().
167  testVecs->randomize();
168  for (size_t kk = 0; kk < testVecs->getNumVectors(); kk++ ) {
169  Teuchos::ArrayRCP< Scalar > curVec = testVecs->getDataNonConst(kk);
170  for (size_t ii = kk; ii < as<size_t>(A->getRowMap()->getNodeNumElements()); ii++ ) curVec[ii] = Teuchos::ScalarTraits<SC>::magnitude(curVec[ii]);
171  }
172  nearNull = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true);
173 
174  // initialize null space to constants
175  for (size_t kk = 0; kk < nearNull->getNumVectors(); kk++ ) {
176  Teuchos::ArrayRCP< Scalar > curVec = nearNull->getDataNonConst(kk);
177  for (size_t ii = kk; ii < as<size_t>(A->getRowMap()->getNodeNumElements()); ii += nearNull->getNumVectors() ) curVec[ii] = Teuchos::ScalarTraits<Scalar>::one();
178  }
179 
180  RCP< MultiVector > zeroVec_TVecs;
181  RCP< MultiVector > zeroVec_Null;
182 
183  zeroVec_TVecs = MultiVectorFactory::Build(A->getRowMap(), testVecs->getNumVectors(), true);
184  zeroVec_Null = MultiVectorFactory::Build(A->getRowMap(), nPDEs, true);
185  zeroVec_TVecs->putScalar(Teuchos::ScalarTraits<Scalar>::zero());
186  zeroVec_Null->putScalar( Teuchos::ScalarTraits<Scalar>::zero());
187 
188  size_t nInvokeSmoother=as<size_t>(pL.get<int>("aggregation: number of times to pre or post smooth"));
189  if (currentLevel.IsAvailable("PreSmoother")) {
190  RCP<SmootherBase> preSmoo = currentLevel.Get< RCP<SmootherBase> >("PreSmoother");
191  for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*testVecs,*zeroVec_TVecs,false);
192  for (size_t ii = 0; ii < nInvokeSmoother; ii++) preSmoo->Apply(*nearNull,*zeroVec_Null,false);
193  }
194  else if (currentLevel.IsAvailable("PostSmoother")) {
195  RCP<SmootherBase> postSmoo = currentLevel.Get< RCP<SmootherBase> >("PostSmoother");
196  for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*testVecs,*zeroVec_TVecs,false);
197  for (size_t ii = 0; ii < nInvokeSmoother; ii++) postSmoo->Apply(*nearNull, *zeroVec_Null,false);
198  }
199  else
200  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Must set a smoother");
201 
202  Teuchos::ArrayRCP<Scalar> penaltyPolyCoef(5);
203  Teuchos::ArrayView<const double> inputPolyCoef;
204 
205  penaltyPolyCoef[poly0thOrderCoef] = 12.;
206  penaltyPolyCoef[poly1stOrderCoef] = -.2;
207  penaltyPolyCoef[poly2ndOrderCoef] = 0.0;
208  penaltyPolyCoef[poly3rdOrderCoef] = 0.0;
209  penaltyPolyCoef[poly4thOrderCoef] = 0.0;
210 
211  if(pL.isParameter("aggregation: penalty parameters") && pL.get<Teuchos::Array<double> >("aggregation: penalty parameters").size() > 0) {
212  if (pL.get<Teuchos::Array<double> >("aggregation: penalty parameters").size() > penaltyPolyCoef.size())
213  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of penalty parameters must be " << penaltyPolyCoef.size() << " or less");
214  inputPolyCoef = pL.get<Teuchos::Array<double> >("aggregation: penalty parameters")();
215 
216  for (size_t i = 0; i < as<size_t>(inputPolyCoef.size()) ; i++) penaltyPolyCoef[i] = as<Scalar>(inputPolyCoef[i]);
217  for (size_t i = as<size_t>(inputPolyCoef.size()); i < as<size_t>(penaltyPolyCoef.size()); i++) penaltyPolyCoef[i] = Teuchos::ScalarTraits<Scalar>::zero();
218  }
219 
220 
221  RCP<GraphBase> filteredGraph;
222  badGuysCoalesceDrop(*A, penaltyPolyCoef, nPDEs, *testVecs, *nearNull, filteredGraph);
223 
224 
225 #ifdef takeOut
226  /* write out graph for serial debugging purposes only. */
227 
228  FILE* fp = fopen("codeOutput","w");
229  fprintf(fp,"%d %d %d\n",(int) filteredGraph->GetNodeNumVertices(),(int) filteredGraph->GetNodeNumVertices(),
230  (int) filteredGraph->GetNodeNumEdges());
231  for (size_t i = 0; i < filteredGraph->GetNodeNumVertices(); i++) {
232  ArrayView<const LO> inds = filteredGraph->getNeighborVertices(as<LO>(i));
233  for (size_t j = 0; j < as<size_t>(inds.size()); j++) {
234  fprintf(fp,"%d %d 1.00e+00\n",(int) i+1,(int) inds[j]+1);
235  }
236  }
237  fclose(fp);
238 #endif
239 
240  SC threshold = .01;
241  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
242  Set(currentLevel, "Graph", filteredGraph);
243  Set(currentLevel, "DofsPerNode", 1);
244 
245  } //Build
246 
247  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
248  void SmooVecCoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::badGuysCoalesceDrop(const Matrix& Amat, Teuchos::ArrayRCP<Scalar> & penaltyPolyCoef , LO nPDEs, const MultiVector& testVecs, const MultiVector& nearNull, RCP<GraphBase>& filteredGraph) const {
249  /*
250  * Compute coalesce/drop graph (in filteredGraph) for A. The basic idea is to
251  * balance trade-offs associated with
252  *
253  * (I - P inv(R P) R ) testVecs
254  *
255  * being worse for larger aggregates (less dropping) while MG cycle costs are
256  * cheaper with larger aggregates. MG costs are "penalties" in the
257  * optimization while (I - P inv(R P) R ) is the "fit" (how well a
258  * a fine grid function can be approximated on the coarse grid).
259  *
260  * For MG costs, we don't actually approximate the cost. Instead, we
261  * have just hardwired penalties below. Specifically,
262  *
263  * penalties[j] is the cost if aggregates are of size j+1, where right
264  * now a linear function of the form const*(60-j) is used.
265  *
266  * (I - P inv(P^T P) P^T ) testVecs is estimated by just looking locally at
267  * the vector portion corresponding to a possible aggregate defined by
268  * all non-dropped connections in the ith row. A tentative prolognator is
269  * used for P. This prolongator corresponds to a null space vector given
270  * by 'nearNull', which is provided to dropper(). In initial testing, nearNull is
271  * first set as a vector of all 1's and then smoothed with a relaxation
272  * method applied to a nice matrix (with the same sparsity pattern as A).
273  * Originally, nearNull was used to handle Dir bcs where relaxation of the
274  * vector of 1's has a more pronounced effect.
275  *
276  * For PDE systems, fit only considers the same dof at each node. That is,
277  * it effectively assumes that we have a tentative prolongator with no
278  * coupling between different dof types. When checking the fit for the kth
279  * dof at a paritcular node, it only considers the kth dof of this node
280  * and neighboring nodes.
281  *
282  * Note: testVecs is supplied by the user, but normally is the result of
283  * applying a relaxation scheme to Au = 0 where u is initial random.
284  */
285 
286  GO numMyNnz = Teuchos::as<GO>(Amat.getNodeNumEntries());
287  size_t nLoc = Amat.getRowMap()->getNodeNumElements();
288 
289  size_t nBlks = nLoc/nPDEs;
290  if (nBlks*nPDEs != nLoc )
291  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of local dofs not divisible by BlkSize");
292 
293  Teuchos::ArrayRCP<LO> newRowPtr(nBlks+1); /* coalesce & drop matrix */
294  Teuchos::ArrayRCP<LO> newCols(numMyNnz); /* arrays */
295 
296  Teuchos::ArrayRCP<LO> bcols(nBlks); /* returned by dropfun(j,...) */
297  Teuchos::ArrayRCP<bool> keepOrNot(nBlks); /* gives cols for jth row and */
298  /* whether or not entry is */
299  /* kept or dropped. */
300 
301  LO maxNzPerRow = 200;
302  Teuchos::ArrayRCP<Scalar> penalties(maxNzPerRow); /* Penalty function */
303  /* described above. */
304 
305  Teuchos::ArrayRCP<bool> keepStatus(nBlks,true); /* accumulated keepOrNot info */
306  Teuchos::ArrayRCP<LO> bColList(nBlks); /* accumulated bcols info */
307  /* for an entire block as */
308  /* opposed to a single row */
309  /* Additionally, keepOrNot[j] */
310  /* refers to status of jth */
311  /* entry in a row while */
312  /* keepStatus[j] refers to */
313  /* whether the jth block is */
314  /* kept within the block row. */
315 
316  Teuchos::ArrayRCP<bool> alreadyOnBColList(nBlks,false); /* used to avoid recording the*/
317  /* same block column when */
318  /* processing different pt */
319  /* rows within a block. */
320 
321  Teuchos::ArrayRCP<bool> boundaryNodes(nBlks,false);
322 
323 
324  for (LO i = 0; i < maxNzPerRow; i++)
325  penalties[i] = penaltyPolyCoef[poly0thOrderCoef] +
326  penaltyPolyCoef[poly1stOrderCoef]*(as<Scalar>(i)) +
327  penaltyPolyCoef[poly2ndOrderCoef]*(as<Scalar>(i*i)) +
328  (penaltyPolyCoef[poly3rdOrderCoef]*(as<Scalar>(i*i))*(as<Scalar>(i))) + //perhaps avoids overflow?
329  (penaltyPolyCoef[poly4thOrderCoef]*(as<Scalar>(i*i))*(as<Scalar>(i*i)));
330 
331  LO nzTotal = 0, numBCols = 0, row = -1, Nbcols, bcol;
332  newRowPtr[0] = 0;
333 
334  /* proceed block by block */
335  for (LO i = 0; i < as<LO>(nBlks); i++) {
336  newRowPtr[i+1] = newRowPtr[i];
337  for (LO j = 0; j < nPDEs; j++) {
338  row = row + 1;
339 
340  Teuchos::ArrayView<const LocalOrdinal> indices;
341  Teuchos::ArrayView<const Scalar> vals;
342 
343  Amat.getLocalRowView(row, indices, vals);
344 
345  if (indices.size() > maxNzPerRow) {
346  LO oldSize = maxNzPerRow;
347  maxNzPerRow = indices.size() + 100;
348  penalties.resize(as<size_t>(maxNzPerRow),0.0);
349  for (LO k = oldSize; k < maxNzPerRow; k++)
350  penalties[k] = penaltyPolyCoef[poly0thOrderCoef] +
351  penaltyPolyCoef[poly1stOrderCoef]*(as<Scalar>(i)) +
352  penaltyPolyCoef[poly2ndOrderCoef]*(as<Scalar>(i*i)) +
353  (penaltyPolyCoef[poly3rdOrderCoef]*(as<Scalar>(i*i))*(as<Scalar>(i))) +
354  (penaltyPolyCoef[poly4thOrderCoef]*(as<Scalar>(i*i))*(as<Scalar>(i*i)));
355  }
356  badGuysDropfunc(row, indices, vals, testVecs, nPDEs, penalties, nearNull, bcols,keepOrNot,Nbcols,nLoc);
357  for (LO k=0; k < Nbcols; k++) {
358  bcol = bcols[k];
359 
360  /* add to bColList if not already on it */
361 
362  if (alreadyOnBColList[bcol] == false) {/* for PDE systems only record */
363  bColList[numBCols++] = bcol; /* neighboring block one time */
364  alreadyOnBColList[bcol] = true;
365  }
366  /* drop if any pt row within block indicates entry should be dropped */
367 
368  if (keepOrNot[k] == false) keepStatus[bcol] = false;
369 
370  } /* for (k=0; k < Nbcols; k++) */
371  } /* for (j = 0; i < nPDEs; j++) */
372 
373  /* finished with block row. Now record block entries that we keep */
374  /* and reset keepStatus, bColList, and alreadyOnBColList. */
375 
376  if ( numBCols < 2) boundaryNodes[i] = true;
377  for (LO j=0; j < numBCols; j++) {
378  bcol = bColList[j];
379  if (keepStatus[bcol] == true) {
380  newCols[nzTotal] = bColList[j];
381  newRowPtr[i+1]++;
382  nzTotal = nzTotal + 1;
383  }
384  keepStatus[bcol] = true;
385  alreadyOnBColList[bcol] = false;
386  bColList[j] = 0;
387  }
388  numBCols = 0;
389  } /* for (i = 0; i < nBlks; i++) */
390 
391  /* create array of the correct size and copy over newCols to it */
392 
393  Teuchos::ArrayRCP<LO> finalCols(nzTotal);
394  for (LO i = 0; i < nzTotal; i++) finalCols[i] = newCols[i];
395 
396  // Not using column map because we do not allow for any off-proc stuff.
397  // Not sure if this is okay. FIXME
398 
399  RCP<const Map> rowMap = Amat.getRowMap(); // , colMap = Amat.getColMap();
400 
401  LO nAmalgNodesOnProc = rowMap->getNodeNumElements()/nPDEs;
402  Teuchos::Array<GO> nodalGIDs(nAmalgNodesOnProc);
403  typename Teuchos::ScalarTraits<Scalar>::coordinateType temp;
404  for (size_t i = 0; i < as<size_t>(nAmalgNodesOnProc); i++ ) {
405  GO gid = rowMap->getGlobalElement(i*nPDEs);
406  temp = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (gid))/((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (nPDEs));
407  nodalGIDs[i] = as<GO>(floor(temp));
408  }
409  GO nAmalgNodesGlobal = rowMap->getGlobalNumElements();
410  GO nBlkGlobal = nAmalgNodesGlobal/nPDEs;
411  if (nBlkGlobal*nPDEs != nAmalgNodesGlobal)
412  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "Number of global dofs not divisible by BlkSize");
413 
414  Teuchos::RCP<Map> AmalgRowMap = MapFactory::Build(rowMap->lib(), nBlkGlobal,
415  nodalGIDs(),0,rowMap->getComm());
416 
417  filteredGraph = rcp(new LWGraph(newRowPtr, finalCols, AmalgRowMap, AmalgRowMap, "thresholded graph of A"));
418  filteredGraph->SetBoundaryNodeMap(boundaryNodes);
419 
420  }
421 
422  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
423  void SmooVecCoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::badGuysDropfunc(LO row, const Teuchos::ArrayView<const LocalOrdinal>& cols, const Teuchos::ArrayView<const Scalar>& vals, const MultiVector& testVecs, LO nPDEs, Teuchos::ArrayRCP<Scalar> & penalties, const MultiVector& nearNull, Teuchos::ArrayRCP<LO>& Bcols, Teuchos::ArrayRCP<bool>& keepOrNot, LO &Nbcols, LO nLoc) const {
424 
425  LO nLeng = cols.size();
426  typename Teuchos::ScalarTraits<Scalar>::coordinateType temp;
427  temp = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (row))/((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (nPDEs));
428  LO blkRow = as<LO>(floor(temp));
429  Teuchos::ArrayRCP<Scalar> badGuy( nLeng, 0.0);
430  Teuchos::ArrayRCP<Scalar> subNull(nLeng, 0.0); /* subset of nearNull */
431  /* associated with current */
432  /* dof within node. */
433 
434  /* Only consider testVecs associated with same dof & on processor. Further */
435  /* collapse testVecs to a single badGuy vector by basically taking the worst */
436  /* (least smooth) values for each of the off diags. In particular, we look at*/
437  /* the ratio of each off-diag test value / diag test value and compare this */
438  /* with the nearNull vector ratio. The further the testVec ratio is from the */
439  /* nearNull ratio, the harder is will be to accurately interpolate is these */
440  /* two guys are aggregated. So, the biggest ratio mismatch is used to choose */
441  /* the testVec entry associated with each off-diagonal entry. */
442 
443 
444  for (LO i = 0; i < nLeng; i++) keepOrNot[i] = false;
445 
446  LO diagInd = -1;
447  Nbcols = 0;
448  LO rowDof = row - blkRow*nPDEs;
449  Teuchos::ArrayRCP< const Scalar > oneNull = nearNull.getData( as<size_t>(rowDof));
450 
451  for (LO i = 0; i < nLeng; i++) {
452  if ((cols[i] < nLoc ) && (vals[i] != 0.0)) { /* on processor */
453  temp = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (cols[i]))/((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (nPDEs));
454  LO colDof = cols[i] - (as<LO>(floor( temp )))*nPDEs;
455  if (colDof == rowDof) { /* same dof within node as row */
456  Bcols[ Nbcols] = (cols[i] - colDof)/nPDEs;
457  subNull[Nbcols] = oneNull[cols[i]];
458 
459  if (cols[i] != row) { /* not diagonal */
460  Scalar worstRatio = -1.0;
461  Scalar targetRatio = subNull[Nbcols]/oneNull[row];
462  Scalar actualRatio;
463  for (size_t kk = 0; kk < testVecs.getNumVectors(); kk++ ) {
464  Teuchos::ArrayRCP< const Scalar > curVec = testVecs.getData(kk);
465  actualRatio = curVec[cols[i]]/curVec[row];
466  if (Teuchos::ScalarTraits<SC>::magnitude(actualRatio - targetRatio) > Teuchos::ScalarTraits<SC>::magnitude(worstRatio)) {
467  badGuy[Nbcols] = actualRatio;
468  worstRatio = Teuchos::ScalarTraits<SC>::magnitude(actualRatio - targetRatio);
469  }
470  }
471  }
472  else {
473  badGuy[ Nbcols] = 1.;
474  keepOrNot[Nbcols] = true;
475  diagInd = Nbcols;
476  }
477  (Nbcols)++;
478  }
479  }
480  }
481 
482  /* Make sure that diagonal entry is in block col list */
483 
484  if (diagInd == -1) {
485  Bcols[ Nbcols] = (row - rowDof)/nPDEs;
486  subNull[ Nbcols] = 1.;
487  badGuy[ Nbcols] = 1.;
488  keepOrNot[Nbcols] = true;
489  diagInd = Nbcols;
490  (Nbcols)++;
491  }
492 
493  Scalar currentRP = oneNull[row]*oneNull[row];
494  Scalar currentRTimesBadGuy= oneNull[row]*badGuy[diagInd];
495  Scalar currentScore = penalties[0]; /* (I - P inv(R*P)*R )=0 for size */
496  /* size 1 agg, so fit is perfect */
497 
498  /* starting from a set that only includes the diagonal entry consider adding */
499  /* one off-diagonal at a time until the fitValue exceeds the penalty term. */
500  /* Here, the fit value is (I - P inv(R P) R ) and we always consider the */
501  /* lowest fitValue that is not currently in the set. R and P correspond to */
502  /* a simple tentaive grid transfer associated with an aggregate that */
503  /* includes the diagonal, all already determined neighbors, and the potential*/
504  /* new neighbor */
505 
506  LO nKeep = 1, flag = 1, minId;
507  Scalar minFit, minFitRP = 0., minFitRTimesBadGuy = 0.;
508  Scalar newRP, newRTimesBadGuy;
509 
510  while (flag == 1) {
511  /* compute a fit for each possible off-diagonal neighbor */
512  /* that has not already been added as a neighbor */
513 
514  minFit = 1000000.;
515  minId = -1;
516 
517  for (LO i=0; i < Nbcols; i++) {
518  if (keepOrNot[i] == false) {
519  keepOrNot[i] = true; /* temporarily view i as non-dropped neighbor */
520  newRP = currentRP + subNull[i]*subNull[i];
521  newRTimesBadGuy= currentRTimesBadGuy + subNull[i]*badGuy[i];
522  Scalar ratio = newRTimesBadGuy/newRP;
523 
524  Scalar newFit = 0.0;
525  for (LO k=0; k < Nbcols; k++) {
526  if (keepOrNot[k] == true) {
527  Scalar diff = badGuy[k] - ratio*subNull[k];
528  newFit = newFit + diff*diff;
529  }
530  }
531  if (Teuchos::ScalarTraits<SC>::magnitude(newFit) < Teuchos::ScalarTraits<SC>::magnitude(minFit)) {
532  minId = i;
533  minFit = newFit;
534  minFitRP = newRP;
535  minFitRTimesBadGuy= newRTimesBadGuy;
536  }
537  keepOrNot[i] = false;
538  }
539  }
540  if (minId == -1) flag = 0;
541  else {
542  minFit = sqrt(minFit);
543  Scalar newScore = penalties[nKeep] + minFit;
544  if (Teuchos::ScalarTraits<SC>::magnitude(newScore) < Teuchos::ScalarTraits<SC>::magnitude(currentScore)) {
545  nKeep = nKeep + 1;
546  keepOrNot[minId]= true;
547  currentScore = newScore;
548  currentRP = minFitRP;
549  currentRTimesBadGuy= minFitRTimesBadGuy;
550  }
551  else flag = 0;
552  }
553  }
554  }
555 
556 } //namespace MueLu
557 
558 #endif // MUELU_SMOOVECCOALESCEDROPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
MueLu::DefaultScalar Scalar
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void badGuysCoalesceDrop(const Matrix &Amat, Teuchos::ArrayRCP< Scalar > &dropParams, LO nPDEs, const MultiVector &smoothedTVecs, const MultiVector &smoothedNull, RCP< GraphBase > &filteredGraph) const
Methods to support compatible-relaxation style dropping.
void Build(Level &currentLevel) const
Build an object with this factory.
void badGuysDropfunc(LO row, const Teuchos::ArrayView< const LocalOrdinal > &indices, const Teuchos::ArrayView< const Scalar > &vals, const MultiVector &smoothedTVecs, LO nPDEs, Teuchos::ArrayRCP< Scalar > &penalties, const MultiVector &smoothedNull, Teuchos::ArrayRCP< LO > &Bcols, Teuchos::ArrayRCP< bool > &keepOrNot, LO &Nbcols, LO nLoc) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void DeclareInput(Level &currentLevel) const
Input.
Namespace for MueLu classes and methods.
@ Parameters0
Print class parameters.