MueLu  Version of the Day
MueLu_CoalesceDropFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_COALESCEDROPFACTORY_DEF_HPP
47 #define MUELU_COALESCEDROPFACTORY_DEF_HPP
48 
49 #include <Xpetra_CrsGraphFactory.hpp>
50 #include <Xpetra_CrsGraph.hpp>
51 #include <Xpetra_ImportFactory.hpp>
52 #include <Xpetra_ExportFactory.hpp>
53 #include <Xpetra_MapFactory.hpp>
54 #include <Xpetra_Map.hpp>
55 #include <Xpetra_Matrix.hpp>
56 #include <Xpetra_MultiVectorFactory.hpp>
57 #include <Xpetra_MultiVector.hpp>
58 #include <Xpetra_StridedMap.hpp>
59 #include <Xpetra_VectorFactory.hpp>
60 #include <Xpetra_Vector.hpp>
61 
62 #include <Xpetra_IO.hpp>
63 
65 
66 #include "MueLu_AmalgamationFactory.hpp"
67 #include "MueLu_AmalgamationInfo.hpp"
68 #include "MueLu_Exceptions.hpp"
69 #include "MueLu_GraphBase.hpp"
70 #include "MueLu_Graph.hpp"
71 #include "MueLu_Level.hpp"
72 #include "MueLu_LWGraph.hpp"
73 #include "MueLu_MasterList.hpp"
74 #include "MueLu_Monitor.hpp"
75 #include "MueLu_PreDropFunctionBaseClass.hpp"
76 #include "MueLu_PreDropFunctionConstVal.hpp"
77 #include "MueLu_Utilities.hpp"
78 
79 #ifdef HAVE_XPETRA_TPETRA
80 #include "Tpetra_CrsGraphTransposer.hpp"
81 #endif
82 
83 #include <algorithm>
84 #include <cstdlib>
85 #include <string>
86 
87 // If defined, read environment variables.
88 // Should be removed once we are confident that this works.
89 //#define DJS_READ_ENV_VARIABLES
90 
91 
92 namespace MueLu {
93 
94  namespace Details {
95  template<class real_type, class LO>
96  struct DropTol {
97 
98  DropTol() = default;
99  DropTol(DropTol const&) = default;
100  DropTol(DropTol &&) = default;
101 
102  DropTol& operator=(DropTol const&) = default;
103  DropTol& operator=(DropTol &&) = default;
104 
105  DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
106  : val{val_}, diag{diag_}, col{col_}, drop{drop_}
107  {}
108 
109  real_type val {Teuchos::ScalarTraits<real_type>::zero()};
110  real_type diag {Teuchos::ScalarTraits<real_type>::zero()};
111  LO col {Teuchos::OrdinalTraits<LO>::invalid()};
112  bool drop {true};
113 
114  // CMS: Auxillary information for debugging info
115  // real_type aux_val {Teuchos::ScalarTraits<real_type>::nan()};
116  };
117  }
118 
119 
120  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
122  RCP<ParameterList> validParamList = rcp(new ParameterList());
123 
124 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
125  SET_VALID_ENTRY("aggregation: drop tol");
126  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
127  SET_VALID_ENTRY("aggregation: greedy Dirichlet");
128  SET_VALID_ENTRY("aggregation: row sum drop tol");
129  SET_VALID_ENTRY("aggregation: drop scheme");
130  SET_VALID_ENTRY("aggregation: block diagonal: interleaved blocksize");
131  SET_VALID_ENTRY("aggregation: distance laplacian directional weights");
132 
133  {
134  typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
135  validParamList->getEntry("aggregation: drop scheme").setValidator(rcp(new validatorType(Teuchos::tuple<std::string>("classical", "distance laplacian","signed classical","block diagonal","block diagonal classical","block diagonal distance laplacian","block diagonal signed classical","block diagonal colored signed classical"), "aggregation: drop scheme")));
136 
137  }
138  SET_VALID_ENTRY("aggregation: distance laplacian algo");
139  SET_VALID_ENTRY("aggregation: classical algo");
140  SET_VALID_ENTRY("aggregation: coloring: localize color graph");
141 #undef SET_VALID_ENTRY
142  validParamList->set< bool > ("lightweight wrap", true, "Experimental option for lightweight graph access");
143 
144  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
145  validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
146  validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for Coordinates");
147  validParamList->set< RCP<const FactoryBase> >("BlockNumber", Teuchos::null, "Generating factory for BlockNUmber");
148 
149  return validParamList;
150  }
151 
152  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
154 
155  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
157  Input(currentLevel, "A");
158  Input(currentLevel, "UnAmalgamationInfo");
159 
160  const ParameterList& pL = GetParameterList();
161  if (pL.get<bool>("lightweight wrap") == true) {
162  std::string algo = pL.get<std::string>("aggregation: drop scheme");
163  if (algo == "distance laplacian" || algo == "block diagonal distance laplacian") {
164  Input(currentLevel, "Coordinates");
165  }
166  if (algo.find("block diagonal") != std::string::npos || algo.find("signed classical") != std::string::npos) {
167  Input(currentLevel, "BlockNumber");
168  }
169  }
170 
171  }
172 
173  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
175 
176  FactoryMonitor m(*this, "Build", currentLevel);
177 
178  typedef Teuchos::ScalarTraits<SC> STS;
179  typedef typename STS::magnitudeType real_type;
180  typedef Xpetra::MultiVector<real_type,LO,GO,NO> RealValuedMultiVector;
181  typedef Xpetra::MultiVectorFactory<real_type,LO,GO,NO> RealValuedMultiVectorFactory;
182 
183  if (predrop_ != Teuchos::null)
184  GetOStream(Parameters0) << predrop_->description();
185 
186  RCP<Matrix> realA = Get< RCP<Matrix> >(currentLevel, "A");
187  RCP<AmalgamationInfo> amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel, "UnAmalgamationInfo");
188  const ParameterList & pL = GetParameterList();
189  bool doExperimentalWrap = pL.get<bool>("lightweight wrap");
190 
191  GetOStream(Parameters0) << "lightweight wrap = " << doExperimentalWrap << std::endl;
192  std::string algo = pL.get<std::string>("aggregation: drop scheme");
193 
194  RCP<RealValuedMultiVector> Coords;
195  RCP<Matrix> A;
196 
197  bool use_block_algorithm=false;
198  LO interleaved_blocksize = as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize"));
199  bool useSignedClassical = false;
200  bool generateColoringGraph = false;
201 
202  // NOTE: If we're doing blockDiagonal, we'll not want to do rowSum twice (we'll do it
203  // in the block diagonalizaiton). So we'll clobber the rowSumTol with -1.0 in this case
204  typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
205  RCP<LocalOrdinalVector> ghostedBlockNumber;
206  ArrayRCP<const LO> g_block_id;
207 
208  if(algo == "distance laplacian" ) {
209  // Grab the coordinates for distance laplacian
210  Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
211  A = realA;
212  }
213  else if(algo == "signed classical" || algo == "block diagonal colored signed classical" || algo == "block diagonal signed classical") {
214  useSignedClassical = true;
215  // if(realA->GetFixedBlockSize() > 1) {
216  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector> >(currentLevel, "BlockNumber");
217  // Ghost the column block numbers if we need to
218  RCP<const Import> importer = realA->getCrsGraph()->getImporter();
219  if(!importer.is_null()) {
220  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
221  ghostedBlockNumber= Xpetra::VectorFactory<LO,LO,GO,NO>::Build(importer->getTargetMap());
222  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
223  }
224  else {
225  ghostedBlockNumber = BlockNumber;
226  }
227  g_block_id = ghostedBlockNumber->getData(0);
228  // }
229  if(algo == "block diagonal colored signed classical")
230  generateColoringGraph=true;
231  algo = "classical";
232  A = realA;
233 
234  }
235  else if(algo == "block diagonal") {
236  // Handle the "block diagonal" filtering and then leave
237  BlockDiagonalize(currentLevel,realA,false);
238  return;
239  }
240  else if (algo == "block diagonal classical" || algo == "block diagonal distance laplacian") {
241  // Handle the "block diagonal" filtering, and then continue onward
242  use_block_algorithm = true;
243  RCP<Matrix> filteredMatrix = BlockDiagonalize(currentLevel,realA,true);
244  if(algo == "block diagonal distance laplacian") {
245  // We now need to expand the coordinates by the interleaved blocksize
246  RCP<RealValuedMultiVector> OldCoords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
247  if (OldCoords->getLocalLength() != realA->getNodeNumRows()) {
248  LO dim = (LO) OldCoords->getNumVectors();
249  Coords = RealValuedMultiVectorFactory::Build(realA->getRowMap(),dim);
250  for(LO k=0; k<dim; k++){
251  ArrayRCP<const real_type> old_vec = OldCoords->getData(k);
252  ArrayRCP<real_type> new_vec = Coords->getDataNonConst(k);
253  for(LO i=0; i <(LO)OldCoords->getLocalLength(); i++) {
254  LO new_base = i*dim;
255  for(LO j=0; j<interleaved_blocksize; j++)
256  new_vec[new_base + j] = old_vec[i];
257  }
258  }
259  }
260  else {
261  Coords = OldCoords;
262  }
263  algo = "distance laplacian";
264  }
265  else if(algo == "block diagonal classical") {
266  algo = "classical";
267  }
268  // All cases
269  A = filteredMatrix;
270  rowSumTol = -1.0;
271  }
272  else {
273  A = realA;
274  }
275 
276  // Distance Laplacian weights
277  Array<double> dlap_weights = pL.get<Array<double> >("aggregation: distance laplacian directional weights");
278  enum {NO_WEIGHTS=0, SINGLE_WEIGHTS, BLOCK_WEIGHTS};
279  int use_dlap_weights = NO_WEIGHTS;
280  if(algo == "distance laplacian") {
281  LO dim = (LO) Coords->getNumVectors();
282  // If anything isn't 1.0 we need to turn on the weighting
283  bool non_unity = false;
284  for (LO i=0; !non_unity && i<(LO)dlap_weights.size(); i++) {
285  if(dlap_weights[i] != 1.0) {
286  non_unity = true;
287  }
288  }
289  if(non_unity) {
290  LO blocksize = use_block_algorithm ? as<LO>(pL.get<int>("aggregation: block diagonal: interleaved blocksize")) : 1;
291  if((LO)dlap_weights.size() == dim)
292  use_dlap_weights = SINGLE_WEIGHTS;
293  else if((LO)dlap_weights.size() == blocksize * dim)
294  use_dlap_weights = BLOCK_WEIGHTS;
295  else {
296  TEUCHOS_TEST_FOR_EXCEPTION(1, Exceptions::RuntimeError,
297  "length of 'aggregation: distance laplacian directional weights' must equal the coordinate dimension OR the coordinate dimension times the blocksize");
298  }
299  if (GetVerbLevel() & Statistics1)
300  GetOStream(Statistics1) << "Using distance laplacian weights: "<<dlap_weights<<std::endl;
301  }
302  }
303 
304  // decide wether to use the fast-track code path for standard maps or the somewhat slower
305  // code path for non-standard maps
306  /*bool bNonStandardMaps = false;
307  if (A->IsView("stridedMaps") == true) {
308  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
309  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
310  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
311  if (strMap->getStridedBlockId() != -1 || strMap->getOffset() > 0)
312  bNonStandardMaps = true;
313  }*/
314 
315  if (doExperimentalWrap) {
316  TEUCHOS_TEST_FOR_EXCEPTION(predrop_ != null && algo != "classical", Exceptions::RuntimeError, "Dropping function must not be provided for \"" << algo << "\" algorithm");
317  TEUCHOS_TEST_FOR_EXCEPTION(algo != "classical" && algo != "distance laplacian" && algo != "signed classical", Exceptions::RuntimeError, "\"algorithm\" must be one of (classical|distance laplacian|signed classical)");
318 
319  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
320  std::string distanceLaplacianAlgoStr = pL.get<std::string>("aggregation: distance laplacian algo");
321  std::string classicalAlgoStr = pL.get<std::string>("aggregation: classical algo");
322  real_type realThreshold = STS::magnitude(threshold);// CMS: Rename this to "magnitude threshold" sometime
323 
325  // Remove this bit once we are confident that cut-based dropping works.
326 #ifdef HAVE_MUELU_DEBUG
327  int distanceLaplacianCutVerbose = 0;
328 #endif
329 #ifdef DJS_READ_ENV_VARIABLES
330  if (getenv("MUELU_DROP_TOLERANCE_MODE")) {
331  distanceLaplacianAlgoStr = std::string(getenv("MUELU_DROP_TOLERANCE_MODE"));
332  }
333 
334  if (getenv("MUELU_DROP_TOLERANCE_THRESHOLD")) {
335  auto tmp = atoi(getenv("MUELU_DROP_TOLERANCE_THRESHOLD"));
336  realThreshold = 1e-4*tmp;
337  }
338 
339 # ifdef HAVE_MUELU_DEBUG
340  if (getenv("MUELU_DROP_TOLERANCE_VERBOSE")) {
341  distanceLaplacianCutVerbose = atoi(getenv("MUELU_DROP_TOLERANCE_VERBOSE"));
342  }
343 # endif
344 #endif
346 
347  enum decisionAlgoType {defaultAlgo, unscaled_cut, scaled_cut, scaled_cut_symmetric};
348 
349  decisionAlgoType distanceLaplacianAlgo = defaultAlgo;
350  decisionAlgoType classicalAlgo = defaultAlgo;
351  if (algo == "distance laplacian") {
352  if (distanceLaplacianAlgoStr == "default")
353  distanceLaplacianAlgo = defaultAlgo;
354  else if (distanceLaplacianAlgoStr == "unscaled cut")
355  distanceLaplacianAlgo = unscaled_cut;
356  else if (distanceLaplacianAlgoStr == "scaled cut")
357  distanceLaplacianAlgo = scaled_cut;
358  else if (distanceLaplacianAlgoStr == "scaled cut symmetric")
359  distanceLaplacianAlgo = scaled_cut_symmetric;
360  else
361  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: distance laplacian algo\" must be one of (default|unscaled cut|scaled cut), not \"" << distanceLaplacianAlgoStr << "\"");
362  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" distance laplacian algorithm = \"" << distanceLaplacianAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
363  } else if (algo == "classical") {
364  if (classicalAlgoStr == "default")
365  classicalAlgo = defaultAlgo;
366  else if (classicalAlgoStr == "unscaled cut")
367  classicalAlgo = unscaled_cut;
368  else if (classicalAlgoStr == "scaled cut")
369  classicalAlgo = scaled_cut;
370  else
371  TEUCHOS_TEST_FOR_EXCEPTION(true, Exceptions::RuntimeError, "\"aggregation: classical algo\" must be one of (default|unscaled cut|scaled cut), not \"" << classicalAlgoStr << "\"");
372  GetOStream(Runtime0) << "algorithm = \"" << algo << "\" classical algorithm = \"" << classicalAlgoStr << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
373 
374  } else
375  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
376  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
377 
378  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
379 
380 
381  // NOTE: We don't support signed classical with cut drop at present
382  TEUCHOS_TEST_FOR_EXCEPTION(useSignedClassical && classicalAlgo != defaultAlgo, Exceptions::RuntimeError, "\"aggregation: classical algo\" != default is not supported for scalled classical aggregation");
383 
384 
385  GO numDropped = 0, numTotal = 0;
386  std::string graphType = "unamalgamated"; //for description purposes only
387  if (algo == "classical") {
388  if (predrop_ == null) {
389  // ap: this is a hack: had to declare predrop_ as mutable
390  predrop_ = rcp(new PreDropFunctionConstVal(threshold));
391  }
392 
393  if (predrop_ != null) {
394  RCP<PreDropFunctionConstVal> predropConstVal = rcp_dynamic_cast<PreDropFunctionConstVal>(predrop_);
395  TEUCHOS_TEST_FOR_EXCEPTION(predropConstVal == Teuchos::null, Exceptions::BadCast,
396  "MueLu::CoalesceFactory::Build: cast to PreDropFunctionConstVal failed.");
397  // If a user provided a predrop function, it overwrites the XML threshold parameter
398  SC newt = predropConstVal->GetThreshold();
399  if (newt != threshold) {
400  GetOStream(Warnings0) << "switching threshold parameter from " << threshold << " (list) to " << newt << " (user function" << std::endl;
401  threshold = newt;
402  }
403  }
404  // At this points we either have
405  // (predrop_ != null)
406  // Therefore, it is sufficient to check only threshold
407  if (A->GetFixedBlockSize() == 1 && threshold == STS::zero() && !useSignedClassical && A->hasCrsGraph()) {
408  // Case 1: scalar problem, no dropping => just use matrix graph
409  RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
410  // Detect and record rows that correspond to Dirichlet boundary conditions
411  ArrayRCP<bool > boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
412  if (rowSumTol > 0.)
413  Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
414 
415  graph->SetBoundaryNodeMap(boundaryNodes);
416  numTotal = A->getNodeNumEntries();
417 
418  if (GetVerbLevel() & Statistics1) {
419  GO numLocalBoundaryNodes = 0;
420  GO numGlobalBoundaryNodes = 0;
421  for (LO i = 0; i < boundaryNodes.size(); ++i)
422  if (boundaryNodes[i])
423  numLocalBoundaryNodes++;
424  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
425  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
426  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
427  }
428 
429  Set(currentLevel, "DofsPerNode", 1);
430  Set(currentLevel, "Graph", graph);
431 
432  } else if ( (A->GetFixedBlockSize() == 1 && threshold != STS::zero()) ||
433  (A->GetFixedBlockSize() == 1 && threshold == STS::zero() && !A->hasCrsGraph()) ||
434  (A->GetFixedBlockSize() == 1 && useSignedClassical) ) {
435  // Case 2: scalar problem with dropping => record the column indices of undropped entries, but still use original
436  // graph's map information, e.g., whether index is local
437  // OR a matrix without a CrsGraph
438 
439  // allocate space for the local graph
440  ArrayRCP<LO> rows (A->getNodeNumRows()+1);
441  ArrayRCP<LO> columns(A->getNodeNumEntries());
442 
443  using MT = typename STS::magnitudeType;
444  RCP<Vector> ghostedDiag;
445  ArrayRCP<const SC> ghostedDiagVals;
446  ArrayRCP<const MT> negMaxOffDiagonal;
447  if(useSignedClassical) {
448  if(ghostedBlockNumber.is_null()) {
450  if (GetVerbLevel() & Statistics1)
451  GetOStream(Statistics1) << "Calculated max point off-diagonal" << std::endl;
452  }
453  else {
454  negMaxOffDiagonal = MueLu::Utilities<SC,LO,GO,NO>::GetMatrixMaxMinusOffDiagonal(*A,*ghostedBlockNumber);
455  if (GetVerbLevel() & Statistics1)
456  GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl;
457  }
458  }
459  else {
461  ghostedDiagVals = ghostedDiag->getData(0);
462  }
463  ArrayRCP<bool> boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
464  if (rowSumTol > 0.) {
465  if(ghostedBlockNumber.is_null()) {
466  if (GetVerbLevel() & Statistics1)
467  GetOStream(Statistics1) << "Applying point row sum criterion." << std::endl;
468  Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
469  } else {
470  if (GetVerbLevel() & Statistics1)
471  GetOStream(Statistics1) << "Applying block row sum criterion." << std::endl;
472  Utilities::ApplyRowSumCriterion(*A, *ghostedBlockNumber, rowSumTol, boundaryNodes);
473  }
474  }
475 
476  LO realnnz = 0;
477  rows[0] = 0;
478  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getNodeNumElements()); ++row) {
479  size_t nnz = A->getNumEntriesInLocalRow(row);
480  bool rowIsDirichlet = boundaryNodes[row];
481  ArrayView<const LO> indices;
482  ArrayView<const SC> vals;
483  A->getLocalRowView(row, indices, vals);
484 
485  if(classicalAlgo == defaultAlgo) {
486  //FIXME the current predrop function uses the following
487  //FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid )
488  //FIXME but the threshold doesn't take into account the rows' diagonal entries
489  //FIXME For now, hardwiring the dropping in here
490 
491  LO rownnz = 0;
492  if(useSignedClassical) {
493  // Signed classical
494  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
495  LO col = indices[colID];
496  MT max_neg_aik = realThreshold * STS::real(negMaxOffDiagonal[row]);
497  MT neg_aij = - STS::real(vals[colID]);
498  /* if(row==1326) printf("A(%d,%d) = %6.4e, block = (%d,%d) neg_aij = %6.4e max_neg_aik = %6.4e\n",row,col,vals[colID],
499  g_block_id.is_null() ? -1 : g_block_id[row],
500  g_block_id.is_null() ? -1 : g_block_id[col],
501  neg_aij, max_neg_aik);*/
502  if ((!rowIsDirichlet && (g_block_id.is_null() || g_block_id[row] == g_block_id[col]) && neg_aij > max_neg_aik) || row == col) {
503  columns[realnnz++] = col;
504  rownnz++;
505  } else
506  numDropped++;
507  }
508  rows[row+1] = realnnz;
509  }
510  else {
511  // Standard abs classical
512  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
513  LO col = indices[colID];
514  MT aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
515  MT aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
516 
517  if ((!rowIsDirichlet && aij > aiiajj) || row == col) {
518  columns[realnnz++] = col;
519  rownnz++;
520  } else
521  numDropped++;
522  }
523  rows[row+1] = realnnz;
524  }
525  }
526  else {
527  /* Cut Algorithm */
528  //CMS
529  using DropTol = Details::DropTol<real_type,LO>;
530  std::vector<DropTol> drop_vec;
531  drop_vec.reserve(nnz);
532  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
533  const real_type one = Teuchos::ScalarTraits<real_type>::one();
534  LO rownnz = 0;
535  // NOTE: This probably needs to be fixed for rowsum
536 
537  // find magnitudes
538  for (LO colID = 0; colID < (LO)nnz; colID++) {
539  LO col = indices[colID];
540  if (row == col) {
541  drop_vec.emplace_back( zero, one, colID, false);
542  continue;
543  }
544 
545  // Don't aggregate boundaries
546  if(boundaryNodes[colID]) continue;
547  typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold * ghostedDiagVals[col]*ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj|
548  typename STS::magnitudeType aij = STS::magnitude(vals[colID]*vals[colID]); // |a_ij|^2
549  drop_vec.emplace_back(aij, aiiajj, colID, false);
550  }
551 
552  const size_t n = drop_vec.size();
553 
554  if (classicalAlgo == unscaled_cut) {
555  std::sort( drop_vec.begin(), drop_vec.end()
556  , [](DropTol const& a, DropTol const& b) {
557  return a.val > b.val;
558  });
559 
560  bool drop = false;
561  for (size_t i=1; i<n; ++i) {
562  if (!drop) {
563  auto const& x = drop_vec[i-1];
564  auto const& y = drop_vec[i];
565  auto a = x.val;
566  auto b = y.val;
567  if (a > realThreshold*b) {
568  drop = true;
569 #ifdef HAVE_MUELU_DEBUG
570  if (distanceLaplacianCutVerbose) {
571  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
572  }
573 #endif
574  }
575  }
576  drop_vec[i].drop = drop;
577  }
578  } else if (classicalAlgo == scaled_cut) {
579  std::sort( drop_vec.begin(), drop_vec.end()
580  , [](DropTol const& a, DropTol const& b) {
581  return a.val/a.diag > b.val/b.diag;
582  });
583  bool drop = false;
584  // printf("[%d] Scaled Cut: ",(int)row);
585  // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep");
586  for (size_t i=1; i<n; ++i) {
587  if (!drop) {
588  auto const& x = drop_vec[i-1];
589  auto const& y = drop_vec[i];
590  auto a = x.val/x.diag;
591  auto b = y.val/y.diag;
592  if (a > realThreshold*b) {
593  drop = true;
594 
595 #ifdef HAVE_MUELU_DEBUG
596  if (distanceLaplacianCutVerbose) {
597  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
598  }
599 #endif
600  }
601  // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep");
602 
603  }
604  drop_vec[i].drop = drop;
605  }
606  // printf("\n");
607  }
608  std::sort( drop_vec.begin(), drop_vec.end()
609  , [](DropTol const& a, DropTol const& b) {
610  return a.col < b.col;
611  }
612  );
613 
614  for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
615  LO col = indices[drop_vec[idxID].col];
616  // don't drop diagonal
617  if (row == col) {
618  columns[realnnz++] = col;
619  rownnz++;
620  continue;
621  }
622 
623  if (!drop_vec[idxID].drop) {
624  columns[realnnz++] = col;
625  rownnz++;
626  } else {
627  numDropped++;
628  }
629  }
630  // CMS
631  rows[row+1] = realnnz;
632 
633  }
634  }//end for row
635 
636  columns.resize(realnnz);
637  numTotal = A->getNodeNumEntries();
638  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, A->getRowMap(), A->getColMap(), "thresholded graph of A"));
639  graph->SetBoundaryNodeMap(boundaryNodes);
640  if (GetVerbLevel() & Statistics1) {
641  GO numLocalBoundaryNodes = 0;
642  GO numGlobalBoundaryNodes = 0;
643  for (LO i = 0; i < boundaryNodes.size(); ++i)
644  if (boundaryNodes[i])
645  numLocalBoundaryNodes++;
646  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
647  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
648  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
649  }
650  Set(currentLevel, "Graph", graph);
651  Set(currentLevel, "DofsPerNode", 1);
652 
653  // If we're doing signed classical, we might want to block-diagonalize *after* the dropping
654  if(generateColoringGraph) {
655  RCP<GraphBase> colorGraph;
656  RCP<const Import> importer = A->getCrsGraph()->getImporter();
657  BlockDiagonalizeGraph(graph,ghostedBlockNumber,colorGraph,importer);
658  Set(currentLevel, "Coloring Graph",colorGraph);
659  // #define CMS_DUMP
660 #ifdef CMS_DUMP
661  {
662  Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("m_regular_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(graph)->GetCrsGraph());
663  Xpetra::IO<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Write("m_color_graph."+std::to_string(currentLevel.GetLevelID()), *rcp_dynamic_cast<LWGraph>(colorGraph)->GetCrsGraph());
664  // int rank = graph->GetDomainMap()->getComm()->getRank();
665  // {
666  // std::ofstream ofs(std::string("m_color_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
667  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
668  // colorGraph->print(*fancy,Debug);
669  // }
670  // {
671  // std::ofstream ofs(std::string("m_regular_graph_") + std::to_string(currentLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
672  // RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
673  // graph->print(*fancy,Debug);
674  // }
675 
676  }
677 #endif
678  }//end generateColoringGraph
679  } else if (A->GetFixedBlockSize() > 1 && threshold == STS::zero()) {
680  // Case 3: Multiple DOF/node problem without dropping
681  const RCP<const Map> rowMap = A->getRowMap();
682  const RCP<const Map> colMap = A->getColMap();
683 
684  graphType = "amalgamated";
685 
686  // build node row map (uniqueMap) and node column map (nonUniqueMap)
687  // the arrays rowTranslation and colTranslation contain the local node id
688  // given a local dof id. The data is calculated by the AmalgamationFactory and
689  // stored in the variable container "UnAmalgamationInfo"
690  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
691  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
692  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
693  Array<LO> colTranslation = *(amalInfo->getColTranslation());
694 
695  // get number of local nodes
696  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
697 
698  // Allocate space for the local graph
699  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
700  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getNodeNumEntries());
701 
702  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
703 
704  // Detect and record rows that correspond to Dirichlet boundary conditions
705  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
706  // TODO the array one bigger than the number of local rows, and the last entry can
707  // TODO hold the actual number of boundary nodes. Clever, huh?
708  ArrayRCP<bool > pointBoundaryNodes;
709  pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
710  if (rowSumTol > 0.)
711  Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
712 
713 
714  // extract striding information
715  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
716  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
717  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
718  if (A->IsView("stridedMaps") == true) {
719  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
720  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
721  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
722  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
723  blkId = strMap->getStridedBlockId();
724  if (blkId > -1)
725  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
726  }
727 
728  // loop over all local nodes
729  LO realnnz = 0;
730  rows[0] = 0;
731  Array<LO> indicesExtra;
732  for (LO row = 0; row < numRows; row++) {
733  ArrayView<const LO> indices;
734  indicesExtra.resize(0);
735 
736  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
737  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
738  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
739  // with local ids.
740  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
741  // node.
742  bool isBoundary = false;
743  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
744  for (LO j = 0; j < blkPartSize; j++) {
745  if (pointBoundaryNodes[row*blkPartSize+j]) {
746  isBoundary = true;
747  break;
748  }
749  }
750  } else {
751  isBoundary = true;
752  for (LO j = 0; j < blkPartSize; j++) {
753  if (!pointBoundaryNodes[row*blkPartSize+j]) {
754  isBoundary = false;
755  break;
756  }
757  }
758  }
759 
760  // Merge rows of A
761  // The array indicesExtra contains local column node ids for the current local node "row"
762  if (!isBoundary)
763  MergeRows(*A, row, indicesExtra, colTranslation);
764  else
765  indicesExtra.push_back(row);
766  indices = indicesExtra;
767  numTotal += indices.size();
768 
769  // add the local column node ids to the full columns array which
770  // contains the local column node ids for all local node rows
771  LO nnz = indices.size(), rownnz = 0;
772  for (LO colID = 0; colID < nnz; colID++) {
773  LO col = indices[colID];
774  columns[realnnz++] = col;
775  rownnz++;
776  }
777 
778  if (rownnz == 1) {
779  // If the only element remaining after filtering is diagonal, mark node as boundary
780  // FIXME: this should really be replaced by the following
781  // if (indices.size() == 1 && indices[0] == row)
782  // boundaryNodes[row] = true;
783  // We do not do it this way now because there is no framework for distinguishing isolated
784  // and boundary nodes in the aggregation algorithms
785  amalgBoundaryNodes[row] = true;
786  }
787  rows[row+1] = realnnz;
788  } //for (LO row = 0; row < numRows; row++)
789  columns.resize(realnnz);
790 
791  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
792  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
793 
794  if (GetVerbLevel() & Statistics1) {
795  GO numLocalBoundaryNodes = 0;
796  GO numGlobalBoundaryNodes = 0;
797 
798  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
799  if (amalgBoundaryNodes[i])
800  numLocalBoundaryNodes++;
801 
802  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
803  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
804  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
805  << " agglomerated Dirichlet nodes" << std::endl;
806  }
807 
808  Set(currentLevel, "Graph", graph);
809  Set(currentLevel, "DofsPerNode", blkSize); // full block size
810 
811  } else if (A->GetFixedBlockSize() > 1 && threshold != STS::zero()) {
812  // Case 4: Multiple DOF/node problem with dropping
813  const RCP<const Map> rowMap = A->getRowMap();
814  const RCP<const Map> colMap = A->getColMap();
815  graphType = "amalgamated";
816 
817  // build node row map (uniqueMap) and node column map (nonUniqueMap)
818  // the arrays rowTranslation and colTranslation contain the local node id
819  // given a local dof id. The data is calculated by the AmalgamationFactory and
820  // stored in the variable container "UnAmalgamationInfo"
821  RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
822  RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
823  Array<LO> rowTranslation = *(amalInfo->getRowTranslation());
824  Array<LO> colTranslation = *(amalInfo->getColTranslation());
825 
826  // get number of local nodes
827  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
828 
829  // Allocate space for the local graph
830  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
831  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getNodeNumEntries());
832 
833  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
834 
835  // Detect and record rows that correspond to Dirichlet boundary conditions
836  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
837  // TODO the array one bigger than the number of local rows, and the last entry can
838  // TODO hold the actual number of boundary nodes. Clever, huh?
839  ArrayRCP<bool > pointBoundaryNodes;
840  pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
841  if (rowSumTol > 0.)
842  Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
843 
844 
845  // extract striding information
846  LO blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
847  LO blkId = -1; //< the block id within the strided map (or -1 if it is a full block map)
848  LO blkPartSize = A->GetFixedBlockSize(); //< stores the size of the block within the strided map
849  if (A->IsView("stridedMaps") == true) {
850  Teuchos::RCP<const Map> myMap = A->getRowMap("stridedMaps");
851  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
852  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
853  blkSize = Teuchos::as<const LO>(strMap->getFixedBlockSize());
854  blkId = strMap->getStridedBlockId();
855  if (blkId > -1)
856  blkPartSize = Teuchos::as<LO>(strMap->getStridingData()[blkId]);
857  }
858 
859  // extract diagonal data for dropping strategy
861  const ArrayRCP<const SC> ghostedDiagVals = ghostedDiag->getData(0);
862 
863  // loop over all local nodes
864  LO realnnz = 0;
865  rows[0] = 0;
866  Array<LO> indicesExtra;
867  for (LO row = 0; row < numRows; row++) {
868  ArrayView<const LO> indices;
869  indicesExtra.resize(0);
870 
871  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
872  // Note, that pointBoundaryNodes lives on the dofmap (and not the node map).
873  // Therefore, looping over all dofs is fine here. We use blkPartSize as we work
874  // with local ids.
875  // TODO: Here we have different options of how to define a node to be a boundary (or Dirichlet)
876  // node.
877  bool isBoundary = false;
878  if (pL.get<bool>("aggregation: greedy Dirichlet") == true) {
879  for (LO j = 0; j < blkPartSize; j++) {
880  if (pointBoundaryNodes[row*blkPartSize+j]) {
881  isBoundary = true;
882  break;
883  }
884  }
885  } else {
886  isBoundary = true;
887  for (LO j = 0; j < blkPartSize; j++) {
888  if (!pointBoundaryNodes[row*blkPartSize+j]) {
889  isBoundary = false;
890  break;
891  }
892  }
893  }
894 
895  // Merge rows of A
896  // The array indicesExtra contains local column node ids for the current local node "row"
897  if (!isBoundary)
898  MergeRowsWithDropping(*A, row, ghostedDiagVals, threshold, indicesExtra, colTranslation);
899  else
900  indicesExtra.push_back(row);
901  indices = indicesExtra;
902  numTotal += indices.size();
903 
904  // add the local column node ids to the full columns array which
905  // contains the local column node ids for all local node rows
906  LO nnz = indices.size(), rownnz = 0;
907  for (LO colID = 0; colID < nnz; colID++) {
908  LO col = indices[colID];
909  columns[realnnz++] = col;
910  rownnz++;
911  }
912 
913  if (rownnz == 1) {
914  // If the only element remaining after filtering is diagonal, mark node as boundary
915  // FIXME: this should really be replaced by the following
916  // if (indices.size() == 1 && indices[0] == row)
917  // boundaryNodes[row] = true;
918  // We do not do it this way now because there is no framework for distinguishing isolated
919  // and boundary nodes in the aggregation algorithms
920  amalgBoundaryNodes[row] = true;
921  }
922  rows[row+1] = realnnz;
923  } //for (LO row = 0; row < numRows; row++)
924  columns.resize(realnnz);
925 
926  RCP<GraphBase> graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
927  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
928 
929  if (GetVerbLevel() & Statistics1) {
930  GO numLocalBoundaryNodes = 0;
931  GO numGlobalBoundaryNodes = 0;
932 
933  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
934  if (amalgBoundaryNodes[i])
935  numLocalBoundaryNodes++;
936 
937  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
938  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
939  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes
940  << " agglomerated Dirichlet nodes" << std::endl;
941  }
942 
943  Set(currentLevel, "Graph", graph);
944  Set(currentLevel, "DofsPerNode", blkSize); // full block size
945  }
946 
947  } else if (algo == "distance laplacian") {
948  LO blkSize = A->GetFixedBlockSize();
949  GO indexBase = A->getRowMap()->getIndexBase();
950  // [*0*] : FIXME
951  // ap: somehow, if I move this line to [*1*], Belos throws an error
952  // I'm not sure what's going on. Do we always have to Get data, if we did
953  // DeclareInput for it?
954  // RCP<RealValuedMultiVector> Coords = Get< RCP<RealValuedMultiVector > >(currentLevel, "Coordinates");
955 
956  // Detect and record rows that correspond to Dirichlet boundary conditions
957  // TODO If we use ArrayRCP<LO>, then we can record boundary nodes as usual. Size
958  // TODO the array one bigger than the number of local rows, and the last entry can
959  // TODO hold the actual number of boundary nodes. Clever, huh?
960  ArrayRCP<bool > pointBoundaryNodes;
961  pointBoundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
962  if (rowSumTol > 0.)
963  Utilities::ApplyRowSumCriterion(*A, rowSumTol, pointBoundaryNodes);
964 
965  if ( (blkSize == 1) && (threshold == STS::zero()) ) {
966  // Trivial case: scalar problem, no dropping. Can return original graph
967  RCP<GraphBase> graph = rcp(new Graph(A->getCrsGraph(), "graph of A"));
968  graph->SetBoundaryNodeMap(pointBoundaryNodes);
969  graphType="unamalgamated";
970  numTotal = A->getNodeNumEntries();
971 
972  if (GetVerbLevel() & Statistics1) {
973  GO numLocalBoundaryNodes = 0;
974  GO numGlobalBoundaryNodes = 0;
975  for (LO i = 0; i < pointBoundaryNodes.size(); ++i)
976  if (pointBoundaryNodes[i])
977  numLocalBoundaryNodes++;
978  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
979  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
980  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
981  }
982 
983  Set(currentLevel, "DofsPerNode", blkSize);
984  Set(currentLevel, "Graph", graph);
985 
986  } else {
987  // ap: We make quite a few assumptions here; general case may be a lot different,
988  // but much much harder to implement. We assume that:
989  // 1) all maps are standard maps, not strided maps
990  // 2) global indices of dofs in A are related to dofs in coordinates in a simple arithmetic
991  // way: rows i*blkSize, i*blkSize+1, ..., i*blkSize + (blkSize-1) correspond to node i
992  //
993  // NOTE: Potentially, some of the code below could be simplified with UnAmalgamationInfo,
994  // but as I totally don't understand that code, here is my solution
995 
996  // [*1*]: see [*0*]
997 
998  // Check that the number of local coordinates is consistent with the #rows in A
999  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getNodeNumElements()/blkSize != Coords->getLocalLength(), Exceptions::Incompatible,
1000  "Coordinate vector length (" << Coords->getLocalLength() << ") is incompatible with number of rows in A (" << A->getRowMap()->getNodeNumElements() << ") by modulo block size ("<< blkSize <<").");
1001 
1002  const RCP<const Map> colMap = A->getColMap();
1003  RCP<const Map> uniqueMap, nonUniqueMap;
1004  Array<LO> colTranslation;
1005  if (blkSize == 1) {
1006  uniqueMap = A->getRowMap();
1007  nonUniqueMap = A->getColMap();
1008  graphType="unamalgamated";
1009 
1010  } else {
1011  uniqueMap = Coords->getMap();
1012  TEUCHOS_TEST_FOR_EXCEPTION(uniqueMap->getIndexBase() != indexBase, Exceptions::Incompatible,
1013  "Different index bases for matrix and coordinates");
1014 
1015  AmalgamationFactory::AmalgamateMap(*(A->getColMap()), *A, nonUniqueMap, colTranslation);
1016 
1017  graphType = "amalgamated";
1018  }
1019  LO numRows = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
1020 
1021  RCP<RealValuedMultiVector> ghostedCoords;
1022  RCP<Vector> ghostedLaplDiag;
1023  Teuchos::ArrayRCP<SC> ghostedLaplDiagData;
1024  if (threshold != STS::zero()) {
1025  // Get ghost coordinates
1026  RCP<const Import> importer;
1027  {
1028  SubFactoryMonitor m1(*this, "Import construction", currentLevel);
1029  if (blkSize == 1 && realA->getCrsGraph()->getImporter() != Teuchos::null) {
1030  GetOStream(Warnings1) << "Using existing importer from matrix graph" << std::endl;
1031  importer = realA->getCrsGraph()->getImporter();
1032  } else {
1033  GetOStream(Warnings0) << "Constructing new importer instance" << std::endl;
1034  importer = ImportFactory::Build(uniqueMap, nonUniqueMap);
1035  }
1036  } //subtimer
1037  ghostedCoords = Xpetra::MultiVectorFactory<real_type,LO,GO,NO>::Build(nonUniqueMap, Coords->getNumVectors());
1038  {
1039  SubFactoryMonitor m1(*this, "Coordinate import", currentLevel);
1040  ghostedCoords->doImport(*Coords, *importer, Xpetra::INSERT);
1041  } //subtimer
1042 
1043  // Construct Distance Laplacian diagonal
1044  RCP<Vector> localLaplDiag = VectorFactory::Build(uniqueMap);
1045  ArrayRCP<SC> localLaplDiagData = localLaplDiag->getDataNonConst(0);
1046  Array<LO> indicesExtra;
1047  Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1048  if (threshold != STS::zero()) {
1049  const size_t numVectors = ghostedCoords->getNumVectors();
1050  coordData.reserve(numVectors);
1051  for (size_t j = 0; j < numVectors; j++) {
1052  Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
1053  coordData.push_back(tmpData);
1054  }
1055  }
1056  {
1057  SubFactoryMonitor m1(*this, "Laplacian local diagonal", currentLevel);
1058  for (LO row = 0; row < numRows; row++) {
1059  ArrayView<const LO> indices;
1060 
1061  if (blkSize == 1) {
1062  ArrayView<const SC> vals;
1063  A->getLocalRowView(row, indices, vals);
1064 
1065  } else {
1066  // Merge rows of A
1067  indicesExtra.resize(0);
1068  MergeRows(*A, row, indicesExtra, colTranslation);
1069  indices = indicesExtra;
1070  }
1071 
1072  LO nnz = indices.size();
1073  bool haveAddedToDiag = false;
1074  for (LO colID = 0; colID < nnz; colID++) {
1075  const LO col = indices[colID];
1076 
1077  if (row != col) {
1078  if(use_dlap_weights == SINGLE_WEIGHTS) {
1079  /*printf("[%d,%d] Unweighted Distance = %6.4e Weighted Distance = %6.4e\n",row,col,
1080  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col),
1081  MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col));*/
1082  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1083  }
1084  else if(use_dlap_weights == BLOCK_WEIGHTS) {
1085  int block_id = row % interleaved_blocksize;
1086  int block_start = block_id * interleaved_blocksize;
1087  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1088  }
1089  else {
1090  // printf("[%d,%d] Unweighted Distance = %6.4e\n",row,col,MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col));
1091  localLaplDiagData[row] += STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1092  }
1093  haveAddedToDiag = true;
1094  }
1095  }
1096  // Deal with the situation where boundary conditions have only been enforced on rows, but not on columns.
1097  // We enforce dropping of these entries by assigning a very large number to the diagonal entries corresponding to BCs.
1098  if (!haveAddedToDiag)
1099  localLaplDiagData[row] = STS::rmax();
1100  }
1101  } //subtimer
1102  {
1103  SubFactoryMonitor m1(*this, "Laplacian distributed diagonal", currentLevel);
1104  ghostedLaplDiag = VectorFactory::Build(nonUniqueMap);
1105  ghostedLaplDiag->doImport(*localLaplDiag, *importer, Xpetra::INSERT);
1106  ghostedLaplDiagData = ghostedLaplDiag->getDataNonConst(0);
1107  } //subtimer
1108 
1109  } else {
1110  GetOStream(Runtime0) << "Skipping distance laplacian construction due to 0 threshold" << std::endl;
1111  }
1112 
1113  // NOTE: ghostedLaplDiagData might be zero if we don't actually calculate the laplacian
1114 
1115  // allocate space for the local graph
1116  ArrayRCP<LO> rows = ArrayRCP<LO>(numRows+1);
1117  ArrayRCP<LO> columns = ArrayRCP<LO>(A->getNodeNumEntries());
1118 
1119 #ifdef HAVE_MUELU_DEBUG
1120  // DEBUGGING
1121  for(LO i=0; i<(LO)columns.size(); i++) columns[i]=-666;
1122 #endif
1123 
1124  // Extra array for if we're allowing symmetrization with cutting
1125  ArrayRCP<LO> rows_stop;
1126  bool use_stop_array = threshold != STS::zero() && distanceLaplacianAlgo == scaled_cut_symmetric;
1127  if(use_stop_array)
1128  rows_stop.resize(numRows);
1129 
1130  const ArrayRCP<bool> amalgBoundaryNodes(numRows, false);
1131 
1132  LO realnnz = 0;
1133  rows[0] = 0;
1134 
1135  Array<LO> indicesExtra;
1136  {
1137  SubFactoryMonitor m1(*this, "Laplacian dropping", currentLevel);
1138  Teuchos::Array<Teuchos::ArrayRCP<const real_type>> coordData;
1139  if (threshold != STS::zero()) {
1140  const size_t numVectors = ghostedCoords->getNumVectors();
1141  coordData.reserve(numVectors);
1142  for (size_t j = 0; j < numVectors; j++) {
1143  Teuchos::ArrayRCP<const real_type> tmpData=ghostedCoords->getData(j);
1144  coordData.push_back(tmpData);
1145  }
1146  }
1147 
1148  ArrayView<const SC> vals;//CMS hackery
1149  for (LO row = 0; row < numRows; row++) {
1150  ArrayView<const LO> indices;
1151  indicesExtra.resize(0);
1152  bool isBoundary = false;
1153 
1154  if (blkSize == 1) {
1155  // ArrayView<const SC> vals;//CMS uncomment
1156  A->getLocalRowView(row, indices, vals);
1157  isBoundary = pointBoundaryNodes[row];
1158  } else {
1159  // The amalgamated row is marked as Dirichlet iff all point rows are Dirichlet
1160  for (LO j = 0; j < blkSize; j++) {
1161  if (!pointBoundaryNodes[row*blkSize+j]) {
1162  isBoundary = false;
1163  break;
1164  }
1165  }
1166 
1167  // Merge rows of A
1168  if (!isBoundary)
1169  MergeRows(*A, row, indicesExtra, colTranslation);
1170  else
1171  indicesExtra.push_back(row);
1172  indices = indicesExtra;
1173  }
1174  numTotal += indices.size();
1175 
1176  LO nnz = indices.size(), rownnz = 0;
1177 
1178  if(use_stop_array) {
1179  rows[row+1] = rows[row]+nnz;
1180  realnnz = rows[row];
1181  }
1182 
1183  if (threshold != STS::zero()) {
1184  // default
1185  if (distanceLaplacianAlgo == defaultAlgo) {
1186  /* Standard Distance Laplacian */
1187  for (LO colID = 0; colID < nnz; colID++) {
1188 
1189  LO col = indices[colID];
1190 
1191  if (row == col) {
1192  columns[realnnz++] = col;
1193  rownnz++;
1194  continue;
1195  }
1196 
1197  // We do not want the distance Laplacian aggregating boundary nodes
1198  if(isBoundary) continue;
1199 
1200  SC laplVal;
1201  if(use_dlap_weights == SINGLE_WEIGHTS) {
1202  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1203  }
1204  else if(use_dlap_weights == BLOCK_WEIGHTS) {
1205  int block_id = row % interleaved_blocksize;
1206  int block_start = block_id * interleaved_blocksize;
1207  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1208  }
1209  else {
1210  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1211  }
1212  real_type aiiajj = STS::magnitude(realThreshold*realThreshold * ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
1213  real_type aij = STS::magnitude(laplVal*laplVal);
1214 
1215  if (aij > aiiajj) {
1216  columns[realnnz++] = col;
1217  rownnz++;
1218  } else {
1219  numDropped++;
1220  }
1221  }
1222  } else {
1223  /* Cut Algorithm */
1224  using DropTol = Details::DropTol<real_type,LO>;
1225  std::vector<DropTol> drop_vec;
1226  drop_vec.reserve(nnz);
1227  const real_type zero = Teuchos::ScalarTraits<real_type>::zero();
1228  const real_type one = Teuchos::ScalarTraits<real_type>::one();
1229 
1230  // find magnitudes
1231  for (LO colID = 0; colID < nnz; colID++) {
1232 
1233  LO col = indices[colID];
1234 
1235  if (row == col) {
1236  drop_vec.emplace_back( zero, one, colID, false);
1237  continue;
1238  }
1239  // We do not want the distance Laplacian aggregating boundary nodes
1240  if(isBoundary) continue;
1241 
1242  SC laplVal;
1243  if(use_dlap_weights == SINGLE_WEIGHTS) {
1244  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(),coordData, row, col);
1245  }
1246  else if(use_dlap_weights == BLOCK_WEIGHTS) {
1247  int block_id = row % interleaved_blocksize;
1248  int block_start = block_id * interleaved_blocksize;
1249  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(dlap_weights(block_start,interleaved_blocksize),coordData, row, col);
1250  }
1251  else {
1252  laplVal = STS::one() / MueLu::Utilities<real_type,LO,GO,NO>::Distance2(coordData, row, col);
1253  }
1254 
1255  real_type aiiajj = STS::magnitude(ghostedLaplDiagData[row]*ghostedLaplDiagData[col]);
1256  real_type aij = STS::magnitude(laplVal*laplVal);
1257 
1258  drop_vec.emplace_back(aij, aiiajj, colID, false);
1259  }
1260 
1261  const size_t n = drop_vec.size();
1262 
1263  if (distanceLaplacianAlgo == unscaled_cut) {
1264 
1265  std::sort( drop_vec.begin(), drop_vec.end()
1266  , [](DropTol const& a, DropTol const& b) {
1267  return a.val > b.val;
1268  }
1269  );
1270 
1271  bool drop = false;
1272  for (size_t i=1; i<n; ++i) {
1273  if (!drop) {
1274  auto const& x = drop_vec[i-1];
1275  auto const& y = drop_vec[i];
1276  auto a = x.val;
1277  auto b = y.val;
1278  if (a > realThreshold*b) {
1279  drop = true;
1280 #ifdef HAVE_MUELU_DEBUG
1281  if (distanceLaplacianCutVerbose) {
1282  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
1283  }
1284 #endif
1285  }
1286  }
1287  drop_vec[i].drop = drop;
1288  }
1289  }
1290  else if (distanceLaplacianAlgo == scaled_cut || distanceLaplacianAlgo == scaled_cut_symmetric) {
1291 
1292  std::sort( drop_vec.begin(), drop_vec.end()
1293  , [](DropTol const& a, DropTol const& b) {
1294  return a.val/a.diag > b.val/b.diag;
1295  }
1296  );
1297 
1298  bool drop = false;
1299  for (size_t i=1; i<n; ++i) {
1300  if (!drop) {
1301  auto const& x = drop_vec[i-1];
1302  auto const& y = drop_vec[i];
1303  auto a = x.val/x.diag;
1304  auto b = y.val/y.diag;
1305  if (a > realThreshold*b) {
1306  drop = true;
1307 #ifdef HAVE_MUELU_DEBUG
1308  if (distanceLaplacianCutVerbose) {
1309  std::cout << "DJS: KEEP, N, ROW: " << i+1 << ", " << n << ", " << row << std::endl;
1310  }
1311 #endif
1312  }
1313  }
1314  drop_vec[i].drop = drop;
1315  }
1316  }
1317 
1318  std::sort( drop_vec.begin(), drop_vec.end()
1319  , [](DropTol const& a, DropTol const& b) {
1320  return a.col < b.col;
1321  }
1322  );
1323 
1324  for (LO idxID =0; idxID<(LO)drop_vec.size(); idxID++) {
1325  LO col = indices[drop_vec[idxID].col];
1326 
1327 
1328  // don't drop diagonal
1329  if (row == col) {
1330  columns[realnnz++] = col;
1331  rownnz++;
1332  // printf("(%d,%d) KEEP %13s matrix = %6.4e\n",row,row,"DIAGONAL",drop_vec[idxID].aux_val);
1333  continue;
1334  }
1335 
1336  if (!drop_vec[idxID].drop) {
1337  columns[realnnz++] = col;
1338  // printf("(%d,%d) KEEP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1339  rownnz++;
1340  } else {
1341  // printf("(%d,%d) DROP dlap = %6.4e matrix = %6.4e\n",row,col,drop_vec[idxID].val/drop_vec[idxID].diag,drop_vec[idxID].aux_val);
1342  numDropped++;
1343  }
1344  }
1345  }
1346  } else {
1347  // Skip laplace calculation and threshold comparison for zero threshold
1348  for (LO colID = 0; colID < nnz; colID++) {
1349  LO col = indices[colID];
1350  columns[realnnz++] = col;
1351  rownnz++;
1352  }
1353  }
1354 
1355  if ( rownnz == 1) {
1356  // If the only element remaining after filtering is diagonal, mark node as boundary
1357  // FIXME: this should really be replaced by the following
1358  // if (indices.size() == 1 && indices[0] == row)
1359  // boundaryNodes[row] = true;
1360  // We do not do it this way now because there is no framework for distinguishing isolated
1361  // and boundary nodes in the aggregation algorithms
1362  amalgBoundaryNodes[row] = true;
1363  }
1364 
1365  if(use_stop_array)
1366  rows_stop[row] = rownnz + rows[row];
1367  else
1368  rows[row+1] = realnnz;
1369  } //for (LO row = 0; row < numRows; row++)
1370 
1371  } //subtimer
1372 
1373  if (use_stop_array) {
1374  // Do symmetrization of the cut matrix
1375  // NOTE: We assume nested row/column maps here
1376  for (LO row = 0; row < numRows; row++) {
1377  for (LO colidx = rows[row]; colidx < rows_stop[row]; colidx++) {
1378  LO col = columns[colidx];
1379  if(col >= numRows) continue;
1380 
1381  bool found = false;
1382  for(LO t_col = rows[col] ; !found && t_col < rows_stop[col]; t_col++) {
1383  if (columns[t_col] == row)
1384  found = true;
1385  }
1386  // We didn't find the transpose buddy, so let's symmetrize, unless we'd be symmetrizing
1387  // into a Dirichlet unknown. In that case don't.
1388  if(!found && !pointBoundaryNodes[col] && rows_stop[col] < rows[col+1]) {
1389  LO new_idx = rows_stop[col];
1390  // printf("(%d,%d) SYMADD entry\n",col,row);
1391  columns[new_idx] = row;
1392  rows_stop[col]++;
1393  numDropped--;
1394  }
1395  }
1396  }
1397 
1398  // Condense everything down
1399  LO current_start=0;
1400  for (LO row = 0; row < numRows; row++) {
1401  LO old_start = current_start;
1402  for (LO col = rows[row]; col < rows_stop[row]; col++) {
1403  if(current_start != col) {
1404  columns[current_start] = columns[col];
1405  }
1406  current_start++;
1407  }
1408  rows[row] = old_start;
1409  }
1410  rows[numRows] = realnnz = current_start;
1411 
1412  }
1413 
1414  columns.resize(realnnz);
1415 
1416  RCP<GraphBase> graph;
1417  {
1418  SubFactoryMonitor m1(*this, "Build amalgamated graph", currentLevel);
1419  graph = rcp(new LWGraph(rows, columns, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
1420  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1421  } //subtimer
1422 
1423  if (GetVerbLevel() & Statistics1) {
1424  GO numLocalBoundaryNodes = 0;
1425  GO numGlobalBoundaryNodes = 0;
1426 
1427  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1428  if (amalgBoundaryNodes[i])
1429  numLocalBoundaryNodes++;
1430 
1431  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1432  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1433  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " agglomerated Dirichlet nodes"
1434  << " using threshold " << dirichletThreshold << std::endl;
1435  }
1436 
1437  Set(currentLevel, "Graph", graph);
1438  Set(currentLevel, "DofsPerNode", blkSize);
1439  }
1440  }
1441 
1442  if ((GetVerbLevel() & Statistics1) && !(A->GetFixedBlockSize() > 1 && threshold != STS::zero())) {
1443  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1444  GO numGlobalTotal, numGlobalDropped;
1445  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1446  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1447  GetOStream(Statistics1) << "Number of dropped entries in " << graphType << " matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1448  if (numGlobalTotal != 0)
1449  GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1450  GetOStream(Statistics1) << std::endl;
1451  }
1452 
1453  } else {
1454  //what Tobias has implemented
1455 
1456  SC threshold = as<SC>(pL.get<double>("aggregation: drop tol"));
1457  //GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1458  GetOStream(Runtime0) << "algorithm = \"" << "failsafe" << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
1459  Set<bool>(currentLevel, "Filtering", (threshold != STS::zero()));
1460 
1461  RCP<const Map> rowMap = A->getRowMap();
1462  RCP<const Map> colMap = A->getColMap();
1463 
1464  LO blockdim = 1; // block dim for fixed size blocks
1465  GO indexBase = rowMap->getIndexBase(); // index base of maps
1466  GO offset = 0;
1467 
1468  // 1) check for blocking/striding information
1469  if(A->IsView("stridedMaps") &&
1470  Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap("stridedMaps")) != Teuchos::null) {
1471  Xpetra::viewLabel_t oldView = A->SwitchToView("stridedMaps"); // note: "stridedMaps are always non-overlapping (correspond to range and domain maps!)
1472  RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(A->getRowMap());
1473  TEUCHOS_TEST_FOR_EXCEPTION(strMap == Teuchos::null,Exceptions::BadCast,"MueLu::CoalesceFactory::Build: cast to strided row map failed.");
1474  blockdim = strMap->getFixedBlockSize();
1475  offset = strMap->getOffset();
1476  oldView = A->SwitchToView(oldView);
1477  GetOStream(Statistics1) << "CoalesceDropFactory::Build():" << " found blockdim=" << blockdim << " from strided maps. offset=" << offset << std::endl;
1478  } else GetOStream(Statistics1) << "CoalesceDropFactory::Build(): no striding information available. Use blockdim=1 with offset=0" << std::endl;
1479 
1480  // 2) get row map for amalgamated matrix (graph of A)
1481  // with same distribution over all procs as row map of A
1482  RCP<const Map> nodeMap = amalInfo->getNodeRowMap();
1483  GetOStream(Statistics1) << "CoalesceDropFactory: nodeMap " << nodeMap->getNodeNumElements() << "/" << nodeMap->getGlobalNumElements() << " elements" << std::endl;
1484 
1485  // 3) create graph of amalgamated matrix
1486  RCP<CrsGraph> crsGraph = CrsGraphFactory::Build(nodeMap, A->getNodeMaxNumRowEntries()*blockdim);
1487 
1488  LO numRows = A->getRowMap()->getNodeNumElements();
1489  LO numNodes = nodeMap->getNodeNumElements();
1490  const ArrayRCP<bool> amalgBoundaryNodes(numNodes, false);
1491  const ArrayRCP<int> numberDirichletRowsPerNode(numNodes, 0); // helper array counting the number of Dirichlet nodes associated with node
1492  bool bIsDiagonalEntry = false; // boolean flag stating that grid==gcid
1493 
1494  // 4) do amalgamation. generate graph of amalgamated matrix
1495  // Note, this code is much more inefficient than the leightwight implementation
1496  // Most of the work has already been done in the AmalgamationFactory
1497  for(LO row=0; row<numRows; row++) {
1498  // get global DOF id
1499  GO grid = rowMap->getGlobalElement(row);
1500 
1501  // reinitialize boolean helper variable
1502  bIsDiagonalEntry = false;
1503 
1504  // translate grid to nodeid
1505  GO nodeId = AmalgamationFactory::DOFGid2NodeId(grid, blockdim, offset, indexBase);
1506 
1507  size_t nnz = A->getNumEntriesInLocalRow(row);
1508  Teuchos::ArrayView<const LO> indices;
1509  Teuchos::ArrayView<const SC> vals;
1510  A->getLocalRowView(row, indices, vals);
1511 
1512  RCP<std::vector<GO> > cnodeIds = Teuchos::rcp(new std::vector<GO>); // global column block ids
1513  LO realnnz = 0;
1514  for(LO col=0; col<Teuchos::as<LO>(nnz); col++) {
1515  GO gcid = colMap->getGlobalElement(indices[col]); // global column id
1516 
1517  if(vals[col]!=STS::zero()) {
1518  GO cnodeId = AmalgamationFactory::DOFGid2NodeId(gcid, blockdim, offset, indexBase);
1519  cnodeIds->push_back(cnodeId);
1520  realnnz++; // increment number of nnz in matrix row
1521  if (grid == gcid) bIsDiagonalEntry = true;
1522  }
1523  }
1524 
1525  if(realnnz == 1 && bIsDiagonalEntry == true) {
1526  LO lNodeId = nodeMap->getLocalElement(nodeId);
1527  numberDirichletRowsPerNode[lNodeId] += 1; // increment Dirichlet row counter associated with lNodeId
1528  if (numberDirichletRowsPerNode[lNodeId] == blockdim) // mark full Dirichlet nodes
1529  amalgBoundaryNodes[lNodeId] = true;
1530  }
1531 
1532  Teuchos::ArrayRCP<GO> arr_cnodeIds = Teuchos::arcp( cnodeIds );
1533 
1534  if(arr_cnodeIds.size() > 0 )
1535  crsGraph->insertGlobalIndices(nodeId, arr_cnodeIds());
1536  }
1537  // fill matrix graph
1538  crsGraph->fillComplete(nodeMap,nodeMap);
1539 
1540  // 5) create MueLu Graph object
1541  RCP<GraphBase> graph = rcp(new Graph(crsGraph, "amalgamated graph of A"));
1542 
1543  // Detect and record rows that correspond to Dirichlet boundary conditions
1544  graph->SetBoundaryNodeMap(amalgBoundaryNodes);
1545 
1546  if (GetVerbLevel() & Statistics1) {
1547  GO numLocalBoundaryNodes = 0;
1548  GO numGlobalBoundaryNodes = 0;
1549  for (LO i = 0; i < amalgBoundaryNodes.size(); ++i)
1550  if (amalgBoundaryNodes[i])
1551  numLocalBoundaryNodes++;
1552  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1553  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1554  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1555  }
1556 
1557  // 6) store results in Level
1558  //graph->SetBoundaryNodeMap(gBoundaryNodeMap);
1559  Set(currentLevel, "DofsPerNode", blockdim);
1560  Set(currentLevel, "Graph", graph);
1561 
1562  } //if (doExperimentalWrap) ... else ...
1563 
1564 
1565  } //Build
1566 
1567  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1568  void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRows(const Matrix& A, const LO row, Array<LO>& cols, const Array<LO>& translation) const {
1569  typedef typename ArrayView<const LO>::size_type size_type;
1570 
1571  // extract striding information
1572  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1573  if (A.IsView("stridedMaps") == true) {
1574  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1575  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1576  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1577  if (strMap->getStridedBlockId() > -1)
1578  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1579  }
1580 
1581  // count nonzero entries in all dof rows associated with node row
1582  size_t nnz = 0, pos = 0;
1583  for (LO j = 0; j < blkSize; j++)
1584  nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1585 
1586  if (nnz == 0) {
1587  cols.resize(0);
1588  return;
1589  }
1590 
1591  cols.resize(nnz);
1592 
1593  // loop over all local dof rows associated with local node "row"
1594  ArrayView<const LO> inds;
1595  ArrayView<const SC> vals;
1596  for (LO j = 0; j < blkSize; j++) {
1597  A.getLocalRowView(row*blkSize+j, inds, vals);
1598  size_type numIndices = inds.size();
1599 
1600  if (numIndices == 0) // skip empty dof rows
1601  continue;
1602 
1603  // cols: stores all local node ids for current local node id "row"
1604  cols[pos++] = translation[inds[0]];
1605  for (size_type k = 1; k < numIndices; k++) {
1606  LO nodeID = translation[inds[k]];
1607  // Here we try to speed up the process by reducing the size of an array
1608  // to sort. This works if the column nonzeros belonging to the same
1609  // node are stored consequently.
1610  if (nodeID != cols[pos-1])
1611  cols[pos++] = nodeID;
1612  }
1613  }
1614  cols.resize(pos);
1615  nnz = pos;
1616 
1617  // Sort and remove duplicates
1618  std::sort(cols.begin(), cols.end());
1619  pos = 0;
1620  for (size_t j = 1; j < nnz; j++)
1621  if (cols[j] != cols[pos])
1622  cols[++pos] = cols[j];
1623  cols.resize(pos+1);
1624  }
1625 
1626  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1627  void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::MergeRowsWithDropping(const Matrix& A, const LO row, const ArrayRCP<const SC>& ghostedDiagVals, SC threshold, Array<LO>& cols, const Array<LO>& translation) const {
1628  typedef typename ArrayView<const LO>::size_type size_type;
1629  typedef Teuchos::ScalarTraits<SC> STS;
1630 
1631  // extract striding information
1632  LO blkSize = A.GetFixedBlockSize(); //< stores the size of the block within the strided map
1633  if (A.IsView("stridedMaps") == true) {
1634  Teuchos::RCP<const Map> myMap = A.getRowMap("stridedMaps");
1635  Teuchos::RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
1636  TEUCHOS_TEST_FOR_EXCEPTION(strMap == null, Exceptions::RuntimeError, "Map is not of type StridedMap");
1637  if (strMap->getStridedBlockId() > -1)
1638  blkSize = Teuchos::as<LO>(strMap->getStridingData()[strMap->getStridedBlockId()]);
1639  }
1640 
1641  // count nonzero entries in all dof rows associated with node row
1642  size_t nnz = 0, pos = 0;
1643  for (LO j = 0; j < blkSize; j++)
1644  nnz += A.getNumEntriesInLocalRow(row*blkSize+j);
1645 
1646  if (nnz == 0) {
1647  cols.resize(0);
1648  return;
1649  }
1650 
1651  cols.resize(nnz);
1652 
1653  // loop over all local dof rows associated with local node "row"
1654  ArrayView<const LO> inds;
1655  ArrayView<const SC> vals;
1656  for (LO j = 0; j < blkSize; j++) {
1657  A.getLocalRowView(row*blkSize+j, inds, vals);
1658  size_type numIndices = inds.size();
1659 
1660  if (numIndices == 0) // skip empty dof rows
1661  continue;
1662 
1663  // cols: stores all local node ids for current local node id "row"
1664  LO prevNodeID = -1;
1665  for (size_type k = 0; k < numIndices; k++) {
1666  LO dofID = inds[k];
1667  LO nodeID = translation[inds[k]];
1668 
1669  // we avoid a square root by using squared values
1670  typename STS::magnitudeType aiiajj = STS::magnitude(threshold*threshold*ghostedDiagVals[dofID]*ghostedDiagVals[row*blkSize+j]); // eps^2 * |a_ii| * |a_jj|
1671  typename STS::magnitudeType aij = STS::magnitude(vals[k]*vals[k]);
1672 
1673  // check dropping criterion
1674  if (aij > aiiajj || (row*blkSize+j == dofID)) {
1675  // accept entry in graph
1676 
1677  // Here we try to speed up the process by reducing the size of an array
1678  // to sort. This works if the column nonzeros belonging to the same
1679  // node are stored consequently.
1680  if (nodeID != prevNodeID) {
1681  cols[pos++] = nodeID;
1682  prevNodeID = nodeID;
1683  }
1684  }
1685  }
1686  }
1687  cols.resize(pos);
1688  nnz = pos;
1689 
1690  // Sort and remove duplicates
1691  std::sort(cols.begin(), cols.end());
1692  pos = 0;
1693  for (size_t j = 1; j < nnz; j++)
1694  if (cols[j] != cols[pos])
1695  cols[++pos] = cols[j];
1696  cols.resize(pos+1);
1697 
1698  return;
1699  }
1700 
1701 
1702 
1703  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1704  Teuchos::RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> > CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalize(Level & currentLevel,const RCP<Matrix>& A,bool generate_matrix) const {
1705  typedef Teuchos::ScalarTraits<SC> STS;
1706 
1707  const ParameterList & pL = GetParameterList();
1708  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
1709  const typename STS::magnitudeType rowSumTol = as<typename STS::magnitudeType>(pL.get<double>("aggregation: row sum drop tol"));
1710 
1711  RCP<LocalOrdinalVector> BlockNumber = Get<RCP<LocalOrdinalVector> >(currentLevel, "BlockNumber");
1712  RCP<LocalOrdinalVector> ghostedBlockNumber;
1713  GetOStream(Statistics1) << "Using BlockDiagonal Graph before dropping (with provided blocking)"<<std::endl;
1714 
1715  // Ghost the column block numbers if we need to
1716  RCP<const Import> importer = A->getCrsGraph()->getImporter();
1717  if(!importer.is_null()) {
1718  SubFactoryMonitor m1(*this, "Block Number import", currentLevel);
1719  ghostedBlockNumber= Xpetra::VectorFactory<LO,LO,GO,NO>::Build(importer->getTargetMap());
1720  ghostedBlockNumber->doImport(*BlockNumber, *importer, Xpetra::INSERT);
1721  }
1722  else {
1723  ghostedBlockNumber = BlockNumber;
1724  }
1725 
1726  // Accessors for block numbers
1727  Teuchos::ArrayRCP<const LO> row_block_number = BlockNumber->getData(0);
1728  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1729 
1730  // allocate space for the local graph
1731  ArrayRCP<size_t> rows_mat;
1732  ArrayRCP<LO> rows_graph,columns;
1733  ArrayRCP<SC> values;
1734  RCP<CrsMatrixWrap> crs_matrix_wrap;
1735 
1736  if(generate_matrix) {
1737  crs_matrix_wrap = rcp(new CrsMatrixWrap(A->getRowMap(), A->getColMap(), 0));
1738  crs_matrix_wrap->getCrsMatrix()->allocateAllValues(A->getNodeNumEntries(), rows_mat, columns, values);
1739  }
1740  else {
1741  rows_graph.resize(A->getNodeNumRows()+1);
1742  columns.resize(A->getNodeNumEntries());
1743  values.resize(A->getNodeNumEntries());
1744  }
1745 
1746  LO realnnz = 0;
1747  GO numDropped = 0, numTotal = 0;
1748  for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getNodeNumElements()); ++row) {
1749  LO row_block = row_block_number[row];
1750  size_t nnz = A->getNumEntriesInLocalRow(row);
1751  ArrayView<const LO> indices;
1752  ArrayView<const SC> vals;
1753  A->getLocalRowView(row, indices, vals);
1754 
1755  LO rownnz = 0;
1756  for (LO colID = 0; colID < Teuchos::as<LO>(nnz); colID++) {
1757  LO col = indices[colID];
1758  LO col_block = col_block_number[col];
1759 
1760  if(row_block == col_block) {
1761  if(generate_matrix) values[realnnz] = vals[colID];
1762  columns[realnnz++] = col;
1763  rownnz++;
1764  } else
1765  numDropped++;
1766  }
1767  if(generate_matrix) rows_mat[row+1] = realnnz;
1768  else rows_graph[row+1] = realnnz;
1769  }
1770 
1771  ArrayRCP<bool> boundaryNodes = Teuchos::arcp_const_cast<bool>(MueLu::Utilities<SC,LO,GO,NO>::DetectDirichletRows(*A, dirichletThreshold));
1772  if (rowSumTol > 0.)
1773  Utilities::ApplyRowSumCriterion(*A, rowSumTol, boundaryNodes);
1774 
1775 
1776  if(!generate_matrix) {
1777  // We can't resize an Arrayrcp and pass the checks for setAllValues
1778  values.resize(realnnz);
1779  columns.resize(realnnz);
1780  }
1781  numTotal = A->getNodeNumEntries();
1782 
1783  if (GetVerbLevel() & Statistics1) {
1784  GO numLocalBoundaryNodes = 0;
1785  GO numGlobalBoundaryNodes = 0;
1786  for (LO i = 0; i < boundaryNodes.size(); ++i)
1787  if (boundaryNodes[i])
1788  numLocalBoundaryNodes++;
1789  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
1790  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
1791  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
1792 
1793  GO numGlobalTotal, numGlobalDropped;
1794  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1795  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1796  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1797  if (numGlobalTotal != 0)
1798  GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1799  GetOStream(Statistics1) << std::endl;
1800  }
1801 
1802  Set(currentLevel, "Filtering", true);
1803 
1804  if(generate_matrix) {
1805  // NOTE: Trying to use A's Import/Export objects will cause the code to segfault back in Build() with errors on the Import
1806  // if you're using Epetra. I'm not really sure why. By using the Col==Domain and Row==Range maps, we get null Import/Export objects
1807  // here, which is legit, because we never use them anyway.
1808  crs_matrix_wrap->getCrsMatrix()->setAllValues(rows_mat,columns,values);
1809  crs_matrix_wrap->getCrsMatrix()->expertStaticFillComplete(A->getColMap(), A->getRowMap());
1810  }
1811  else {
1812  RCP<GraphBase> graph = rcp(new LWGraph(rows_graph, columns, A->getRowMap(), A->getColMap(), "block-diagonalized graph of A"));
1813  graph->SetBoundaryNodeMap(boundaryNodes);
1814  Set(currentLevel, "Graph", graph);
1815  }
1816 
1817 
1818  Set(currentLevel, "DofsPerNode", 1);
1819  return crs_matrix_wrap;
1820  }
1821 
1822 
1823  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1824  void CoalesceDropFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BlockDiagonalizeGraph(const RCP<GraphBase> & inputGraph, const RCP<LocalOrdinalVector> & ghostedBlockNumber, RCP<GraphBase> & outputGraph, RCP<const Import> & importer) const {
1825  typedef Teuchos::ScalarTraits<SC> STS;
1826 
1827  TEUCHOS_TEST_FOR_EXCEPTION(ghostedBlockNumber.is_null(), Exceptions::RuntimeError, "BlockDiagonalizeGraph(): ghostedBlockNumber is null.");
1828  const ParameterList & pL = GetParameterList();
1829 
1830  const bool localizeColoringGraph = pL.get<bool>("aggregation: coloring: localize color graph");
1831 
1832  GetOStream(Statistics1) << "Using BlockDiagonal Graph after Dropping (with provided blocking)";
1833  if (localizeColoringGraph)
1834  GetOStream(Statistics1) << ", with localization" <<std::endl;
1835  else
1836  GetOStream(Statistics1) << ", without localization" <<std::endl;
1837 
1838  // Accessors for block numbers
1839  Teuchos::ArrayRCP<const LO> row_block_number = ghostedBlockNumber->getData(0);
1840  Teuchos::ArrayRCP<const LO> col_block_number = ghostedBlockNumber->getData(0);
1841 
1842  // allocate space for the local graph
1843  ArrayRCP<size_t> rows_mat;
1844  ArrayRCP<LO> rows_graph,columns;
1845 
1846  rows_graph.resize(inputGraph->GetNodeNumVertices()+1);
1847  columns.resize(inputGraph->GetNodeNumEdges());
1848 
1849  LO realnnz = 0;
1850  GO numDropped = 0, numTotal = 0;
1851  const LO numRows = Teuchos::as<LO>(inputGraph->GetDomainMap()->getNodeNumElements());
1852  if (localizeColoringGraph) {
1853 
1854  for (LO row = 0; row < numRows; ++row) {
1855  LO row_block = row_block_number[row];
1856  ArrayView<const LO> indices = inputGraph->getNeighborVertices(row);
1857 
1858  LO rownnz = 0;
1859  for (LO colID = 0; colID < Teuchos::as<LO>(indices.size()); colID++) {
1860  LO col = indices[colID];
1861  LO col_block = col_block_number[col];
1862 
1863  if((row_block == col_block) && (col < numRows)) {
1864  columns[realnnz++] = col;
1865  rownnz++;
1866  } else
1867  numDropped++;
1868  }
1869  rows_graph[row+1] = realnnz;
1870  }
1871  } else {
1872  // ghosting of boundary node map
1873  Teuchos::ArrayRCP<const bool> boundaryNodes = inputGraph->GetBoundaryNodeMap();
1874  auto boundaryNodesVector = Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(inputGraph->GetDomainMap());
1875  for (size_t i=0; i<inputGraph->GetNodeNumVertices(); i++)
1876  boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i];
1877  // Xpetra::IO<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Write("boundary",*boundaryNodesVector);
1878  auto boundaryColumnVector = Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(inputGraph->GetImportMap());
1879  boundaryColumnVector->doImport(*boundaryNodesVector,*importer, Xpetra::INSERT);
1880  auto boundaryColumn = boundaryColumnVector->getData(0);
1881 
1882  for (LO row = 0; row < numRows; ++row) {
1883  LO row_block = row_block_number[row];
1884  ArrayView<const LO> indices = inputGraph->getNeighborVertices(row);
1885 
1886  LO rownnz = 0;
1887  for (LO colID = 0; colID < Teuchos::as<LO>(indices.size()); colID++) {
1888  LO col = indices[colID];
1889  LO col_block = col_block_number[col];
1890 
1891  if((row_block == col_block) && ((row == col) || (boundaryColumn[col] == 0))) {
1892  columns[realnnz++] = col;
1893  rownnz++;
1894  } else
1895  numDropped++;
1896  }
1897  rows_graph[row+1] = realnnz;
1898  }
1899  }
1900 
1901  columns.resize(realnnz);
1902  numTotal = inputGraph->GetNodeNumEdges();
1903 
1904  if (GetVerbLevel() & Statistics1) {
1905  RCP<const Teuchos::Comm<int> > comm = inputGraph->GetDomainMap()->getComm();
1906  GO numGlobalTotal, numGlobalDropped;
1907  MueLu_sumAll(comm, numTotal, numGlobalTotal);
1908  MueLu_sumAll(comm, numDropped, numGlobalDropped);
1909  GetOStream(Statistics1) << "Number of dropped entries in block-diagonalized matrix graph: " << numGlobalDropped << "/" << numGlobalTotal;
1910  if (numGlobalTotal != 0)
1911  GetOStream(Statistics1) << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)";
1912  GetOStream(Statistics1) << std::endl;
1913  }
1914 
1915  if (localizeColoringGraph) {
1916  outputGraph = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1917  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1918  } else {
1919  TEUCHOS_ASSERT(inputGraph->GetDomainMap()->lib() == Xpetra::UseTpetra);
1920 #ifdef HAVE_XPETRA_TPETRA
1921  auto outputGraph2 = rcp(new LWGraph(rows_graph, columns, inputGraph->GetDomainMap(), inputGraph->GetImportMap(), "block-diagonalized graph of A"));
1922 
1923  auto tpGraph = Xpetra::toTpetra(rcp_const_cast<const CrsGraph>(outputGraph2->GetCrsGraph()));
1924  auto sym = rcp(new Tpetra::CrsGraphTransposer<LocalOrdinal,GlobalOrdinal,Node>(tpGraph));
1925  auto tpGraphSym = sym->symmetrize();
1926 
1927  auto rowsSym = tpGraphSym->getNodeRowPtrs();
1928  ArrayRCP<LO> rows_graphSym;
1929  rows_graphSym.resize(rowsSym.size());
1930  for (LO row = 0; row < rowsSym.size(); row++)
1931  rows_graphSym[row] = rowsSym[row];
1932  outputGraph = rcp(new LWGraph(rows_graphSym, tpGraphSym->getNodePackedIndices(), inputGraph->GetDomainMap(), Xpetra::toXpetra(tpGraphSym->getColMap()), "block-diagonalized graph of A"));
1933  outputGraph->SetBoundaryNodeMap(inputGraph->GetBoundaryNodeMap());
1934 #endif
1935  }
1936 
1937  }
1938 
1939 
1940 
1941 } //namespace MueLu
1942 
1943 #endif // MUELU_COALESCEDROPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
#define MueLu_sumAll(rcpComm, in, out)
static void AmalgamateMap(const Map &sourceMap, const Matrix &A, RCP< const Map > &amalgamatedMap, Array< LO > &translation)
Method to create merged map for systems of PDEs.
static const GlobalOrdinal DOFGid2NodeId(GlobalOrdinal gid, LocalOrdinal blockSize, const GlobalOrdinal offset, const GlobalOrdinal indexBase)
Translate global (row/column) id to global amalgamation block id.
void MergeRows(const Matrix &A, const LO row, Array< LO > &cols, const Array< LO > &translation) const
Method to merge rows of matrix for systems of PDEs.
Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > BlockDiagonalize(Level &currentLevel, const RCP< Matrix > &A, bool generate_matrix) const
void DeclareInput(Level &currentLevel) const
Input.
void MergeRowsWithDropping(const Matrix &A, const LO row, const ArrayRCP< const SC > &ghostedDiagVals, SC threshold, Array< LO > &cols, const Array< LO > &translation) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void Build(Level &currentLevel) const
Build an object with this factory.
void BlockDiagonalizeGraph(const RCP< GraphBase > &inputGraph, const RCP< LocalOrdinalVector > &ghostedBlockNumber, RCP< GraphBase > &outputGraph, RCP< const Import > &importer) const
Exception indicating invalid cast attempted.
Exception throws to report incompatible objects (like maps).
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
MueLu representation of a compressed row storage graph.
Lightweight MueLu representation of a compressed row storage graph.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
MueLu utility class.
static Teuchos::ScalarTraits< Scalar >::magnitudeType Distance2(const Teuchos::Array< Teuchos::ArrayRCP< const Scalar >> &v, LocalOrdinal i0, LocalOrdinal i1)
static Teuchos::ArrayRCP< Magnitude > GetMatrixMaxMinusOffDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
static void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Magnitude rowSumTol, Teuchos::ArrayRCP< bool > &dirichletRows)
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
Namespace for MueLu classes and methods.
@ Warnings0
Important warning messages (one line)
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
@ Warnings1
Additional warnings.
@ Parameters0
Print class parameters.
DropTol(DropTol &&)=default
DropTol(real_type val_, real_type diag_, LO col_, bool drop_)
DropTol & operator=(DropTol const &)=default
DropTol & operator=(DropTol &&)=default
DropTol(DropTol const &)=default