MueLu  Version of the Day
MueLu_ClassicalPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_CLASSICALPFACTORY_DEF_HPP
47 #define MUELU_CLASSICALPFACTORY_DEF_HPP
48 
49 #include <Xpetra_MultiVectorFactory.hpp>
50 #include <Xpetra_VectorFactory.hpp>
51 #include <Xpetra_CrsGraphFactory.hpp>
52 #include <Xpetra_Matrix.hpp>
53 #include <Xpetra_Map.hpp>
54 #include <Xpetra_Map.hpp>
55 #include <Xpetra_MapFactory.hpp>
56 #include <Xpetra_Vector.hpp>
57 #include <Xpetra_Import.hpp>
58 #include <Xpetra_ImportUtils.hpp>
59 #include <Xpetra_IO.hpp>
60 #include <Xpetra_StridedMapFactory.hpp>
61 
62 #include <Teuchos_OrdinalTraits.hpp>
63 
64 #include "MueLu_MasterList.hpp"
65 #include "MueLu_Monitor.hpp"
66 #include "MueLu_PerfUtils.hpp"
68 #include "MueLu_ClassicalMapFactory.hpp"
69 #include "MueLu_Utilities.hpp"
70 #include "MueLu_AmalgamationInfo.hpp"
71 #include "MueLu_GraphBase.hpp"
72 
73 
74 //#define CMS_DEBUG
75 //#define CMS_DUMP
76 
77 namespace {
78 
79 template<class SC>
80 int Sign(SC val) {
81  using STS = typename Teuchos::ScalarTraits<SC>;
82  typename STS::magnitudeType MT_ZERO = Teuchos::ScalarTraits<typename STS::magnitudeType>::zero();
83  if(STS::real(val) > MT_ZERO) return 1;
84  else if(STS::real(val) < MT_ZERO) return -1;
85  else return 0;
86 }
87 
88 }// anonymous namepsace
89 
90 namespace MueLu {
91 
92 
93 
94 
95  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
97  RCP<ParameterList> validParamList = rcp(new ParameterList());
98 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
99  SET_VALID_ENTRY("aggregation: deterministic");
100  SET_VALID_ENTRY("aggregation: coloring algorithm");
101  SET_VALID_ENTRY("aggregation: classical scheme");
102 
103  // To know if we need BlockNumber
104  SET_VALID_ENTRY("aggregation: drop scheme");
105  {
106  typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
107  validParamList->getEntry("aggregation: classical scheme").setValidator(rcp(new validatorType(Teuchos::tuple<std::string>("direct","ext+i","classical modified"), "aggregation: classical scheme")));
108 
109  }
110 
111 #undef SET_VALID_ENTRY
112  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
113  // validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory of UnAmalgamationInfo");
114  validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
115  validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
116  validParamList->set< RCP<const FactoryBase> >("CoarseMap", Teuchos::null, "Generating factory of the CoarseMap");
117  validParamList->set< RCP<const FactoryBase> >("FC Splitting", Teuchos::null, "Generating factory of the FC Splitting");
118  validParamList->set< RCP<const FactoryBase> >("BlockNumber", Teuchos::null, "Generating factory for Block Number");
119  // validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
120 
121  return validParamList;
122  }
123 
124  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
126  Input(fineLevel, "A");
127  Input(fineLevel, "Graph");
128  Input(fineLevel, "DofsPerNode");
129  // Input(fineLevel, "UnAmalgamationInfo");
130  Input(fineLevel, "CoarseMap");
131  Input(fineLevel, "FC Splitting");
132 
133  const ParameterList& pL = GetParameterList();
134  std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
135  if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
136  Input(fineLevel, "BlockNumber");
137  }
138 
139  }
140 
141  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
143  return BuildP(fineLevel, coarseLevel);
144  }
145 
146  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
148  FactoryMonitor m(*this, "Build", coarseLevel);
149  using STS = Teuchos::ScalarTraits<SC>;
150 
151  // We start by assuming that someone did a reasonable strength of connection
152  // algorithm before we start to get our Graph, DofsPerNode and UnAmalgamationInfo
153 
154  // We begin by getting a MIS (from a graph coloring) and then at that point we need
155  // to start generating entries for the prolongator.
156  RCP<const Matrix> A = Get< RCP<Matrix> >(fineLevel, "A");
157  RCP<const Map> ownedCoarseMap = Get<RCP<const Map> >(fineLevel,"CoarseMap");
158  RCP<const LocalOrdinalVector> owned_fc_splitting = Get<RCP<LocalOrdinalVector> >(fineLevel,"FC Splitting");
159  RCP<const GraphBase> graph = Get< RCP<GraphBase> >(fineLevel, "Graph");
160  // LO nDofsPerNode = Get<LO>(fineLevel, "DofsPerNode");
161  // RCP<AmalgamationInfo> amalgInfo = Get< RCP<AmalgamationInfo> > (fineLevel, "UnAmalgamationInfo");
162  RCP<const Import> Importer = A->getCrsGraph()->getImporter();
163  Xpetra::UnderlyingLib lib = ownedCoarseMap->lib();
164 
165  // RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
166  RCP<Matrix> P;
167  // SC SC_ZERO = STS::zero();
168  LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
171  const ParameterList& pL = GetParameterList();
172 
173  // FIXME: This guy doesn't work right now for NumPDEs != 1
174  TEUCHOS_TEST_FOR_EXCEPTION(A->GetFixedBlockSize() != 1, Exceptions::RuntimeError,"ClassicalPFactory: Multiple PDEs per node not supported yet");
175 
176  // NOTE: Let's hope we never need to deal with this case
177  TEUCHOS_TEST_FOR_EXCEPTION(!A->getRowMap()->isSameAs(*A->getDomainMap()),Exceptions::RuntimeError,"ClassicalPFactory: MPI Ranks > 1 not supported yet");
178 
179  // Do we need ghosts rows of A and myPointType?
180  std::string scheme = pL.get<std::string>("aggregation: classical scheme");
181  bool need_ghost_rows =false;
182  if(scheme == "ext+i")
183  need_ghost_rows=true;
184  else if(scheme == "direct")
185  need_ghost_rows=false;
186  else if(scheme == "classical modified")
187  need_ghost_rows=true;
188  // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
189 
190  if (GetVerbLevel() & Statistics1) {
191  GetOStream(Statistics1) << "ClassicalPFactory: scheme = "<<scheme<<std::endl;
192  }
193 
194  // Ghost the FC splitting and grab the data (if needed)
195  RCP<const LocalOrdinalVector> fc_splitting;
196  ArrayRCP<const LO> myPointType;
197  if(Importer.is_null()) {
198  fc_splitting = owned_fc_splitting;
199  }
200  else {
201  RCP<LocalOrdinalVector> fc_splitting_nonconst = LocalOrdinalVectorFactory::Build(A->getCrsGraph()->getColMap());
202  fc_splitting_nonconst->doImport(*owned_fc_splitting,*Importer,Xpetra::INSERT);
203  fc_splitting = fc_splitting_nonconst;
204  }
205  myPointType = fc_splitting->getData(0);
206 
207 
208  /* Ghost A (if needed) */
209  RCP<const Matrix> Aghost;
210  RCP<const LocalOrdinalVector> fc_splitting_ghost;
211  ArrayRCP<const LO> myPointType_ghost;
212  RCP<const Import> remoteOnlyImporter;
213  if(need_ghost_rows && !Importer.is_null()){
214  ArrayView<const LO> remoteLIDs = Importer->getRemoteLIDs();
215  size_t numRemote = Importer->getNumRemoteIDs();
216  Array<GO> remoteRows(numRemote);
217  for (size_t i = 0; i < numRemote; i++)
218  remoteRows[i] = Importer->getTargetMap()->getGlobalElement(remoteLIDs[i]);
219 
220  RCP<const Map> remoteRowMap = MapFactory::Build(lib,Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(), remoteRows(),
221  A->getDomainMap()->getIndexBase(), A->getDomainMap()->getComm());
222 
223  remoteOnlyImporter = Importer->createRemoteOnlyImport(remoteRowMap);
224  RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
225  RCP<CrsMatrix> Aghost_crs = CrsMatrixFactory::Build(Acrs,*remoteOnlyImporter,A->getDomainMap(),remoteOnlyImporter->getTargetMap());
226  Aghost = rcp(new CrsMatrixWrap(Aghost_crs));
227  // CAG: It seems that this isn't actually needed?
228  // We also may need need to ghost myPointType for Aghost
229  // RCP<const Import> Importer2 = Aghost->getCrsGraph()->getImporter();
230  // if(Importer2.is_null()) {
231  // RCP<LocalOrdinalVector> fc_splitting_ghost_nonconst = LocalOrdinalVectorFactory::Build(Aghost->getColMap());
232  // fc_splitting_ghost_nonconst->doImport(*owned_fc_splitting,*Importer,Xpetra::INSERT);
233  // fc_splitting_ghost = fc_splitting_ghost_nonconst;
234  // myPointType_ghost = fc_splitting_ghost->getData(0);
235  // }
236  }
237 
238  /* Generate the ghosted Coarse map using the "Tuminaro maneuver" (if needed)*/
239  RCP<const Map> coarseMap;
240  if(Importer.is_null())
241  coarseMap = ownedCoarseMap;
242  else {
243  // Generate a domain vector with the coarse ID's as entries for C points
244  GhostCoarseMap(*A,*Importer,myPointType,ownedCoarseMap,coarseMap);
245  }
246 
247  /* Get the block number, if we need it (and ghost it) */
248  RCP<LocalOrdinalVector> BlockNumber;
249  std::string drop_algo = pL.get<std::string>("aggregation: drop scheme");
250  if (drop_algo.find("block diagonal") != std::string::npos || drop_algo == "signed classical") {
251  RCP<LocalOrdinalVector> OwnedBlockNumber;
252  OwnedBlockNumber = Get<RCP<LocalOrdinalVector> >(fineLevel, "BlockNumber");
253  if(Importer.is_null())
254  BlockNumber = OwnedBlockNumber;
255  else{
256  BlockNumber = LocalOrdinalVectorFactory::Build(A->getColMap());
257  BlockNumber->doImport(*OwnedBlockNumber,*Importer,Xpetra::INSERT);
258  }
259  }
260 
261 #if defined(CMS_DEBUG) || defined(CMS_DUMP)
262  {
263  RCP<const CrsMatrix> Acrs = rcp_dynamic_cast<const CrsMatrixWrap>(A)->getCrsMatrix();
264  int rank = A->getRowMap()->getComm()->getRank();
265  printf("[%d] A local size = %dx%d\n",rank,(int)Acrs->getRowMap()->getNodeNumElements(),(int)Acrs->getColMap()->getNodeNumElements());
266 
267  printf("[%d] graph local size = %dx%d\n",rank,(int)graph->GetDomainMap()->getNodeNumElements(),(int)graph->GetImportMap()->getNodeNumElements());
268 
269  std::ofstream ofs(std::string("dropped_graph_") + std::to_string(fineLevel.GetLevelID())+std::string("_") + std::to_string(rank) + std::string(".dat"),std::ofstream::out);
270  RCP<Teuchos::FancyOStream> fancy = Teuchos::fancyOStream(Teuchos::rcpFromRef(ofs));
271  graph->print(*fancy,Debug);
272  std::string out_fc = std::string("fc_splitting_") + std::to_string(fineLevel.GetLevelID()) +std::string("_") + std::to_string(rank)+ std::string(".dat");
273  std::string out_block = std::string("block_number_") + std::to_string(fineLevel.GetLevelID()) +std::string("_") + std::to_string(rank)+ std::string(".dat");
274 
275  // We don't support writing LO vectors in Xpetra (boo!) so....
276  using real_type = typename Teuchos::ScalarTraits<SC>::magnitudeType;
277  using RealValuedMultiVector = typename Xpetra::MultiVector<real_type,LO,GO,NO>;
278  typedef Xpetra::MultiVectorFactory<real_type,LO,GO,NO> RealValuedMultiVectorFactory;
279 
280  RCP<RealValuedMultiVector> mv = RealValuedMultiVectorFactory::Build(fc_splitting->getMap(),1);
281  ArrayRCP<real_type> mv_data= mv->getDataNonConst(0);
282 
283  // FC Splitting
284  ArrayRCP<const LO> fc_data= fc_splitting->getData(0);
285  for(LO i=0; i<(LO)fc_data.size(); i++)
286  mv_data[i] = Teuchos::as<real_type>(fc_data[i]);
287  Xpetra::IO<real_type,LO,GO,NO>::Write(out_fc,*mv);
288 
289  // Block Number
290  if(!BlockNumber.is_null()) {
291  RCP<RealValuedMultiVector> mv2 = RealValuedMultiVectorFactory::Build(BlockNumber->getMap(),1);
292  ArrayRCP<real_type> mv_data2= mv2->getDataNonConst(0);
293  ArrayRCP<const LO> b_data= BlockNumber->getData(0);
294  for(LO i=0; i<(LO)b_data.size(); i++) {
295  mv_data2[i] = Teuchos::as<real_type>(b_data[i]);
296  }
297  Xpetra::IO<real_type,LO,GO,NO>::Write(out_block,*mv2);
298  }
299  }
300 
301 
302 #endif
303 
304 
305  /* Generate reindexing arrays */
306  // Note: cpoint2pcol is ghosted if myPointType is
307  // NOTE: Since the ghosted coarse column map follows the ordering of
308  // the fine column map, this *should* work, because it is in local indices.
309  // pcol2cpoint - Takes a LCID for P and turns in into an LCID for A.
310  // cpoint2pcol - Takes a LCID for A --- if it is a C Point --- and turns in into an LCID for P.
311  Array<LO> cpoint2pcol(myPointType.size(),LO_INVALID);
312  Array<LO> pcol2cpoint(coarseMap->getNodeNumElements(),LO_INVALID);
313  LO num_c_points = 0;
314  LO num_f_points =0;
315  for(LO i=0; i<(LO) myPointType.size(); i++) {
316  if(myPointType[i] == C_PT) {
317  cpoint2pcol[i] = num_c_points;
318  num_c_points++;
319  }
320  else if (myPointType[i] == F_PT)
321  num_f_points++;
322  }
323  for(LO i=0; i<(LO)cpoint2pcol.size(); i++) {
324  if(cpoint2pcol[i] != LO_INVALID)
325  pcol2cpoint[cpoint2pcol[i]] =i;
326  }
327 
328  // Generate edge strength flags (this will make everything easier later)
329  // These do *not* need to be ghosted (unlike A)
330  Teuchos::Array<size_t> eis_rowptr;
331  Teuchos::Array<bool> edgeIsStrong;
332  {
333  SubFactoryMonitor sfm(*this,"Strength Flags",coarseLevel);
334  GenerateStrengthFlags(*A,*graph,eis_rowptr,edgeIsStrong);
335  }
336 
337  // Phase 3: Generate the P matrix
338  RCP<const Map> coarseColMap = coarseMap;
339  RCP<const Map> coarseDomainMap = ownedCoarseMap;
340  if(scheme == "ext+i") {
341  SubFactoryMonitor sfm(*this,"Ext+i Interpolation",coarseLevel);
342  Coarsen_Ext_Plus_I(*A,Aghost,*graph,coarseColMap,coarseDomainMap,num_c_points,num_f_points,myPointType(),myPointType_ghost(),cpoint2pcol,pcol2cpoint,eis_rowptr,edgeIsStrong,BlockNumber,P);
343  }
344  else if(scheme == "direct") {
345  SubFactoryMonitor sfm(*this,"Direct Interpolation",coarseLevel);
346  Coarsen_Direct(*A,Aghost,*graph,coarseColMap,coarseDomainMap,num_c_points,num_f_points,myPointType(),myPointType_ghost(),cpoint2pcol,pcol2cpoint,eis_rowptr,edgeIsStrong,BlockNumber,P);
347  }
348  else if(scheme == "classical modified") {
349  SubFactoryMonitor sfm(*this,"Classical Modified Interpolation",coarseLevel);
350  Coarsen_ClassicalModified(*A,Aghost,*graph,coarseColMap,coarseDomainMap,num_c_points,num_f_points,myPointType(),myPointType_ghost(),cpoint2pcol,pcol2cpoint,eis_rowptr,edgeIsStrong,BlockNumber,remoteOnlyImporter,P);
351  }
352  // NOTE: ParameterList validator will check this guy so we don't really need an "else" here
353 
354 #ifdef CMS_DEBUG
355  Xpetra::IO<SC,LO,GO,NO>::Write("classical_p.mat."+std::to_string(coarseLevel.GetLevelID()), *P);
356  // Xpetra::IO<SC,LO,GO,NO>::WriteLocal("classical_p.mat."+std::to_string(coarseLevel.GetLevelID()), *P);
357 #endif
358 
359  // Save output
360  Set(coarseLevel,"P",P);
361  RCP<const CrsGraph> pg = P->getCrsGraph();
362  Set(coarseLevel,"P Graph",pg);
363 
364  //RCP<MultiVector> coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
365  // P->apply(*fineNullspace, *coarseNullspace, Teuchos::TRANS, Teuchos::ScalarTraits<SC>::one(), Teuchos::ScalarTraits<SC>::zero());
366  // Set(coarseLevel, "Nullspace", coarseNullspace);
367 
368  if (IsPrint(Statistics1)) {
369  RCP<ParameterList> params = rcp(new ParameterList());
370  params->set("printLoadBalancingInfo", true);
371  GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*P, "P", params);
372  }
373  }
374 /* ************************************************************************* */
375 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
377 Coarsen_ClassicalModified(const Matrix & A,const RCP<const Matrix> & Aghost, const GraphBase & graph, RCP<const Map> & coarseColMap, RCP<const Map> & coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO> & myPointType, const Teuchos::ArrayView<const LO> & myPointType_ghost, const Teuchos::Array<LO> & cpoint2pcol, const Teuchos::Array<LO> & pcol2cpoint, Teuchos::Array<size_t> & eis_rowptr, Teuchos::Array<bool> & edgeIsStrong, RCP<LocalOrdinalVector> & BlockNumber, RCP<const Import> remoteOnlyImporter,RCP<Matrix> & P) const {
378  /* ============================================================= */
379  /* Phase 3 : Classical Modified Interpolation */
380  /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
381  /* interpolation for parallel algebraic multigrid", NLAA 2008 */
382  /* 15:115-139 */
383  /* ============================================================= */
384  /* Definitions: */
385  /* F = F-points */
386  /* C = C-points */
387  /* N_i = non-zero neighbors of node i */
388  /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
389  /* F_i^s = F \cap S_i [strong F-neighbors of i] */
390  /* C_i^s = C \cap S_i [strong C-neighbors of i] */
391 
392  /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
393  /* This guy has a typo. The paper had a \cap instead of \cup */
394  /* I would note that this set can contain both F-points and */
395  /* C-points. They're just weak neighbors of this guy. */
396  /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
397 
398 
399  /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
400  /* { a_ij, otherwise */
401 
402  /* F_i^s\star = {k\in N_i | C_i^s \cap C_k^s = \emptyset} */
403  /* [set of F-neighbors of i that do not share a strong */
404  /* C-neighbor with i] */
405 
406 
407  /* Rewritten Equation (9) on p. 120 */
408  /* \tilde{a}_ii = (a_ij + \sum_{k\in{N_i^w \cup F_i^s\star}} a_ik */
409  /* */
410  /* f_ij = \sum_{k\in{F_i^s\setminusF_i^s*}} \frac{a_ik \bar{a}_kj}{\sum_{m\inC_i^s \bar{a}_km}} */
411  /* */
412  /* w_ij = \frac{1}{\tilde{a}_ii} ( a_ij + f_ij) for all j in C_i^s */
413 
414  // const point_type F_PT = ClassicalMapFactory::F_PT;
416  const point_type DIRICHLET_PT = ClassicalMapFactory::DIRICHLET_PT;
417  using STS = typename Teuchos::ScalarTraits<SC>;
418  LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
419  // size_t ST_INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
420  SC SC_ZERO = STS::zero();
421 #ifdef CMS_DEBUG
422  int rank = A.getRowMap()->getComm()->getRank();
423 #endif
424 
425  // Get the block number if we have it.
426  ArrayRCP<const LO> block_id;
427  if(!BlockNumber.is_null())
428  block_id = BlockNumber->getData(0);
429 
430  // Initial (estimated) allocation
431  // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
432  // needs to be a copy below.
433  size_t Nrows = A.getNodeNumRows();
434  double c_point_density = (double)num_c_points / (num_c_points+num_f_points);
435  double mean_strong_neighbors_per_row = (double) graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
436  // double mean_neighbors_per_row = (double)A.getNodeNumEntries() / Nrows;
437  double nnz_per_row_est = c_point_density*mean_strong_neighbors_per_row;
438 
439  size_t nnz_est = std::max(Nrows,std::min((size_t)A.getNodeNumEntries(),(size_t)(nnz_per_row_est*Nrows)));
440  Array<size_t> tmp_rowptr(Nrows+1);
441  Array<LO> tmp_colind(nnz_est);
442 
443  // Algorithm (count+realloc)
444  // For each row, i,
445  // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
446  size_t ct=0;
447  for(LO row=0; row < (LO) Nrows; row++) {
448  size_t row_start = eis_rowptr[row];
449  ArrayView<const LO> indices;
450  ArrayView<const SC> vals;
451  std::set<LO> C_hat;
452  if(myPointType[row] == DIRICHLET_PT) {
453  // Dirichlet points get ignored completely
454  }
455  else if(myPointType[row] == C_PT) {
456  // C-Points get a single 1 in their row
457  C_hat.insert(cpoint2pcol[row]);
458  }
459  else {
460  // F-Points have a more complicated interpolation
461 
462  // C-neighbors of row
463  A.getLocalRowView(row, indices, vals);
464  for(LO j=0; j<indices.size(); j++)
465  if(myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
466  C_hat.insert(cpoint2pcol[indices[j]]);
467  }// end else
468 
469  // Realloc if needed
470  if(ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
471  tmp_colind.resize(std::max(ct+(size_t)C_hat.size(),(size_t)2*tmp_colind.size()));
472  }
473 
474  // Copy
475  std::copy(C_hat.begin(), C_hat.end(),tmp_colind.begin()+ct);
476  ct+=C_hat.size();
477  tmp_rowptr[row+1] = tmp_rowptr[row] + C_hat.size();
478  }
479  // Resize down
480  tmp_colind.resize(tmp_rowptr[Nrows]);
481 
482  RCP<CrsMatrix> Pcrs = CrsMatrixFactory::Build(A.getRowMap(),coarseColMap,0);
483  ArrayRCP<size_t> P_rowptr;
484  ArrayRCP<LO> P_colind;
485  ArrayRCP<SC> P_values;
486 
487  if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
488  P_rowptr = Teuchos::arcpFromArray(tmp_rowptr);
489  P_colind = Teuchos::arcpFromArray(tmp_colind);
490  P_values.resize(P_rowptr[Nrows]);
491  } else {
492  // Make the matrix and then get the graph out of it (necessary for Epetra)
493  // NOTE: The lack of an Epetra_CrsGraph::ExpertStaticFillComplete makes this
494  // impossible to do the obvious way
495  Pcrs->allocateAllValues(tmp_rowptr[Nrows],P_rowptr,P_colind,P_values);
496  std::copy(tmp_rowptr.begin(),tmp_rowptr.end(), P_rowptr.begin());
497  std::copy(tmp_colind.begin(),tmp_colind.end(), P_colind.begin());
498  Pcrs->setAllValues(P_rowptr,P_colind,P_values);
499  Pcrs->expertStaticFillComplete(/*domain*/coarseDomainMap, /*range*/A.getDomainMap());
500  }
501 
502  // Generate a remote-ghosted version of the graph (if we're in parallel)
503  RCP<const CrsGraph> Pgraph;
504  RCP<const CrsGraph> Pghost;
505  // TODO: We might want to be more efficient here and actually use
506  // Pgraph in the matrix constructor.
507  ArrayRCP<const size_t> Pghost_rowptr;
508  ArrayRCP<const LO> Pghost_colind;
509  if(!remoteOnlyImporter.is_null()) {
510  if (A.getRowMap()->lib() == Xpetra::UseTpetra) {
511  RCP<CrsGraph> tempPgraph = CrsGraphFactory::Build(A.getRowMap(),coarseColMap,P_rowptr,P_colind);
512  tempPgraph->fillComplete(coarseDomainMap,A.getDomainMap());
513  Pgraph = tempPgraph;
514  } else {
515  // Epetra does not have a graph constructor that uses rowptr and colind.
516  Pgraph = Pcrs->getCrsGraph();
517  }
518  TEUCHOS_ASSERT(!Pgraph.is_null());
519  Pghost = CrsGraphFactory::Build(Pgraph,*remoteOnlyImporter,Pgraph->getDomainMap(),remoteOnlyImporter->getTargetMap());
520  Pghost->getAllIndices(Pghost_rowptr,Pghost_colind);
521  }
522 
523  // Gustavson-style perfect hashing
524  ArrayRCP<LO> Acol_to_Pcol(A.getColMap()->getNodeNumElements(),LO_INVALID);
525 
526  // Get a quick reindexing array from Pghost LCIDs to P LCIDs
527  ArrayRCP<LO> Pghostcol_to_Pcol;
528  if(!Pghost.is_null()) {
529  Pghostcol_to_Pcol.resize(Pghost->getColMap()->getNodeNumElements(),LO_INVALID);
530  for(LO i=0; i<(LO) Pghost->getColMap()->getNodeNumElements(); i++)
531  Pghostcol_to_Pcol[i] = Pgraph->getColMap()->getLocalElement(Pghost->getColMap()->getGlobalElement(i));
532  }//end Pghost
533 
534  // Get a quick reindexing array from Aghost LCIDs to A LCIDs
535  ArrayRCP<LO> Aghostcol_to_Acol;
536  if(!Aghost.is_null()) {
537  Aghostcol_to_Acol.resize(Aghost->getColMap()->getNodeNumElements(),LO_INVALID);
538  for(LO i=0; i<(LO)Aghost->getColMap()->getNodeNumElements(); i++)
539  Aghostcol_to_Acol[i] = A.getColMap()->getLocalElement(Aghost->getColMap()->getGlobalElement(i));
540  }//end Aghost
541 
542 
543  // Algorithm (numeric)
544  for(LO i=0; i < (LO)Nrows; i++) {
545  if(myPointType[i] == DIRICHLET_PT) {
546  // Dirichlet points get ignored completely
547 #ifdef CMS_DEBUG
548  // DEBUG
549  printf("[%d] ** A(%d,:) is a Dirichlet-Point.\n",rank,i);
550 #endif
551  }
552  else if (myPointType[i] == C_PT) {
553  // C Points get a single 1 in their row
554  P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
555 #ifdef CMS_DEBUG
556  // DEBUG
557  printf("[%d] ** A(%d,:) is a C-Point.\n",rank,i);
558 #endif
559  }
560  else {
561  // F Points get all of the fancy stuff
562 #ifdef CMS_DEBUG
563  // DEBUG
564  printf("[%d] ** A(%d,:) is a F-Point.\n",rank,i);
565 #endif
566 
567  // Get all of the relevant information about this row
568  ArrayView<const LO> A_indices_i, A_indices_k;
569  ArrayView<const SC> A_vals_i, A_vals_k;
570  A.getLocalRowView(i, A_indices_i, A_vals_i);
571  size_t row_start = eis_rowptr[i];
572 
573  ArrayView<const LO> P_indices_i = P_colind.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
574  ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
575 
576  // FIXME: Do we need this?
577  for(LO j=0; j<(LO)P_vals_i.size(); j++)
578  P_vals_i[j] = SC_ZERO;
579 
580  // Stash the hash: Flag any strong C-points with their index into P_colind
581  // NOTE: We'll consider any points that are LO_INVALID or less than P_rowptr[i] as not strong C-points
582  for(LO j=0; j<(LO)P_indices_i.size(); j++) {
583  Acol_to_Pcol[pcol2cpoint[P_indices_i[j]]] = P_rowptr[i] + j;
584  }
585 
586  // Loop over the entries in the row
587  SC first_denominator = SC_ZERO;
588 #ifdef CMS_DEBUG
589  SC a_ii = SC_ZERO;
590 #endif
591  for(LO k0=0; k0<(LO)A_indices_i.size(); k0++) {
592  LO k = A_indices_i[k0];
593  SC a_ik = A_vals_i[k0];
594  LO pcol_k = Acol_to_Pcol[k];
595 
596  if(k == i) {
597  // Case A: Diagonal value (add to first denominator)
598  // FIXME: Add BlockNumber matching here
599  first_denominator += a_ik;
600 #ifdef CMS_DEBUG
601  a_ii = a_ik;
602  printf("- A(%d,%d) is the diagonal\n",i,k);
603 #endif
604 
605  }
606  else if(myPointType[k] == DIRICHLET_PT) {
607  // Case B: Ignore dirichlet connections completely
608 #ifdef CMS_DEBUG
609  printf("- A(%d,%d) is a Dirichlet point\n",i,k);
610 #endif
611 
612  }
613  else if(pcol_k != LO_INVALID && pcol_k >= (LO)P_rowptr[i]) {
614  // Case C: a_ik is strong C-Point (goes directly into the weight)
615  P_values[pcol_k] += a_ik;
616 #ifdef CMS_DEBUG
617  printf("- A(%d,%d) is a strong C-Point\n",i,k);
618 #endif
619  }
620  else if (!edgeIsStrong[row_start + k0]) {
621  // Case D: Weak non-Dirichlet neighbor (add to first denominator)
622  if(block_id.size() == 0 || block_id[i] == block_id[k]) {
623  first_denominator += a_ik;
624 #ifdef CMS_DEBUG
625  printf("- A(%d,%d) is weak adding to diagonal(%d,%d) (%6.4e)\n",i,k,block_id[i],block_id[k],a_ik);
626  }
627  else {
628  printf("- A(%d,%d) is weak but does not match blocks (%d,%d), discarding\n",i,k,block_id[i],block_id[k]);
629 #endif
630  }
631 
632  }
633  else {//Case E
634  // Case E: Strong F-Point (adds to the first denominator if we don't share a
635  // a strong C-Point with i; adds to the second denominator otherwise)
636 #ifdef CMS_DEBUG
637  printf("- A(%d,%d) is a strong F-Point\n",i,k);
638 #endif
639 
640  SC a_kk = SC_ZERO;
641  SC second_denominator = SC_ZERO;
642  int sign_akk = 0;
643 
644  if(k < (LO)Nrows) {
645  // Grab the diagonal a_kk
646  A.getLocalRowView(k, A_indices_k, A_vals_k);
647  for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
648  LO m = A_indices_k[m0];
649  if(k == m) {
650  a_kk = A_vals_k[m0];
651  break;
652  }
653  }//end for A_indices_k
654 
655  // Compute the second denominator term
656  sign_akk = Sign(a_kk);
657  for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
658  LO m = A_indices_k[m0];
659  if(m != k && Acol_to_Pcol[A_indices_k[m0]] >= (LO)P_rowptr[i]) {
660  SC a_km = A_vals_k[m0];
661  second_denominator+= (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
662  }
663  }//end for A_indices_k
664 
665  // Now we have the second denominator, for this particular strong F point.
666  // So we can now add the sum to the w_ij components for the P values
667  if(second_denominator != SC_ZERO) {
668  for(LO j0=0; j0<(LO)A_indices_k.size(); j0++) {
669  LO j = A_indices_k[j0];
670  // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
671  // printf("Acol_to_Pcol[%d] = %d P_values.size() = %d\n",j,Acol_to_Pcol[j],(int)P_values.size());
672  if(Acol_to_Pcol[j] >= (LO)P_rowptr[i]) {
673  SC a_kj = A_vals_k[j0];
674  SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
675  P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
676 #ifdef CMS_DEBUG
677  printf("- - Unscaled P(%d,A-%d) += %6.4e = %5.4e\n",i,j,a_ik * sign_akj_val / second_denominator,P_values[Acol_to_Pcol[j]]);
678 #endif
679  }
680  }//end for A_indices_k
681  }//end if second_denominator != 0
682  else {
683 #ifdef CMS_DEBUG
684  printf("- - A(%d,%d) second denominator is zero\n",i,k);
685 #endif
686  if(block_id.size() == 0 || block_id[i] == block_id[k])
687  first_denominator += a_ik;
688  }//end else second_denominator != 0
689  }// end if k < Nrows
690  else {
691  // Ghost row
692  LO kless = k-Nrows;
693  // Grab the diagonal a_kk
694  // NOTE: ColMap is not locally fitted to the RowMap
695  // so we need to check GIDs here
696  Aghost->getLocalRowView(kless, A_indices_k, A_vals_k);
697  GO k_g = Aghost->getRowMap()->getGlobalElement(kless);
698  for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
699  GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
700  if(k_g == m_g) {
701  a_kk = A_vals_k[m0];
702  break;
703  }
704  }//end for A_indices_k
705 
706  // Compute the second denominator term
707  sign_akk = Sign(a_kk);
708  for(LO m0=0; m0<(LO)A_indices_k.size(); m0++){
709  GO m_g = Aghost->getColMap()->getGlobalElement(A_indices_k[m0]);
710  LO mghost = A_indices_k[m0];//Aghost LCID
711  LO m = Aghostcol_to_Acol[mghost]; //A's LID (could be LO_INVALID)
712  if(m_g != k_g && m != LO_INVALID && Acol_to_Pcol[m] >= (LO)P_rowptr[i]) {
713  SC a_km = A_vals_k[m0];
714  second_denominator+= (Sign(a_km) == sign_akk ? SC_ZERO : a_km);
715  }
716  }//end for A_indices_k
717 
718 
719  // Now we have the second denominator, for this particular strong F point.
720  // So we can now add the sum to the w_ij components for the P values
721  if(second_denominator != SC_ZERO) {
722  for(LO j0=0; j0<(LO)A_indices_k.size(); j0++){
723  LO jghost = A_indices_k[j0];//Aghost LCID
724  LO j = Aghostcol_to_Acol[jghost]; //A's LID (could be LO_INVALID)
725  // NOTE: Row k should be in fis_star, so I should have to check for diagonals here
726  if((j != LO_INVALID) && (Acol_to_Pcol[j] >= (LO)P_rowptr[i])) {
727  SC a_kj = A_vals_k[j0];
728  SC sign_akj_val = sign_akk == Sign(a_kj) ? SC_ZERO : a_kj;
729  P_values[Acol_to_Pcol[j]] += a_ik * sign_akj_val / second_denominator;
730 #ifdef CMS_DEBUG
731  printf("- - Unscaled P(%d,A-%d) += %6.4e\n",i,j,a_ik * sign_akj_val / second_denominator);
732 #endif
733  }
734 
735  }//end for A_indices_k
736  }//end if second_denominator != 0
737  else {
738 #ifdef CMS_DEBUG
739  printf("- - A(%d,%d) second denominator is zero\n",i,k);
740 #endif
741  if(block_id.size() == 0 || block_id[i] == block_id[k])
742  first_denominator += a_ik;
743  }//end else second_denominator != 0
744  }//end else k < Nrows
745  }//end else Case A,...,E
746 
747  }//end for A_indices_i
748 
749  // Now, downscale by the first_denominator
750  if(first_denominator != SC_ZERO) {
751  for(LO j0=0; j0<(LO)P_indices_i.size(); j0++) {
752 #ifdef CMS_DEBUG
753  SC old_pij = P_vals_i[j0];
754  P_vals_i[j0] /= -first_denominator;
755  printf("P(%d,%d) = %6.4e = %6.4e / (%6.4e + %6.4e)\n",i,P_indices_i[j0],P_vals_i[j0],old_pij,a_ii,first_denominator - a_ii);
756 #else
757  P_vals_i[j0] /= -first_denominator;
758 #endif
759  }//end for P_indices_i
760  }//end if first_denominator != 0
761 
762  }//end else C-Point
763 
764  }// end if i < Nrows
765 
766  // Finish up
767  Pcrs->setAllValues(P_rowptr,P_colind,P_values);
768  Pcrs->expertStaticFillComplete(/*domain*/coarseDomainMap, /*range*/A.getDomainMap());
769  // Wrap from CrsMatrix to Matrix
770  P = rcp(new CrsMatrixWrap(Pcrs));
771 
772 }//end Coarsen_ClassicalModified
773 
774 
775 /* ************************************************************************* */
776 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
778 Coarsen_Direct(const Matrix & A,const RCP<const Matrix> & Aghost, const GraphBase & graph, RCP<const Map> & coarseColMap, RCP<const Map> & coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO> & myPointType, const Teuchos::ArrayView<const LO> & myPointType_ghost, const Teuchos::Array<LO> & cpoint2pcol, const Teuchos::Array<LO> & pcol2cpoint, Teuchos::Array<size_t> & eis_rowptr, Teuchos::Array<bool> & edgeIsStrong, RCP<LocalOrdinalVector> & BlockNumber, RCP<Matrix> & P) const {
779  /* ============================================================= */
780  /* Phase 3 : Direct Interpolation */
781  /* We do not use De Sterck, Falgout, Nolting and Yang (2008) */
782  /* here. Instead we follow: */
783  /* Trottenberg, Oosterlee and Schueller, Multigrid, 2001. */
784  /* with some modifications inspirted by PyAMG */
785  /* ============================================================= */
786  /* Definitions: */
787  /* F = F-points */
788  /* C = C-points */
789  /* N_i = non-zero neighbors of node i */
790  /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
791  /* F_i^s = F \cap S_i [strong F-neighbors of i] */
792  /* C_i^s = C \cap S_i [strong C-neighbors of i] */
793  /* P_i = Set of interpolatory variables for row i [here = C_i^s] */
794 
795  /* (A.2.17) from p. 426 */
796  /* a_ij^- = { a_ij, if a_ij < 0 */
797  /* { 0, otherwise */
798  /* a_ij^+ = { a_ij, if a_ij > 0 */
799  /* { 0, otherwise */
800  /* P_i^- = P_i \cap {k | a_ij^- != 0 and a_ij^- = a_ij} */
801  /* [strong C-neighbors with negative edges] */
802  /* P_i^+ = P_i \cap {k | a_ij^+ != 0 and a_ij^+ = a_ij} */
803  /* [strong C-neighbors with positive edges] */
804 
805 
806  /* de Sterck et al., gives us this: */
807  /* Rewritten Equation (6) on p. 119 */
808  /* w_ij = - a_ji / a_ii \frac{\sum_{k\in N_i} a_ik} {\sum k\inC_i^s} a_ik}, j\in C_i^s */
809 
810  /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
811  /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
812  /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
813  /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
814  /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
815  /* NOTE: The text says to modify, if P_i^+ is zero but it isn't entirely clear how that */
816  /* works. We'll follow the PyAMG implementation in a few important ways. */
817 
819  const point_type DIRICHLET_PT = ClassicalMapFactory::DIRICHLET_PT;
820 
821  // Initial (estimated) allocation
822  // NOTE: If we only used Tpetra, then we could use these guys as is, but because Epetra, we can't, so there
823  // needs to be a copy below.
824  using STS = typename Teuchos::ScalarTraits<SC>;
825  using MT = typename STS::magnitudeType;
826  using MTS = typename Teuchos::ScalarTraits<MT>;
827  size_t Nrows = A.getNodeNumRows();
828  double c_point_density = (double)num_c_points / (num_c_points+num_f_points);
829  double mean_strong_neighbors_per_row = (double) graph.GetNodeNumEdges() / graph.GetNodeNumVertices();
830  // double mean_neighbors_per_row = (double)A.getNodeNumEntries() / Nrows;
831  double nnz_per_row_est = c_point_density*mean_strong_neighbors_per_row;
832 
833  size_t nnz_est = std::max(Nrows,std::min((size_t)A.getNodeNumEntries(),(size_t)(nnz_per_row_est*Nrows)));
834  SC SC_ZERO = STS::zero();
835  MT MT_ZERO = MTS::zero();
836  Array<size_t> tmp_rowptr(Nrows+1);
837  Array<LO> tmp_colind(nnz_est);
838 
839  // Algorithm (count+realloc)
840  // For each row, i,
841  // - Count the number of elements in \hat{C}_j, aka [C-neighbors and C-neighbors of strong F-neighbors of i]
842  size_t ct=0;
843  for(LO row=0; row < (LO) Nrows; row++) {
844  size_t row_start = eis_rowptr[row];
845  ArrayView<const LO> indices;
846  ArrayView<const SC> vals;
847  std::set<LO> C_hat;
848  if(myPointType[row] == DIRICHLET_PT) {
849  // Dirichlet points get ignored completely
850  }
851  else if(myPointType[row] == C_PT) {
852  // C-Points get a single 1 in their row
853  C_hat.insert(cpoint2pcol[row]);
854  }
855  else {
856  // F-Points have a more complicated interpolation
857 
858  // C-neighbors of row
859  A.getLocalRowView(row, indices, vals);
860  for(LO j=0; j<indices.size(); j++)
861  if(myPointType[indices[j]] == C_PT && edgeIsStrong[row_start + j])
862  C_hat.insert(cpoint2pcol[indices[j]]);
863  }// end else
864 
865  // Realloc if needed
866  if(ct + (size_t)C_hat.size() > (size_t)tmp_colind.size()) {
867  tmp_colind.resize(std::max(ct+(size_t)C_hat.size(),(size_t)2*tmp_colind.size()));
868  }
869 
870  // Copy
871  std::copy(C_hat.begin(), C_hat.end(),tmp_colind.begin()+ct);
872  ct+=C_hat.size();
873  tmp_rowptr[row+1] = tmp_rowptr[row] + C_hat.size();
874  }
875  // Resize down
876  tmp_colind.resize(tmp_rowptr[Nrows]);
877 
878  // Allocate memory & copy
879  P = rcp(new CrsMatrixWrap(A.getRowMap(), coarseColMap, 0));
880  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
881  ArrayRCP<size_t> P_rowptr;
882  ArrayRCP<LO> P_colind;
883  ArrayRCP<SC> P_values;
884 
885 #ifdef CMS_DEBUG
886 printf("CMS: Allocating P w/ %d nonzeros\n",(int)tmp_rowptr[Nrows]);
887 #endif
888  PCrs->allocateAllValues(tmp_rowptr[Nrows], P_rowptr, P_colind, P_values);
889  TEUCHOS_TEST_FOR_EXCEPTION(tmp_rowptr.size() !=P_rowptr.size(), Exceptions::RuntimeError,"ClassicalPFactory: Allocation size error (rowptr)");
890  TEUCHOS_TEST_FOR_EXCEPTION(tmp_colind.size() !=P_colind.size(), Exceptions::RuntimeError,"ClassicalPFactory: Allocation size error (colind)");
891  // FIXME: This can be short-circuited for Tpetra, if we decide we care
892  for(LO i=0; i<(LO)Nrows+1; i++)
893  P_rowptr[i] = tmp_rowptr[i];
894  for(LO i=0; i<(LO)tmp_rowptr[Nrows]; i++)
895  P_colind[i] = tmp_colind[i];
896 
897 
898  // Algorithm (numeric)
899  for(LO i=0; i < (LO)Nrows; i++) {
900  if(myPointType[i] == DIRICHLET_PT) {
901  // Dirichlet points get ignored completely
902 #ifdef CMS_DEBUG
903  // DEBUG
904  printf("** A(%d,:) is a Dirichlet-Point.\n",i);
905 #endif
906  }
907  else if (myPointType[i] == C_PT) {
908  // C Points get a single 1 in their row
909  P_values[P_rowptr[i]] = Teuchos::ScalarTraits<SC>::one();
910 #ifdef CMS_DEBUG
911  // DEBUG
912  printf("** A(%d,:) is a C-Point.\n",i);
913 #endif
914  }
915  else {
916  /* Trottenberg et al. (A.7.6) and (A.7.7) on p. 479 gives this: */
917  /* alpha_i = \frac{ \sum_{j\in N_i} a_ij^- }{ \sum_{k\in P_i} a_ik^- } */
918  /* beta_i = \frac{ \sum_{j\in N_i} a_ij^+ }{ \sum_{k\in P_i} a_ik^+ } */
919  /* w_ik = { - alpha_i (a_ik / a_ii), if k\in P_i^- */
920  /* { - beta_i (a_ik / a_ii), if k\in P_i^+ */
921  ArrayView<const LO> A_indices_i, A_incides_k;
922  ArrayView<const SC> A_vals_i, A_indices_k;
923  A.getLocalRowView(i, A_indices_i, A_vals_i);
924  size_t row_start = eis_rowptr[i];
925 
926  ArrayView<LO> P_indices_i = P_colind.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
927  ArrayView<SC> P_vals_i = P_values.view(P_rowptr[i],P_rowptr[i+1] - P_rowptr[i]);
928 
929 #ifdef CMS_DEBUG
930  // DEBUG
931  {
932  char mylabel[5]="FUCD";
933  char sw[3]="ws";
934  printf("** A(%d,:) = ",i);
935  for(LO j=0; j<(LO)A_indices_i.size(); j++){
936  printf("%6.4e(%d-%c%c) ",A_vals_i[j],A_indices_i[j],mylabel[1+myPointType[A_indices_i[j]]],sw[(int)edgeIsStrong[row_start+j]]);
937  }
938  printf("\n");
939  }
940 #endif
941 
942  SC a_ii = SC_ZERO;
943  SC pos_numerator = SC_ZERO, neg_numerator = SC_ZERO;
944  SC pos_denominator = SC_ZERO, neg_denominator = SC_ZERO;
945  // Find the diagonal and compute the sum ratio
946  for(LO j=0; j<(LO)A_indices_i.size(); j++) {
947  SC a_ik = A_vals_i[j];
948  LO k = A_indices_i[j];
949 
950  // Diagonal
951  if(i == k) {
952  a_ii = a_ik;
953  }
954  // Only strong C-neighbors are in the denomintor
955  if(myPointType[k] == C_PT && edgeIsStrong[row_start + j]) {
956  if(STS::real(a_ik) > MT_ZERO) pos_denominator += a_ik;
957  else neg_denominator += a_ik;
958  }
959 
960  // All neighbors are in the numerator
961  // NOTE: As per PyAMG, this does not include the diagonal
962  if(i != k) {
963  if(STS::real(a_ik) > MT_ZERO) pos_numerator += a_ik;
964  else neg_numerator += a_ik;
965  }
966  }
967  SC alpha = (neg_denominator == MT_ZERO) ? SC_ZERO : (neg_numerator / neg_denominator);
968  SC beta = (pos_denominator == MT_ZERO) ? SC_ZERO : (pos_numerator / pos_denominator);
969  alpha /= -a_ii;
970  beta /= -a_ii;
971 
972  // Loop over the entries
973  for(LO p_j=0; p_j<(LO)P_indices_i.size(); p_j++){
974  LO P_col = pcol2cpoint[P_indices_i[p_j]];
975  SC a_ij = SC_ZERO;
976 
977  // Find A_ij (if it is there)
978  // FIXME: We can optimize this if we assume sorting
979  for(LO a_j =0; a_j<(LO)A_indices_i.size(); a_j++) {
980  if(A_indices_i[a_j] == P_col) {
981  a_ij = A_vals_i[a_j];
982  break;
983  }
984  }
985  SC w_ij = (STS::real(a_ij) < 0 ) ? (alpha * a_ij) : (beta * a_ij);
986 #ifdef CMS_DEBUG
987  SC alpha_or_beta = (STS::real(a_ij) < 0 ) ? alpha : beta;
988  printf("P(%d,%d/%d) = - %6.4e * %6.4e = %6.4e\n",i,P_indices_i[p_j],pcol2cpoint[P_indices_i[p_j]],alpha_or_beta,a_ij,w_ij);
989 #endif
990  P_vals_i[p_j] = w_ij;
991  }//end for A_indices_i
992  }//end else C_PT
993  }//end for Numrows
994 
995  // Finish up
996  PCrs->setAllValues(P_rowptr, P_colind, P_values);
997  PCrs->expertStaticFillComplete(/*domain*/coarseDomainMap, /*range*/A.getDomainMap());
998 }
999 
1000 
1001 /* ************************************************************************* */
1002 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1004 Coarsen_Ext_Plus_I(const Matrix & A,const RCP<const Matrix> & Aghost, const GraphBase & graph, RCP<const Map> & coarseColMap, RCP<const Map> & coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView<const LO> & myPointType, const Teuchos::ArrayView<const LO> & myPointType_ghost, const Teuchos::Array<LO> & cpoint2pcol, const Teuchos::Array<LO> & pcol2cpoint, Teuchos::Array<size_t> & eis_rowptr, Teuchos::Array<bool> & edgeIsStrong, RCP<LocalOrdinalVector> & BlockNumber, RCP<Matrix> & P) const {
1005 
1006  /* ============================================================= */
1007  /* Phase 3 : Extended+i Interpolation */
1008  /* De Sterck, Falgout, Nolting and Yang. "Distance-two */
1009  /* interpolation for parallel algebraic multigrid", NLAA 2008 */
1010  /* 15:115-139 */
1011  /* ============================================================= */
1012  /* Definitions: */
1013  /* F = F-points */
1014  /* C = C-points */
1015  /* N_i = non-zero neighbors of node i */
1016  /* S_i = {j\in N_i | j strongly influences i } [strong neighbors of i] */
1017  /* F_i^s = F \cap S_i [strong F-neighbors of i] */
1018  /* C_i^s = C \cap S_i [strong C-neighbors of i] */
1019  /* N_i^w = N_i\ (F_i^s \cup C_i^s) [weak neighbors of i] */
1020  /* This guy has a typo. The paper had a \cap instead of \cup */
1021  /* I would note that this set can contain both F-points and */
1022  /* C-points. They're just weak neighbors of this guy. */
1023  /* Note that N_i^w \cup F_i^s \cup C_i^s = N_i by construction */
1024 
1025  /* \hat{C}_i = C_i \cup (\bigcup_{j\inF_i^s} C_j) */
1026  /* [C-neighbors and C-neighbors of strong F-neighbors of i] */
1027  /* */
1028 
1029  /* \bar{a}_ij = { 0, if sign(a_ij) == sign(a_ii) */
1030  /* { a_ij, otherwise */
1031 
1032 
1033  /* Rewritten Equation (19) on p. 123 */
1034  /* f_ik = \frac{\bar{a}_kj}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
1035  /* w_ij = -\tilde{a}_ii^{-1} (a_ij + \sum_{k\inF_i^s} a_ik f_ik */
1036  /* for j in \hat{C}_i */
1037 
1038  /* Rewritten Equation (20) on p. 124 [for the lumped diagonal] */
1039  /* g_ik = \frac{\bar{a}_ki}{\sum{l\in \hat{C}_i\cup {i}} \bar{a}_kl */
1040  /* \tilde{a}_ii = a_ii + \sum_{n\inN_i^w\setminus \hat{C}_i} a_in + \sum_{k\inF_i^s} a_ik g_ik */
1041  TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,"ClassicalPFactory: Ext+i not implemented");
1042 
1043 }
1044 
1045 
1046 
1047 
1048 /* ************************************************************************* */
1049 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1051 GenerateStrengthFlags(const Matrix & A,const GraphBase & graph, Teuchos::Array<size_t> & eis_rowptr,Teuchos::Array<bool> & edgeIsStrong) const {
1052  // To make this easier, we'll create a bool array equal to the nnz in the matrix
1053  // so we know whether each edge is strong or not. This will save us a bunch of
1054  // trying to match the graph and matrix later
1055  size_t Nrows = A.getNodeNumRows();
1056  eis_rowptr.resize(Nrows+1);
1057 
1058  if(edgeIsStrong.size() == 0) {
1059  // Preferred
1060  edgeIsStrong.resize(A.getNodeNumEntries(),false);
1061  }
1062  else {
1063  edgeIsStrong.resize(A.getNodeNumEntries(),false);
1064  edgeIsStrong.assign(A.getNodeNumEntries(),false);
1065  }
1066 
1067  eis_rowptr[0] = 0;
1068  for (LO i=0; i<(LO)Nrows; i++) {
1069  LO rowstart = eis_rowptr[i];
1070  ArrayView<const LO> A_indices;
1071  ArrayView<const SC> A_values;
1072  A.getLocalRowView(i, A_indices, A_values);
1073  LO A_size = (LO) A_indices.size();
1074 
1075  ArrayView<const LO> G_indices = graph.getNeighborVertices(i);
1076  LO G_size = (LO) G_indices.size();
1077 
1078  // Both of these guys should be in the same (sorted) order, but let's check
1079  bool is_ok=true;
1080  for(LO j=0; j<A_size-1; j++)
1081  if (A_indices[j] >= A_indices[j+1]) { is_ok=false; break;}
1082  for(LO j=0; j<G_size-1; j++)
1083  if (G_indices[j] >= G_indices[j+1]) { is_ok=false; break;}
1084  TEUCHOS_TEST_FOR_EXCEPTION(!is_ok, Exceptions::RuntimeError,"ClassicalPFactory: Exected A and Graph to be sorted");
1085 
1086  // Now cycle through and set the flags - if the edge is in G it is strong
1087  for(LO g_idx=0, a_idx=0; g_idx < G_size; g_idx++) {
1088  LO col = G_indices[g_idx];
1089  while (A_indices[a_idx] != col && a_idx < A_size) a_idx++;
1090  if(a_idx == A_size) {is_ok=false;break;}
1091  edgeIsStrong[rowstart+a_idx] = true;
1092  }
1093 
1094  eis_rowptr[i+1] = eis_rowptr[i] + A_size;
1095  }
1096 }
1097 
1098 
1099 /* ************************************************************************* */
1100 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
1102 GhostCoarseMap(const Matrix &A, const Import & Importer, const ArrayRCP<const LO> myPointType, const RCP<const Map> & coarseMap, RCP<const Map> & coarseColMap) const {
1104  const GO GO_INVALID = Teuchos::OrdinalTraits<GO>::invalid();
1105  RCP<GlobalOrdinalVector> d_coarseIds = GlobalOrdinalVectorFactory::Build(A.getRowMap());
1106  ArrayRCP<GO> d_data = d_coarseIds->getDataNonConst(0);
1107  LO ct=0;
1108 
1109  for(LO i=0; i<(LO)d_data.size(); i++) {
1110  if(myPointType[i] == C_PT) {
1111  d_data[i] = coarseMap->getGlobalElement(ct);
1112  ct++;
1113  }
1114  else
1115  d_data[i] = GO_INVALID;
1116  }
1117 
1118  // Ghost this guy
1119  RCP<GlobalOrdinalVector> c_coarseIds = GlobalOrdinalVectorFactory::Build(A.getColMap());
1120  c_coarseIds->doImport(*d_coarseIds,Importer,Xpetra::INSERT);
1121 
1122  // If we assume that A is in Aztec ordering, then any subset of A's unknowns will
1123  // be in Aztec ordering as well, which means we can just condense these guys down
1124  // Overallocate, count and view
1125  ArrayRCP<GO> c_data = c_coarseIds->getDataNonConst(0);
1126 
1127  Array<GO> c_gids(c_data.size());
1128  LO count=0;
1129 
1130  for(LO i=0; i<(LO)c_data.size(); i++) {
1131  if(c_data[i] != GO_INVALID) {
1132  c_gids[count] = c_data[i];
1133  count++;
1134  }
1135  }
1136  // FIXME: Assumes scalar PDE
1137  std::vector<size_t> stridingInfo_(1);
1138  stridingInfo_[0]=1;
1139  GO domainGIDOffset = 0;
1140 
1141  coarseColMap = StridedMapFactory::Build(coarseMap->lib(),
1142  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
1143  c_gids.view(0,count),
1144  coarseMap->getIndexBase(),
1145  stridingInfo_,
1146  coarseMap->getComm(),
1147  domainGIDOffset);
1148 
1149 }
1150 
1151 
1152 } //namespace MueLu
1153 
1154 
1155 
1156 #define MUELU_CLASSICALPFACTORY_SHORT
1157 #endif // MUELU_CLASSICALPFACTORY_DEF_HPP
1158 
1159 
#define SET_VALID_ENTRY(name)
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void Coarsen_Direct(const Matrix &A, const RCP< const Matrix > &Aghost, const GraphBase &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
void GenerateStrengthFlags(const Matrix &A, const GraphBase &graph, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void Coarsen_Ext_Plus_I(const Matrix &A, const RCP< const Matrix > &Aghost, const GraphBase &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< Matrix > &P) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
typename ClassicalMapFactory::point_type point_type
void Coarsen_ClassicalModified(const Matrix &A, const RCP< const Matrix > &Aghost, const GraphBase &graph, RCP< const Map > &coarseColMap, RCP< const Map > &coarseDomainMap, LO num_c_points, LO num_f_points, const Teuchos::ArrayView< const LO > &myPointType, const Teuchos::ArrayView< const LO > &myPointType_ghost, const Teuchos::Array< LO > &cpoint2pcol, const Teuchos::Array< LO > &pcol2cpoint, Teuchos::Array< size_t > &eis_rowptr, Teuchos::Array< bool > &edgeIsStrong, RCP< LocalOrdinalVector > &BlockNumber, RCP< const Import > remoteOnlyImporter, RCP< Matrix > &P) const
void GhostCoarseMap(const Matrix &A, const Import &Importer, const ArrayRCP< const LO > myPointType, const RCP< const Map > &coarseMap, RCP< const Map > &coarseColMap) const
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
MueLu representation of a graph.
virtual Teuchos::ArrayView< const LocalOrdinal > getNeighborVertices(LocalOrdinal v) const =0
Return the list of vertices adjacent to the vertex 'v'.
virtual size_t GetNodeNumEdges() const =0
Return number of edges owned by the calling node.
virtual size_t GetNodeNumVertices() const =0
Return number of vertices owned by the calling node.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Namespace for MueLu classes and methods.
@ Debug
Print additional debugging information.
@ Statistics1
Print more statistics.