MueLu  Version of the Day
MueLu_SemiCoarsenPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_SEMICOARSENPFACTORY_DEF_HPP
47 #define MUELU_SEMICOARSENPFACTORY_DEF_HPP
48 
49 #include <stdlib.h>
50 
51 #include <Teuchos_LAPACK.hpp>
52 
53 #include <Xpetra_CrsMatrixWrap.hpp>
54 #include <Xpetra_ImportFactory.hpp>
55 #include <Xpetra_Matrix.hpp>
56 #include <Xpetra_MultiVectorFactory.hpp>
57 #include <Xpetra_VectorFactory.hpp>
58 
61 
62 #include "MueLu_MasterList.hpp"
63 #include "MueLu_Monitor.hpp"
64 
65 namespace MueLu {
66 
67  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
69  RCP<ParameterList> validParamList = rcp(new ParameterList());
70 
71 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
72  SET_VALID_ENTRY("semicoarsen: piecewise constant");
73  SET_VALID_ENTRY("semicoarsen: coarsen rate");
74 #undef SET_VALID_ENTRY
75  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
76  validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
77  validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coordinates");
78 
79  validParamList->set< RCP<const FactoryBase> >("LineDetection_VertLineIds", Teuchos::null, "Generating factory for LineDetection vertical line ids");
80  validParamList->set< RCP<const FactoryBase> >("LineDetection_Layers", Teuchos::null, "Generating factory for LineDetection layer ids");
81  validParamList->set< RCP<const FactoryBase> >("CoarseNumZLayers", Teuchos::null, "Generating factory for number of coarse z-layers");
82 
83  return validParamList;
84  }
85 
86  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
88  Input(fineLevel, "A");
89  Input(fineLevel, "Nullspace");
90 
91  Input(fineLevel, "LineDetection_VertLineIds");
92  Input(fineLevel, "LineDetection_Layers");
93  Input(fineLevel, "CoarseNumZLayers");
94 
95  // check whether fine level coordinate information is available.
96  // If yes, request the fine level coordinates and generate coarse coordinates
97  // during the Build call
98  if (fineLevel.GetLevelID() == 0) {
99  if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
100  fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
101  bTransferCoordinates_ = true;
102  }
103  } else if (bTransferCoordinates_ == true){
104  // on coarser levels we check the default factory providing "Coordinates"
105  // or the factory declared to provide "Coordinates"
106  // first, check which factory is providing coordinate information
107  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
108  if (myCoordsFact == Teuchos::null) { myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates"); }
109  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
110  fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
111  }
112  }
113  }
114 
115  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
117  return BuildP(fineLevel, coarseLevel);
118  }
119 
120  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
122  FactoryMonitor m(*this, "Build", coarseLevel);
123 
124  // obtain general variables
125  RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
126  RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
127 
128  // get user-provided coarsening rate parameter (constant over all levels)
129  const ParameterList& pL = GetParameterList();
130  LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
131 
132  // collect general input data
133  LO BlkSize = A->GetFixedBlockSize();
134  RCP<const Map> rowMap = A->getRowMap();
135  LO Ndofs = rowMap->getNodeNumElements();
136  LO Nnodes = Ndofs/BlkSize;
137 
138  // collect line detection information generated by the LineDetectionFactory instance
139  LO FineNumZLayers = Get< LO >(fineLevel, "CoarseNumZLayers");
140  Teuchos::ArrayRCP<LO> TVertLineId = Get< Teuchos::ArrayRCP<LO> > (fineLevel, "LineDetection_VertLineIds");
141  Teuchos::ArrayRCP<LO> TLayerId = Get< Teuchos::ArrayRCP<LO> > (fineLevel, "LineDetection_Layers");
142  LO* VertLineId = TVertLineId.getRawPtr();
143  LO* LayerId = TLayerId.getRawPtr();
144 
145  // generate transfer operator with semicoarsening
146  RCP<const Map> theCoarseMap;
147  RCP<Matrix> P;
148  RCP<MultiVector> coarseNullspace;
149  GO Ncoarse = MakeSemiCoarsenP(Nnodes,FineNumZLayers,CoarsenRate,LayerId,VertLineId,
150  BlkSize, A, P, theCoarseMap, fineNullspace,coarseNullspace);
151 
152  // set StridingInformation of P
153  if (A->IsView("stridedMaps") == true)
154  P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), theCoarseMap);
155  else
156  P->CreateView("stridedMaps", P->getRangeMap(), theCoarseMap);
157 
158  if (pL.get<bool>("semicoarsen: piecewise constant"))
159  RevertToPieceWiseConstant(P, BlkSize);
160 
161  // Store number of coarse z-layers on the coarse level container
162  // This information is used by the LineDetectionAlgorithm
163  // TODO get rid of the NoFactory
164 
165  LO CoarseNumZLayers = FineNumZLayers*Ncoarse;
166  CoarseNumZLayers /= Ndofs;
167  coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers), MueLu::NoFactory::get());
168 
169  // store semicoarsening transfer on coarse level
170  Set(coarseLevel, "P", P);
171 
172  Set(coarseLevel, "Nullspace", coarseNullspace);
173 
174  // transfer coordinates
175  if(bTransferCoordinates_) {
176  typedef Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType,LO,GO,NO> xdMV;
177  RCP<xdMV> fineCoords = Teuchos::null;
178  if (fineLevel.GetLevelID() == 0 &&
179  fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
180  fineCoords = fineLevel.Get< RCP<xdMV> >("Coordinates", NoFactory::get());
181  } else {
182  RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
183  if (myCoordsFact == Teuchos::null) { myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates"); }
184  if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
185  fineCoords = fineLevel.Get< RCP<xdMV> >("Coordinates", myCoordsFact.get());
186  }
187  }
188 
189  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords==Teuchos::null, Exceptions::RuntimeError, "No Coordinates found provided by the user.");
190 
191  TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given.");
192  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x = fineCoords->getDataNonConst(0);
193  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y = fineCoords->getDataNonConst(1);
194  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z = fineCoords->getDataNonConst(2);
195 
196  // determine the maximum and minimum z coordinate value on the current processor.
197  typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max = -Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() / Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
198  typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min = Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() / Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
199  for ( auto it = z.begin(); it != z.end(); ++it) {
200  if(*it > zval_max) zval_max = *it;
201  if(*it < zval_min) zval_min = *it;
202  }
203 
204  LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
205 
206  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> myZLayerCoords = Teuchos::arcp<typename Teuchos::ScalarTraits<Scalar>::coordinateType>(myCoarseZLayers);
207  if(myCoarseZLayers == 1) {
208  myZLayerCoords[0] = zval_min;
209  } else {
210  typename Teuchos::ScalarTraits<Scalar>::coordinateType dz = (zval_max-zval_min)/(myCoarseZLayers-1);
211  for(LO k = 0; k<myCoarseZLayers; ++k) {
212  myZLayerCoords[k] = k*dz;
213  }
214  }
215 
216  // Note, that the coarse level node coordinates have to be in vertical ordering according
217  // to the numbering of the vertical lines
218 
219  // number of vertical lines on current node:
220  LO numVertLines = Nnodes / FineNumZLayers;
221  LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
222 
223  RCP<const Map> coarseCoordMap =
224  MapFactory::Build (fineCoords->getMap()->lib(),
225  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
226  Teuchos::as<size_t>(numLocalCoarseNodes),
227  fineCoords->getMap()->getIndexBase(),
228  fineCoords->getMap()->getComm());
229  RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::coordinateType,LO,GO,NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
230  coarseCoords->putScalar(-1.0);
231  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx = coarseCoords->getDataNonConst(0);
232  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy = coarseCoords->getDataNonConst(1);
233  ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz = coarseCoords->getDataNonConst(2);
234 
235  // loop over all vert line indices (stop as soon as possible)
236  LO cntCoarseNodes = 0;
237  for( LO vt = 0; vt < TVertLineId.size(); ++vt) {
238  //vertical line id in *vt
239  LO curVertLineId = TVertLineId[vt];
240 
241  if(cx[curVertLineId * myCoarseZLayers] == -1.0 &&
242  cy[curVertLineId * myCoarseZLayers] == -1.0) {
243  // loop over all local myCoarseZLayers
244  for (LO n=0; n<myCoarseZLayers; ++n) {
245  cx[curVertLineId * myCoarseZLayers + n] = x[vt];
246  cy[curVertLineId * myCoarseZLayers + n] = y[vt];
247  cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
248  }
249  cntCoarseNodes += myCoarseZLayers;
250  }
251 
252  TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes, Exceptions::RuntimeError, "number of coarse nodes is inconsistent.");
253  if(cntCoarseNodes == numLocalCoarseNodes) break;
254  }
255 
256  // set coarse level coordinates
257  Set(coarseLevel, "Coordinates", coarseCoords);
258  } /* end bool bTransferCoordinates */
259 
260  }
261 
262  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
264  /*
265  * Given the number of points in the z direction (PtsPerLine) and a
266  * coarsening rate (CoarsenRate), determine which z-points will serve
267  * as Cpts and return the total number of Cpts.
268  *
269  * Input
270  * PtsPerLine: Number of fine level points in the z direction
271  *
272  * CoarsenRate: Roughly, number of Cpts = (PtsPerLine+1)/CoarsenRate - 1
273  *
274  * Thin: Must be either 0 or 1. Thin decides what to do when
275  * (PtsPerLine+1)/CoarsenRate is not an integer.
276  *
277  * Thin == 0 ==> ceil() the above fraction
278  * Thin == 1 ==> floor() the above fraction
279  *
280  * Output
281  * LayerCpts Array where LayerCpts[i] indicates that the
282  * LayerCpts[i]th fine level layer is a Cpt Layer.
283  * Note: fine level layers are assumed to be numbered starting
284  * a one.
285  */
286  typename Teuchos::ScalarTraits<Scalar>::coordinateType temp, RestStride, di;
287  LO NCpts, i;
288  LO NCLayers = -1;
289  LO FirstStride;
290 
291  temp = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine+1))/((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (CoarsenRate)) - 1.0;
292  if (Thin == 1) NCpts = (LO) ceil(temp);
293  else NCpts = (LO) floor(temp);
294 
295  TEUCHOS_TEST_FOR_EXCEPTION(PtsPerLine == 1, Exceptions::RuntimeError, "SemiCoarsenPFactory::FindCpts: cannot coarsen further.");
296 
297  if (NCpts < 1) NCpts = 1;
298 
299  FirstStride= (LO) ceil( ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) PtsPerLine+1)/( (typename Teuchos::ScalarTraits<Scalar>::coordinateType) (NCpts+1)));
300  RestStride = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine-FirstStride+1))/((typename Teuchos::ScalarTraits<Scalar>::coordinateType) NCpts);
301 
302  NCLayers = (LO) floor((((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine-FirstStride+1))/RestStride)+.00001);
303 
304  TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError, "sizes do not match.");
305 
306  di = (typename Teuchos::ScalarTraits<Scalar>::coordinateType) FirstStride;
307  for (i = 1; i <= NCpts; i++) {
308  (*LayerCpts)[i] = (LO) floor(di);
309  di += RestStride;
310  }
311 
312  return(NCLayers);
313  }
314 
315 #define MaxHorNeighborNodes 75
316 
317  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
319  MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[],
320  LO const VertLineId[], LO const DofsPerNode, RCP<Matrix> & Amat, RCP<Matrix>& P,
321  RCP<const Map>& coarseMap, const RCP<MultiVector> fineNullspace,
322  RCP<MultiVector>& coarseNullspace ) const {
323 
324  /*
325  * Given a CSR matrix (OrigARowPtr, OrigAcols, OrigAvals), information
326  * describing the z-layer and vertical line (LayerId and VertLineId)
327  * of each matrix block row, a coarsening rate, and dofs/node information,
328  * construct a prolongator that coarsening to semicoarsening in the z-direction
329  * using something like an operator-dependent grid transfer. In particular,
330  * matrix stencils are collapsed to vertical lines. Thus, each vertical line
331  * gives rise to a block tridiagonal matrix. BlkRows corresponding to
332  * Cpts are replaced by identity matrices. This tridiagonal is solved
333  * to determine each interpolation basis functions. Each Blk Rhs corresponds
334  * to all zeros except at the corresponding C-pt which has an identity
335  *
336  * On termination, return the number of local prolongator columns owned by
337  * this processor.
338  *
339  * Note: This code was adapted from a matlab code where offsets/arrays
340  * start from 1. In most parts of the code, this 1 offset is kept
341  * (in some cases wasting the first element of the array). The
342  * input and output matrices of this function has been changed to
343  * have offsets/rows/columns which start from 0. LayerId[] and
344  * VertLineId[] currently start from 1.
345  *
346  * Input
347  * =====
348  * Ntotal Number of fine level Blk Rows owned by this processor
349  *
350  * nz Number of vertical layers. Note: partitioning must be done
351  * so that each processor owns an entire vertical line. This
352  * means that nz is the global number of layers, which should
353  * be equal to the local number of layers.
354  * CoarsenRate Rate of z semicoarsening. Smoothed aggregation-like coarsening
355  * would correspond to CoarsenRate = 3.
356  * LayerId Array from 0 to Ntotal-1 + Ghost. LayerId(BlkRow) gives the
357  * layer number associated with the dofs within BlkRow.
358  * VertLineId Array from 1 to Ntotal, VertLineId(BlkRow) gives a unique
359  * vertical line id (from 0 to Ntotal/nz-1) of BlkRow. All
360  * BlkRows associated with nodes along the same vertical line
361  * in the mesh should have the same LineId.
362  * DofsPerNode Number of degrees-of-freedom per mesh node.
363  *
364  * OrigARowPtr, CSR arrays corresponding to the fine level matrix.
365  * OrigAcols,
366  * OrigAvals
367  *
368  * Output
369  * =====
370  * ParamPptr, CSR arrays corresponding to the final prolongation matrix.
371  * ParamPcols,
372  * ParamsPvals
373  */
374  int NLayers, NVertLines, MaxNnz, NCLayers, MyLine, MyLayer;
375  int *InvLineLayer=NULL, *CptLayers=NULL, StartLayer, NStencilNodes;
376  int BlkRow, dof_j, node_k, *Sub2FullMap=NULL, RowLeng;
377  int i, j, iii, col, count, index, loc, PtRow, PtCol;
378  SC *BandSol=NULL, *BandMat=NULL, TheSum;
379  int *IPIV=NULL, KL, KU, KLU, N, NRHS, LDAB,INFO;
380  int *Pcols;
381  size_t *Pptr;
382  SC *Pvals;
383  int MaxStencilSize, MaxNnzPerRow;
384  LO *LayDiff=NULL;
385  int CurRow, LastGuy = -1, NewPtr;
386  int Ndofs;
387  int Nghost;
388  LO *Layerdofs = NULL, *Col2Dof = NULL;
389 
390  Teuchos::LAPACK<LO,SC> lapack;
391 
392  char notrans[3];
393  notrans[0] = 'N';
394  notrans[1] = 'N';
395 
396 
397  MaxNnzPerRow = MaxHorNeighborNodes*DofsPerNode*3;
398  Teuchos::ArrayRCP<LO> TLayDiff = Teuchos::arcp<LO>(1+MaxNnzPerRow); LayDiff = TLayDiff.getRawPtr();
399 
400  Nghost = Amat->getColMap()->getNodeNumElements() - Amat->getDomainMap()->getNodeNumElements();
401  if (Nghost < 0) Nghost = 0;
402  Teuchos::ArrayRCP<LO> TLayerdofs= Teuchos::arcp<LO>(Ntotal*DofsPerNode+Nghost+1); Layerdofs = TLayerdofs.getRawPtr();
403  Teuchos::ArrayRCP<LO> TCol2Dof= Teuchos::arcp<LO>(Ntotal*DofsPerNode+Nghost+1); Col2Dof= TCol2Dof.getRawPtr();
404 
405  RCP<Xpetra::Vector<LO,LO,GO,NO> > localdtemp = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(Amat->getDomainMap());
406  RCP<Xpetra::Vector<LO,LO,GO,NO> > dtemp = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(Amat->getColMap());
407  ArrayRCP<LO> valptr= localdtemp->getDataNonConst(0);
408 
409  for (i = 0; i < Ntotal*DofsPerNode; i++)
410  valptr[i]= LayerId[i/DofsPerNode];
411  valptr=ArrayRCP<LO>();
412 
413  RCP< const Import> importer;
414  importer = Amat->getCrsGraph()->getImporter();
415  if (importer == Teuchos::null) {
416  importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
417  }
418  dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
419 
420  valptr= dtemp->getDataNonConst(0);
421  for (i = 0; i < Ntotal*DofsPerNode+Nghost; i++) Layerdofs[i]= valptr[i];
422  valptr= localdtemp->getDataNonConst(0);
423  for (i = 0; i < Ntotal*DofsPerNode; i++) valptr[i]= i%DofsPerNode;
424  valptr=ArrayRCP<LO>();
425  dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
426 
427  valptr= dtemp->getDataNonConst(0);
428  for (i = 0; i < Ntotal*DofsPerNode+Nghost; i++) Col2Dof[i]= valptr[i];
429  valptr=ArrayRCP<LO>();
430 
431  if (Ntotal != 0) {
432  NLayers = LayerId[0];
433  NVertLines= VertLineId[0];
434  }
435  else { NLayers = -1; NVertLines = -1; }
436 
437  for (i = 1; i < Ntotal; i++) {
438  if ( VertLineId[i] > NVertLines ) NVertLines = VertLineId[i];
439  if ( LayerId[i] > NLayers ) NLayers = LayerId[i];
440  }
441  NLayers++;
442  NVertLines++;
443 
444  /*
445  * Make an inverse map so that we can quickly find the dof
446  * associated with a particular vertical line and layer.
447  */
448 
449  Teuchos::ArrayRCP<LO> TInvLineLayer= Teuchos::arcp<LO>(1+NVertLines*NLayers); InvLineLayer = TInvLineLayer.getRawPtr();
450  for (i=0; i < Ntotal; i++) {
451  InvLineLayer[ VertLineId[i]+1+LayerId[i]*NVertLines ] = i;
452  }
453 
454  /*
455  * Determine coarse layers where injection will be applied.
456  */
457 
458  Teuchos::ArrayRCP<LO> TCptLayers = Teuchos::arcp<LO>(nz+1); CptLayers = TCptLayers.getRawPtr();
459  NCLayers = FindCpts(nz,CoarsenRate,0, &CptLayers);
460 
461  /*
462  * Compute the largest possible interpolation stencil width based
463  * on the location of the Clayers. This stencil width is actually
464  * nodal (i.e. assuming 1 dof/node). To get the true max stencil width
465  * one needs to multiply this by DofsPerNode.
466  */
467 
468  if (NCLayers < 2) MaxStencilSize = nz;
469  else MaxStencilSize = CptLayers[2];
470 
471  for (i = 3; i <= NCLayers; i++) {
472  if (MaxStencilSize < CptLayers[i]- CptLayers[i-2])
473  MaxStencilSize = CptLayers[i]- CptLayers[i-2];
474  }
475  if (NCLayers > 1) {
476  if (MaxStencilSize < nz - CptLayers[NCLayers-1]+1)
477  MaxStencilSize = nz - CptLayers[NCLayers-1]+1;
478  }
479 
480  /*
481  * Allocate storage associated with solving a banded sub-matrix needed to
482  * determine the interpolation stencil. Note: we compute interpolation stencils
483  * for all dofs within a node at the same time, and so the banded solution
484  * must be large enough to hold all DofsPerNode simultaneously.
485  */
486 
487  Teuchos::ArrayRCP<LO> TSub2FullMap= Teuchos::arcp<LO>((MaxStencilSize+1)*DofsPerNode); Sub2FullMap= TSub2FullMap.getRawPtr();
488  Teuchos::ArrayRCP<SC> TBandSol= Teuchos::arcp<SC>((MaxStencilSize+1)*DofsPerNode*DofsPerNode); BandSol = TBandSol.getRawPtr();
489  /*
490  * Lapack variables. See comments for dgbsv().
491  */
492  KL = 2*DofsPerNode-1;
493  KU = 2*DofsPerNode-1;
494  KLU = KL+KU;
495  LDAB = 2*KL+KU+1;
496  NRHS = DofsPerNode;
497  Teuchos::ArrayRCP<SC> TBandMat= Teuchos::arcp<SC>(LDAB*MaxStencilSize*DofsPerNode+1); BandMat = TBandMat.getRawPtr();
498  Teuchos::ArrayRCP<LO> TIPIV= Teuchos::arcp<LO>((MaxStencilSize+1)*DofsPerNode); IPIV = TIPIV.getRawPtr();
499 
500  /*
501  * Allocate storage for the final interpolation matrix. Note: each prolongator
502  * row might have entries corresponding to at most two nodes.
503  * Note: the total fine level dofs equals DofsPerNode*Ntotal and the max
504  * nnz per prolongator row is DofsPerNode*2.
505  */
506 
507  Ndofs = DofsPerNode*Ntotal;
508  MaxNnz = 2*DofsPerNode*Ndofs;
509 
510  RCP<const Map> rowMap = Amat->getRowMap();
511  Xpetra::global_size_t GNdofs= rowMap->getGlobalNumElements();
512 
513  std::vector<size_t> stridingInfo_;
514  stridingInfo_.push_back(DofsPerNode);
515 
516  Xpetra::global_size_t itemp = GNdofs/nz;
517  coarseMap = StridedMapFactory::Build(rowMap->lib(),
518  NCLayers*itemp,
519  NCLayers*NVertLines*DofsPerNode,
520  0, /* index base */
521  stridingInfo_,
522  rowMap->getComm(),
523  -1, /* strided block id */
524  0 /* domain gid offset */);
525 
526 
527  //coarseMap = MapFactory::createContigMapWithNode(rowMap->lib(),(NCLayers*GNdofs)/nz, NCLayers*NVertLines*DofsPerNode,(rowMap->getComm()), rowMap->getNode());
528 
529 
530  P = rcp(new CrsMatrixWrap(rowMap, coarseMap , 0));
531  coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
532 
533 
534  Teuchos::ArrayRCP<SC> TPvals= Teuchos::arcp<SC>(1+MaxNnz); Pvals= TPvals.getRawPtr();
535  Teuchos::ArrayRCP<size_t> TPptr = Teuchos::arcp<size_t>(DofsPerNode*(2+Ntotal)); Pptr = TPptr.getRawPtr();
536  Teuchos::ArrayRCP<LO> TPcols= Teuchos::arcp<LO>(1+MaxNnz); Pcols= TPcols.getRawPtr();
537 
538  Pptr[0] = 0; Pptr[1] = 0;
539 
540  TEUCHOS_TEST_FOR_EXCEPTION(Pcols == NULL, Exceptions::RuntimeError, "MakeSemiCoarsenP: Not enough space \n");
541 
542  /*
543  * Setup P's rowptr as if each row had its maximum of 2*DofsPerNode nonzeros.
544  * This will be useful while filling up P, and then later we will squeeze out
545  * the unused nonzeros locations.
546  */
547 
548  for (i = 1; i <= MaxNnz; i++) Pcols[i] = -1; /* mark all entries as unused */
549  count = 1;
550  for (i = 1; i <= DofsPerNode*Ntotal+1; i++) {
551  Pptr[i] = count;
552  count += (2*DofsPerNode);
553  }
554 
555  /*
556  * Build P column by column. The 1st block column corresponds to the 1st coarse
557  * layer and the first line. The 2nd block column corresponds to the 2nd coarse
558  * layer and the first line. The NCLayers+1 block column corresponds to the
559  * 1st coarse layer and the 2nd line, etc.
560  */
561 
562 
563  col = 0;
564  for (MyLine=1; MyLine <= NVertLines; MyLine += 1) {
565  for (iii=1; iii <= NCLayers; iii+= 1) {
566  col = col+1;
567  MyLayer = CptLayers[iii];
568 
569  /*
570  * StartLayer gives the layer number of the lowest layer that
571  * is nonzero in the interpolation stencil that is currently
572  * being computed. Normally, if we are not near a boundary this
573  * is simply CptsLayers[iii-1]+1.
574  *
575  * NStencilNodes indicates the number of nonzero nodes in the
576  * interpolation stencil that is currently being computed. Normally,
577  * if we are not near a boundary this is CptLayers[iii+1]-StartLayer.
578  */
579 
580  if (iii != 1 ) StartLayer = CptLayers[iii-1]+1;
581  else StartLayer = 1;
582 
583  if (iii != NCLayers) NStencilNodes= CptLayers[iii+1]-StartLayer;
584  else NStencilNodes= NLayers - StartLayer + 1;
585 
586 
587  N = NStencilNodes*DofsPerNode;
588 
589  /*
590  * dgbsv() does not require that the first KL rows be initialized,
591  * so we could avoid zeroing out some entries?
592  */
593 
594  for (i = 0; i < NStencilNodes*DofsPerNode*DofsPerNode; i++)
595  BandSol[ i] = 0.0;
596  for (i = 0; i < LDAB*N; i++) BandMat[ i] = 0.0;
597 
598  /*
599  * Fill BandMat and BandSol (which is initially the rhs) for each
600  * node in the interpolation stencil that is being computed.
601  */
602 
603  for (node_k=1; node_k <= NStencilNodes ; node_k++) {
604 
605  /* Map a Line and Layer number to a BlkRow in the fine level matrix
606  * and record the mapping from the sub-system to the BlkRow of the
607  * fine level matrix.
608  */
609  BlkRow = InvLineLayer[MyLine+(StartLayer+node_k-2)*NVertLines]+1;
610  Sub2FullMap[node_k] = BlkRow;
611 
612  /* Two cases:
613  * 1) the current layer is not a Cpoint layer. In this case we
614  * want to basically stick the matrix couplings to other
615  * nonzero stencil rows into the band matrix. One way to do
616  * this is to include couplings associated with only MyLine
617  * and ignore all the other couplings. However, what we do
618  * instead is to sum all the coupling at each layer participating
619  * in this interpolation stencil and stick this sum into BandMat.
620  * 2) the current layer is a Cpoint layer and so we
621  * stick an identity block in BandMat and rhs.
622  */
623  if (StartLayer+node_k-1 != MyLayer) {
624  for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
625 
626  j = (BlkRow-1)*DofsPerNode+dof_i;
627  ArrayView<const LO> AAcols;
628  ArrayView<const SC> AAvals;
629  Amat->getLocalRowView(j, AAcols, AAvals);
630  const int *Acols = AAcols.getRawPtr();
631  const SC *Avals = AAvals.getRawPtr();
632  RowLeng = AAvals.size();
633 
634  TEUCHOS_TEST_FOR_EXCEPTION(RowLeng >= MaxNnzPerRow, Exceptions::RuntimeError, "MakeSemiCoarsenP: recompile with larger Max(HorNeighborNodes)\n");
635 
636  for (i = 0; i < RowLeng; i++) {
637  LayDiff[i] = Layerdofs[Acols[i]]-StartLayer-node_k+2;
638 
639  /* This is the main spot where there might be off- */
640  /* processor communication. That is, when we */
641  /* average the stencil in the horizontal direction,*/
642  /* we need to know the layer id of some */
643  /* neighbors that might reside off-processor. */
644  }
645  PtRow = (node_k-1)*DofsPerNode+dof_i+1;
646  for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
647  PtCol = (node_k-1)*DofsPerNode+dof_j + 1;
648  /* Stick in entry corresponding to Mat(PtRow,PtCol) */
649  /* see dgbsv() comments for matrix format. */
650  TheSum = 0.0;
651  for (i = 0; i < RowLeng; i++) {
652  if ((LayDiff[i] == 0) && (Col2Dof[Acols[i]] == dof_j))
653  TheSum += Avals[i];
654  }
655  index = LDAB*(PtCol-1)+KLU+PtRow-PtCol;
656  BandMat[index] = TheSum;
657  if (node_k != NStencilNodes) {
658  /* Stick Mat(PtRow,PtCol+DofsPerNode) entry */
659  /* see dgbsv() comments for matrix format. */
660  TheSum = 0.0;
661  for (i = 0; i < RowLeng; i++) {
662  if ((LayDiff[i] == 1) &&(Col2Dof[Acols[i]]== dof_j))
663  TheSum += Avals[i];
664  }
665  j = PtCol+DofsPerNode;
666  index=LDAB*(j-1)+KLU+PtRow-j;
667  BandMat[index] = TheSum;
668  }
669  if (node_k != 1) {
670  /* Stick Mat(PtRow,PtCol-DofsPerNode) entry */
671  /* see dgbsv() comments for matrix format. */
672  TheSum = 0.0;
673  for (i = 0; i < RowLeng; i++) {
674  if ((LayDiff[i]== -1) &&(Col2Dof[Acols[i]]== dof_j))
675  TheSum += Avals[i];
676  }
677  j = PtCol-DofsPerNode;
678  index=LDAB*(j-1)+KLU+PtRow-j;
679  BandMat[index] = TheSum;
680  }
681  }
682  }
683  }
684  else {
685 
686  /* inject the null space */
687  // int FineStride = Ntotal*DofsPerNode;
688  // int CoarseStride= NVertLines*NCLayers*DofsPerNode;
689  for (int k = 0; k < static_cast<int>(fineNullspace->getNumVectors()); k++) {
690  Teuchos::ArrayRCP<SC> OneCNull = coarseNullspace->getDataNonConst(k);
691  Teuchos::ArrayRCP<SC> OneFNull = fineNullspace->getDataNonConst(k);
692  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
693  OneCNull[( col-1)*DofsPerNode+dof_i] = OneFNull[ (BlkRow-1)*DofsPerNode+dof_i];
694  }
695  }
696 
697  for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
698  /* Stick Mat(PtRow,PtRow) and Rhs(PtRow,dof_i+1) */
699  /* see dgbsv() comments for matrix format. */
700  PtRow = (node_k-1)*DofsPerNode+dof_i+1;
701  index = LDAB*(PtRow-1)+KLU;
702  BandMat[index] = 1.0;
703  BandSol[(dof_i)*DofsPerNode*NStencilNodes+PtRow-1] = 1.;
704  }
705  }
706  }
707 
708  /* Solve banded system and then stick result in Pmatrix arrays */
709 
710  lapack.GBTRF( N, N, KL, KU, BandMat, LDAB, IPIV, &INFO);
711 
712  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band factorization failed");
713 
714  lapack.GBTRS(notrans[0], N, KL, KU, NRHS, BandMat, LDAB, IPIV,
715  BandSol, N, &INFO );
716 
717  TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band solve back substitution failed");
718 
719  for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
720  for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
721  for (i =1; i <= NStencilNodes ; i++) {
722  index = (Sub2FullMap[i]-1)*DofsPerNode+dof_i+1;
723  loc = Pptr[index];
724  Pcols[loc] = (col-1)*DofsPerNode+dof_j+1;
725  Pvals[loc] = BandSol[dof_j*DofsPerNode*NStencilNodes +
726  (i-1)*DofsPerNode + dof_i];
727  Pptr[index]= Pptr[index] + 1;
728  }
729  }
730  }
731  }
732  }
733 
734  /*
735  * Squeeze the -1's out of the columns. At the same time convert Pcols
736  * so that now the first column is numbered '0' as opposed to '1'.
737  * Also, the arrays Pcols and Pvals should now use the zeroth element
738  * as opposed to just starting with the first element. Pptr will be
739  * fixed in the for loop below so that Pptr[0] = 0, etc.
740  */
741  CurRow = 1;
742  NewPtr = 1;
743  for (size_t ii=1; ii <= Pptr[Ntotal*DofsPerNode]-1; ii++) {
744  if (ii == Pptr[CurRow]) {
745  Pptr[CurRow] = LastGuy;
746  CurRow++;
747  while (ii > Pptr[CurRow]) {
748  Pptr[CurRow] = LastGuy;
749  CurRow++;
750  }
751  }
752  if (Pcols[ii] != -1) {
753  Pcols[NewPtr-1] = Pcols[ii]-1; /* these -1's fix the offset and */
754  Pvals[NewPtr-1] = Pvals[ii]; /* start using the zeroth element */
755  LastGuy = NewPtr;
756  NewPtr++;
757  }
758  }
759  for (i = CurRow; i <= Ntotal*DofsPerNode; i++) Pptr[CurRow] = LastGuy;
760 
761  /* Now move the pointers so that they now point to the beginning of each
762  * row as opposed to the end of each row
763  */
764  for (i=-Ntotal*DofsPerNode+1; i>= 2 ; i--) {
765  Pptr[i-1] = Pptr[i-2]; /* extra -1 added to start from 0 */
766  }
767  Pptr[0] = 0;
768 
769  ArrayRCP<size_t> rcpRowPtr;
770  ArrayRCP<LO> rcpColumns;
771  ArrayRCP<SC> rcpValues;
772 
773  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
774  LO nnz = Pptr[Ndofs];
775  PCrs->allocateAllValues(nnz, rcpRowPtr, rcpColumns, rcpValues);
776 
777  ArrayView<size_t> rowPtr = rcpRowPtr();
778  ArrayView<LO> columns = rcpColumns();
779  ArrayView<SC> values = rcpValues();
780 
781  // copy data over
782 
783  for (LO ii = 0; ii <= Ndofs; ii++) rowPtr[ii] = Pptr[ii];
784  for (LO ii = 0; ii < nnz; ii++) columns[ii] = Pcols[ii];
785  for (LO ii = 0; ii < nnz; ii++) values[ii] = Pvals[ii];
786  PCrs->setAllValues(rcpRowPtr, rcpColumns, rcpValues);
787  PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
788 
789 
790  return NCLayers*NVertLines*DofsPerNode;
791  }
792  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
794  // This function is a bit of a hack. We basically compute the semi-coarsening transfers and then throw
795  // away all the interpolation coefficients, instead replacing them by piecewise constants. The reason for this
796  // is that SemiCoarsening has no notion of aggregates so defining piecewise constants in the "usual way" is
797  // not possible. Instead, we look for the largest entry in each row, make it a one, and zero out the other
798  // non-zero entries
799 
800  ArrayView<const LocalOrdinal> inds;
801  ArrayView<const Scalar> vals1;
802  ArrayView< Scalar> vals;
803  Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
804  Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
805 
806  for (size_t i = 0; i < P->getRowMap()->getNodeNumElements(); i++) {
807  P->getLocalRowView(i, inds, vals1);
808 
809  size_t nnz = inds.size();
810  if (nnz != 0) vals = ArrayView<Scalar>(const_cast<Scalar*>(vals1.getRawPtr()), nnz);
811 
812  LO largestIndex = -1;
813  Scalar largestValue = ZERO;
814  /* find largest value in row and change that one to a 1 while the others are set to 0 */
815 
816  LO rowDof = i%BlkSize;
817  for (size_t j =0; j < nnz; j++) {
818  if (Teuchos::ScalarTraits<SC>::magnitude(vals[ j ]) >= Teuchos::ScalarTraits<SC>::magnitude(largestValue)) {
819  if ( inds[j]%BlkSize == rowDof ) {
820  largestValue = vals[j];
821  largestIndex = (int) j;
822  }
823  }
824  vals[j] = ZERO;
825  }
826  if (largestIndex != -1) vals[largestIndex] = ONE;
827  else
828  TEUCHOS_TEST_FOR_EXCEPTION(nnz > 0, Exceptions::RuntimeError, "no nonzero column associated with a proper dof within node.");
829 
830  if (Teuchos::ScalarTraits<SC>::magnitude(largestValue) == Teuchos::ScalarTraits<SC>::magnitude(ZERO)) vals[largestIndex] = ZERO;
831  }
832  }
833 
834 } //namespace MueLu
835 
836 #define MUELU_SEMICOARSENPFACTORY_SHORT
837 #endif // MUELU_SEMICOARSENPFACTORY_DEF_HPP
#define MaxHorNeighborNodes
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:96
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const NoFactory * get()
LO FindCpts(LO const PtsPerLine, LO const CoarsenRate, LO const Thin, LO **LayerCpts) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
LO MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[], LO const VertLineId[], LO const DofsPerNode, RCP< Matrix > &Amat, RCP< Matrix > &P, RCP< const Map > &coarseMap, const RCP< MultiVector > fineNullspace, RCP< MultiVector > &coarseNullspace) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void RevertToPieceWiseConstant(RCP< Matrix > &P, LO BlkSize) const
Namespace for MueLu classes and methods.