MueLu  Version of the Day
MueLu_BlackBoxPFactory_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_BLACKBOXPFACTORY_DEF_HPP
47 #define MUELU_BLACKBOXPFACTORY_DEF_HPP
48 
49 #include <stdlib.h>
50 #include <iomanip>
51 
52 
53 // #include <Teuchos_LAPACK.hpp>
54 #include <Teuchos_SerialDenseMatrix.hpp>
55 #include <Teuchos_SerialDenseVector.hpp>
56 #include <Teuchos_SerialDenseSolver.hpp>
57 
58 #include <Xpetra_CrsMatrixUtils.hpp>
59 #include <Xpetra_CrsMatrixWrap.hpp>
60 #include <Xpetra_ImportFactory.hpp>
61 #include <Xpetra_Matrix.hpp>
62 #include <Xpetra_MapFactory.hpp>
63 #include <Xpetra_MultiVectorFactory.hpp>
64 #include <Xpetra_VectorFactory.hpp>
65 
66 #include <Xpetra_IO.hpp>
67 
70 
71 #include "MueLu_MasterList.hpp"
72 #include "MueLu_Monitor.hpp"
73 
74 namespace MueLu {
75 
76  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
78  RCP<ParameterList> validParamList = rcp(new ParameterList());
79 
80  // Coarsen can come in two forms, either a single char that will be interpreted as an integer
81  // which is used as the coarsening rate in every spatial dimentions,
82  // or it can be a longer string that will then be interpreted as an array of integers.
83  // By default coarsen is set as "{2}", hence a coarsening rate of 2 in every spatial dimension
84  // is the default setting!
85  validParamList->set<std::string > ("Coarsen", "{3}", "Coarsening rate in all spatial dimensions");
86  validParamList->set<RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
87  validParamList->set<RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
88  validParamList->set<RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coorindates");
89  validParamList->set<RCP<const FactoryBase> >("gNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
90  validParamList->set<RCP<const FactoryBase> >("lNodesPerDim", Teuchos::null, "Number of nodes per spatial dimmension provided by CoordinatesTransferFactory.");
91  validParamList->set<std::string> ("stencil type", "full", "You can use two type of stencils: full and reduced, that correspond to 27 and 7 points stencils respectively in 3D.");
92  validParamList->set<std::string> ("block strategy", "coupled", "The strategy used to handle systems of PDEs can be: coupled or uncoupled.");
93 
94  return validParamList;
95  }
96 
97  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
99  Level& /* coarseLevel */)
100  const {
101  Input(fineLevel, "A");
102  Input(fineLevel, "Nullspace");
103  Input(fineLevel, "Coordinates");
104  // Request the global number of nodes per dimensions
105  if(fineLevel.GetLevelID() == 0) {
106  if(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get())) {
107  fineLevel.DeclareInput("gNodesPerDim", NoFactory::get(), this);
108  } else {
109  TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("gNodesPerDim", NoFactory::get()),
111  "gNodesPerDim was not provided by the user on level0!");
112  }
113  } else {
114  Input(fineLevel, "gNodesPerDim");
115  }
116 
117  // Request the local number of nodes per dimensions
118  if(fineLevel.GetLevelID() == 0) {
119  if(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get())) {
120  fineLevel.DeclareInput("lNodesPerDim", NoFactory::get(), this);
121  } else {
122  TEUCHOS_TEST_FOR_EXCEPTION(fineLevel.IsAvailable("lNodesPerDim", NoFactory::get()),
124  "lNodesPerDim was not provided by the user on level0!");
125  }
126  } else {
127  Input(fineLevel, "lNodesPerDim");
128  }
129  }
130 
131  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
133  Level& coarseLevel) const{
134  return BuildP(fineLevel, coarseLevel);
135 
136  }
137 
138  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
140  Level& coarseLevel)const{
141  FactoryMonitor m(*this, "Build", coarseLevel);
142 
143  // Get parameter list
144  const ParameterList& pL = GetParameterList();
145 
146  // obtain general variables
147  RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
148  RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
149  RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> > coordinates =
150  Get< RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> > >(fineLevel, "Coordinates");
151  LO numDimensions = coordinates->getNumVectors();
152  LO BlkSize = A->GetFixedBlockSize();
153 
154  // Get fine level geometric data: g(l)FineNodesPerDir and g(l)NumFineNodes
155  Array<GO> gFineNodesPerDir(3);
156  Array<LO> lFineNodesPerDir(3);
157  // Get the number of points in each direction
158  if(fineLevel.GetLevelID() == 0) {
159  gFineNodesPerDir = fineLevel.Get<Array<GO> >("gNodesPerDim", NoFactory::get());
160  lFineNodesPerDir = fineLevel.Get<Array<LO> >("lNodesPerDim", NoFactory::get());
161  } else {
162  // Loading global number of nodes per diretions
163  gFineNodesPerDir = Get<Array<GO> >(fineLevel, "gNodesPerDim");
164 
165  // Loading local number of nodes per diretions
166  lFineNodesPerDir = Get<Array<LO> >(fineLevel, "lNodesPerDim");
167  }
168  for(LO i = 0; i < 3; ++i) {
169  if(gFineNodesPerDir[i] == 0) {
170  GetOStream(Runtime0) << "gNodesPerDim in direction " << i << " is set to 1 from 0"
171  << std::endl;
172  gFineNodesPerDir[i] = 1;
173  }
174  if(lFineNodesPerDir[i] == 0) {
175  GetOStream(Runtime0) << "lNodesPerDim in direction " << i << " is set to 1 from 0"
176  << std::endl;
177  lFineNodesPerDir[i] = 1;
178  }
179  }
180  GO gNumFineNodes = gFineNodesPerDir[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
181  LO lNumFineNodes = lFineNodesPerDir[2]*lFineNodesPerDir[1]*lFineNodesPerDir[0];
182 
183  // Get the coarsening rate
184  std::string coarsenRate = pL.get<std::string>("Coarsen");
185  Array<LO> coarseRate(3);
186  {
187  Teuchos::Array<LO> crates;
188  try {
189  crates = Teuchos::fromStringToArray<LO>(coarsenRate);
190  } catch(const Teuchos::InvalidArrayStringRepresentation& e) {
191  GetOStream(Errors,-1) << " *** Coarsen must be a string convertible into an array! *** "
192  << std::endl;
193  throw e;
194  }
195  TEUCHOS_TEST_FOR_EXCEPTION((crates.size() > 1) && (crates.size() < numDimensions),
197  "Coarsen must have at least as many components as the number of"
198  " spatial dimensions in the problem.");
199  for(LO i = 0; i < 3; ++i) {
200  if(i < numDimensions) {
201  if(crates.size() == 1) {
202  coarseRate[i] = crates[0];
203  } else if(i < crates.size()) {
204  coarseRate[i] = crates[i];
205  } else {
206  GetOStream(Errors,-1) << " *** Coarsen must be at least as long as the number of"
207  " spatial dimensions! *** " << std::endl;
208  throw Exceptions::RuntimeError(" *** Coarsen must be at least as long as the number of"
209  " spatial dimensions! *** \n");
210  }
211  } else {
212  coarseRate[i] = 1;
213  }
214  }
215  } // End of scope for crates
216 
217  // Get the stencil type used for discretization
218  const std::string stencilType = pL.get<std::string>("stencil type");
219  if(stencilType != "full" && stencilType != "reduced") {
220  GetOStream(Errors,-1) << " *** stencil type must be set to: full or reduced, any other value "
221  "is trated as an error! *** " << std::endl;
222  throw Exceptions::RuntimeError(" *** stencil type is neither full, nor reduced! *** \n");
223  }
224 
225  // Get the strategy for PDE systems
226  const std::string blockStrategy = pL.get<std::string>("block strategy");
227  if(blockStrategy != "coupled" && blockStrategy != "uncoupled") {
228  GetOStream(Errors,-1) << " *** block strategy must be set to: coupled or uncoupled, any other "
229  "value is trated as an error! *** " << std::endl;
230  throw Exceptions::RuntimeError(" *** block strategy is neither coupled, nor uncoupled! *** \n");
231  }
232 
233  GO gNumCoarseNodes = 0;
234  LO lNumCoarseNodes = 0;
235  Array<GO> gIndices(3), gCoarseNodesPerDir(3), ghostGIDs, coarseNodesGIDs, colGIDs;
236  Array<LO> myOffset(3), lCoarseNodesPerDir(3), glCoarseNodesPerDir(3), endRate(3);
237  Array<bool> ghostInterface(6);
238  Array<int> boundaryFlags(3);
239  ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes(numDimensions);
240  Array<ArrayView<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > fineNodes(numDimensions);
241  for(LO dim = 0; dim < numDimensions; ++dim) {fineNodes[dim] = coordinates->getData(dim)();}
242 
243  // This struct stores PIDs, LIDs and GIDs on the fine mesh and GIDs on the coarse mesh.
244  RCP<NodesIDs> ghostedCoarseNodes = rcp(new NodesIDs{});
245 
246  GetGeometricData(coordinates, coarseRate, gFineNodesPerDir, lFineNodesPerDir, BlkSize,// inputs
247  gIndices, myOffset, ghostInterface, endRate, gCoarseNodesPerDir, // outputs
248  lCoarseNodesPerDir, glCoarseNodesPerDir, ghostGIDs, coarseNodesGIDs, colGIDs,
249  gNumCoarseNodes, lNumCoarseNodes, coarseNodes, boundaryFlags,
250  ghostedCoarseNodes);
251 
252  // Create the MultiVector of coarse coordinates
253  Xpetra::UnderlyingLib lib = coordinates->getMap()->lib();
254  RCP<const Map> coarseCoordsMap = MapFactory::Build (lib,
255  gNumCoarseNodes,
256  coarseNodesGIDs.view(0, lNumCoarseNodes),
257  coordinates->getMap()->getIndexBase(),
258  coordinates->getMap()->getComm());
259  Array<ArrayView<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseCoords(numDimensions);
260  for(LO dim = 0; dim < numDimensions; ++dim) {
261  coarseCoords[dim] = coarseNodes[dim]();
262  }
263  RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> > coarseCoordinates =
264  Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO>::Build(coarseCoordsMap, coarseCoords(),
265  numDimensions);
266 
267  // Now create a new matrix: Aghost that contains all the data
268  // locally needed to compute the local part of the prolongator.
269  // Here assuming that all the coarse nodes o and fine nodes +
270  // are local then all the data associated with the coarse
271  // nodes O and the fine nodes * needs to be imported.
272  //
273  // *--*--*--*--*--*--*--*
274  // | | | | | | | |
275  // o--+--+--o--+--+--O--*
276  // | | | | | | | |
277  // +--+--+--+--+--+--*--*
278  // | | | | | | | |
279  // +--+--+--+--+--+--*--*
280  // | | | | | | | |
281  // o--+--+--o--+--+--O--*
282  // | | | | | | | |
283  // +--+--+--+--+--+--*--*
284  // | | | | | | | |
285  // *--*--*--*--*--*--*--*
286  // | | | | | | | |
287  // O--*--*--O--*--*--O--*
288  //
289  // Creating that local matrix is easy enough using proper range
290  // and domain maps to import data from A. Note that with this
291  // approach we reorder the local entries using the domain map and
292  // can subsequently compute the prolongator without reordering.
293  // As usual we need to be careful about any coarsening rate
294  // change at the boundary!
295 
296  // The ingredients needed are an importer, a range map and a domain map
297  Array<GO> ghostRowGIDs, ghostColGIDs, nodeSteps(3);
298  nodeSteps[0] = 1;
299  nodeSteps[1] = gFineNodesPerDir[0];
300  nodeSteps[2] = gFineNodesPerDir[0]*gFineNodesPerDir[1];
301  Array<LO> glFineNodesPerDir(3);
302  GO startingGID = A->getRowMap()->getMinGlobalIndex();
303  for(LO dim = 0; dim < 3; ++dim) {
304  LO numCoarseNodes = 0;
305  if(dim < numDimensions) {
306  startingGID -= myOffset[dim]*nodeSteps[dim];
307  numCoarseNodes = lCoarseNodesPerDir[dim];
308  if(ghostInterface[2*dim]) {++numCoarseNodes;}
309  if(ghostInterface[2*dim+1]) {++numCoarseNodes;}
310  if(gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
311  glFineNodesPerDir[dim] = (numCoarseNodes - 2)*coarseRate[dim] + endRate[dim] + 1;
312  } else {
313  glFineNodesPerDir[dim] = (numCoarseNodes - 1)*coarseRate[dim] + 1;
314  }
315  } else {
316  glFineNodesPerDir[dim] = 1;
317  }
318  }
319  ghostRowGIDs.resize(glFineNodesPerDir[0]*glFineNodesPerDir[1]*glFineNodesPerDir[2]*BlkSize);
320  for(LO k = 0; k < glFineNodesPerDir[2]; ++k) {
321  for(LO j = 0; j < glFineNodesPerDir[1]; ++j) {
322  for(LO i = 0; i < glFineNodesPerDir[0]; ++i) {
323  for(LO l = 0; l < BlkSize; ++l) {
324  ghostRowGIDs[(k*glFineNodesPerDir[1]*glFineNodesPerDir[0]
325  + j*glFineNodesPerDir[0] + i)*BlkSize + l] = startingGID
326  + (k*gFineNodesPerDir[1]*gFineNodesPerDir[0] + j*gFineNodesPerDir[0] + i)*BlkSize + l;
327  }
328  }
329  }
330  }
331 
332  // Looking at the above loops it is easy to find startingGID for the ghostColGIDs
333  Array<GO> startingGlobalIndices(numDimensions), dimStride(numDimensions);
334  Array<GO> startingColIndices(numDimensions), finishingColIndices(numDimensions);
335  GO colMinGID = 0;
336  Array<LO> colRange(numDimensions);
337  dimStride[0] = 1;
338  for(int dim = 1; dim < numDimensions; ++dim) {
339  dimStride[dim] = dimStride[dim - 1]*gFineNodesPerDir[dim - 1];
340  }
341  {
342  GO tmp = startingGID;
343  for(int dim = numDimensions; dim > 0; --dim) {
344  startingGlobalIndices[dim - 1] = tmp / dimStride[dim - 1];
345  tmp = tmp % dimStride[dim - 1];
346 
347  if(startingGlobalIndices[dim - 1] > 0) {
348  startingColIndices[dim - 1] = startingGlobalIndices[dim - 1] - 1;
349  }
350  if(startingGlobalIndices[dim - 1] + glFineNodesPerDir[dim - 1] < gFineNodesPerDir[dim - 1]){
351  finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1]
352  + glFineNodesPerDir[dim - 1];
353  } else {
354  finishingColIndices[dim - 1] = startingGlobalIndices[dim - 1]
355  + glFineNodesPerDir[dim - 1] - 1;
356  }
357  colRange[dim - 1] = finishingColIndices[dim - 1] - startingColIndices[dim - 1] + 1;
358  colMinGID += startingColIndices[dim - 1]*dimStride[dim - 1];
359  }
360  }
361  ghostColGIDs.resize(colRange[0]*colRange[1]*colRange[2]*BlkSize);
362  for(LO k = 0; k < colRange[2]; ++k) {
363  for(LO j = 0; j < colRange[1]; ++j) {
364  for(LO i = 0; i < colRange[0]; ++i) {
365  for(LO l = 0; l < BlkSize; ++l) {
366  ghostColGIDs[(k*colRange[1]*colRange[0] + j*colRange[0] + i)*BlkSize + l] = colMinGID
367  + (k*gFineNodesPerDir[1]*gFineNodesPerDir[0] + j*gFineNodesPerDir[0] + i)*BlkSize + l;
368  }
369  }
370  }
371  }
372 
373  RCP<const Map> ghostedRowMap = Xpetra::MapFactory<LO,GO,NO>::Build(A->getRowMap()->lib(),
374  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
375  ghostRowGIDs(),
376  A->getRowMap()->getIndexBase(),
377  A->getRowMap()->getComm());
378  RCP<const Map> ghostedColMap = Xpetra::MapFactory<LO,GO,NO>::Build(A->getRowMap()->lib(),
379  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
380  ghostColGIDs(),
381  A->getRowMap()->getIndexBase(),
382  A->getRowMap()->getComm());
383  RCP<const Import> ghostImporter = Xpetra::ImportFactory<LO,GO,NO>::Build(A->getRowMap(),
384  ghostedRowMap);
385  RCP<const Matrix> Aghost = Xpetra::MatrixFactory<SC,LO,GO,NO>::Build(A, *ghostImporter,
386  ghostedRowMap,
387  ghostedRowMap);
388 
389  // Create the maps and data structures for the projection matrix
390  RCP<const Map> rowMapP = A->getDomainMap();
391 
392  RCP<const Map> domainMapP;
393 
394  RCP<const Map> colMapP;
395 
396  // At this point we need to create the column map which is a delicate operation.
397  // The entries in that map need to be ordered as follows:
398  // 1) first owned entries ordered by LID
399  // 2) second order the remaining entries by PID
400  // 3) entries with the same remote PID are ordered by GID.
401  // One piece of good news: lNumCoarseNodes is the number of ownedNodes and lNumGhostNodes
402  // is the number of remote nodes. The sorting can be limited to remote nodes
403  // as the owned ones are alreaded ordered by LID!
404 
405  LO lNumGhostedNodes = ghostedCoarseNodes->GIDs.size();
406  {
407  std::vector<NodeID> colMapOrdering(lNumGhostedNodes);
408  for(LO ind = 0; ind < lNumGhostedNodes; ++ind) {
409  colMapOrdering[ind].GID = ghostedCoarseNodes->GIDs[ind];
410  if(ghostedCoarseNodes->PIDs[ind] == rowMapP->getComm()->getRank()) {
411  colMapOrdering[ind].PID = -1;
412  } else {
413  colMapOrdering[ind].PID = ghostedCoarseNodes->PIDs[ind];
414  }
415  colMapOrdering[ind].LID = ghostedCoarseNodes->LIDs[ind];
416  colMapOrdering[ind].lexiInd = ind;
417  }
418  std::sort(colMapOrdering.begin(), colMapOrdering.end(),
419  [](NodeID a, NodeID b)->bool{
420  return ( (a.PID < b.PID) || ((a.PID == b.PID) && (a.GID < b.GID)) );
421  });
422 
423  colGIDs.resize(BlkSize*lNumGhostedNodes);
424  for(LO ind = 0; ind < lNumGhostedNodes; ++ind) {
425  // Save the permutation calculated to go from Lexicographic indexing to column map indexing
426  ghostedCoarseNodes->colInds[colMapOrdering[ind].lexiInd] = ind;
427  for(LO dof = 0; dof < BlkSize; ++dof) {
428  colGIDs[BlkSize*ind + dof] = BlkSize*colMapOrdering[ind].GID + dof;
429  }
430  }
431  domainMapP = Xpetra::MapFactory<LO,GO,NO>::Build(rowMapP->lib(),
432  BlkSize*gNumCoarseNodes,
433  colGIDs.view(0,BlkSize*lNumCoarseNodes),
434  rowMapP->getIndexBase(),
435  rowMapP->getComm());
436  colMapP = Xpetra::MapFactory<LO,GO,NO>::Build(rowMapP->lib(),
437  Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
438  colGIDs.view(0, colGIDs.size()),
439  rowMapP->getIndexBase(),
440  rowMapP->getComm());
441  } // End of scope for colMapOrdering and colGIDs
442 
443  std::vector<size_t> strideInfo(1);
444  strideInfo[0] = BlkSize;
445  RCP<const Map> stridedDomainMapP = Xpetra::StridedMapFactory<LO,GO,NO>::Build(domainMapP,
446  strideInfo);
447 
448  GO gnnzP = 0;
449  LO lnnzP = 0;
450  // coarse points have one nnz per row
451  gnnzP += gNumCoarseNodes;
452  lnnzP += lNumCoarseNodes;
453  // add all other points multiplying by 2^numDimensions
454  gnnzP += (gNumFineNodes - gNumCoarseNodes)*std::pow(2, numDimensions);
455  lnnzP += (lNumFineNodes - lNumCoarseNodes)*std::pow(2, numDimensions);
456  // finally multiply by the number of dofs per node
457  gnnzP = gnnzP*BlkSize;
458  lnnzP = lnnzP*BlkSize;
459 
460  // Create the matrix itself using the above maps
461  RCP<Matrix> P;
462  P = rcp(new CrsMatrixWrap(rowMapP, colMapP, 0));
463  RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
464 
465  ArrayRCP<size_t> iaP;
466  ArrayRCP<LO> jaP;
467  ArrayRCP<SC> valP;
468 
469  PCrs->allocateAllValues(lnnzP, iaP, jaP, valP);
470 
471  ArrayView<size_t> ia = iaP();
472  ArrayView<LO> ja = jaP();
473  ArrayView<SC> val = valP();
474  ia[0] = 0;
475 
476 
477  LO numCoarseElements = 1;
478  Array<LO> lCoarseElementsPerDir(3);
479  for(LO dim = 0; dim < numDimensions; ++dim) {
480  lCoarseElementsPerDir[dim] = lCoarseNodesPerDir[dim];
481  if(ghostInterface[2*dim]) {++lCoarseElementsPerDir[dim];}
482  if(!ghostInterface[2*dim+1]) {--lCoarseElementsPerDir[dim];}
483  numCoarseElements = numCoarseElements*lCoarseElementsPerDir[dim];
484  }
485 
486  for(LO dim = numDimensions; dim < 3; ++dim) {
487  lCoarseElementsPerDir[dim] = 1;
488  }
489 
490  // Loop over the coarse elements
491  Array<int> elementFlags(3);
492  Array<LO> elemInds(3), elementNodesPerDir(3), glElementRefTuple(3);
493  Array<LO> glElementRefTupleCG(3), glElementCoarseNodeCG(8);
494  const int numCoarseNodesInElement = std::pow(2, numDimensions);
495  const int nnzPerCoarseNode = (blockStrategy == "coupled") ? BlkSize : 1;
496  const int numRowsPerPoint = BlkSize;
497  for(elemInds[2] = 0; elemInds[2] < lCoarseElementsPerDir[2]; ++elemInds[2]) {
498  for(elemInds[1] = 0; elemInds[1] < lCoarseElementsPerDir[1]; ++elemInds[1]) {
499  for(elemInds[0] = 0; elemInds[0] < lCoarseElementsPerDir[0]; ++elemInds[0]) {
500  elementFlags[0] = 0; elementFlags[1] = 0; elementFlags[2] = 0;
501  for(int dim = 0; dim < 3; ++dim) {
502  // Detect boundary conditions on the element and set corresponding flags.
503  if(elemInds[dim] == 0 && elemInds[dim] == lCoarseElementsPerDir[dim] - 1) {
504  elementFlags[dim] = boundaryFlags[dim];
505  } else if(elemInds[dim] == 0 && (boundaryFlags[dim] == 1 || boundaryFlags[dim] == 3)) {
506  elementFlags[dim] += 1;
507  } else if((elemInds[dim] == lCoarseElementsPerDir[dim] - 1)
508  && (boundaryFlags[dim] == 2 || boundaryFlags[dim] == 3)) {
509  elementFlags[dim] += 2;
510  } else {
511  elementFlags[dim] = 0;
512  }
513 
514  // Compute the number of nodes in the current element.
515  if(dim < numDimensions) {
516  if((elemInds[dim] == lCoarseElementsPerDir[dim])
517  && (gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim])) {
518  elementNodesPerDir[dim] = endRate[dim] + 1;
519  } else {
520  elementNodesPerDir[dim] = coarseRate[dim] + 1;
521  }
522  } else {
523  elementNodesPerDir[dim] = 1;
524  }
525 
526  // Get the lowest tuple of the element using the ghosted local coordinate system
527  glElementRefTuple[dim] = elemInds[dim]*coarseRate[dim];
528  glElementRefTupleCG[dim] = elemInds[dim];
529  }
530 
531  // Now get the column map indices corresponding to the dofs associated with the current
532  // element's coarse nodes.
533  for(typename Array<LO>::size_type elem = 0; elem < glElementCoarseNodeCG.size(); ++elem) {
534  glElementCoarseNodeCG[elem]
535  = glElementRefTupleCG[2]*glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0]
536  + glElementRefTupleCG[1]*glCoarseNodesPerDir[0] + glElementRefTupleCG[0];
537  }
538  glElementCoarseNodeCG[4] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
539  glElementCoarseNodeCG[5] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
540  glElementCoarseNodeCG[6] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
541  glElementCoarseNodeCG[7] += glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0];
542 
543  glElementCoarseNodeCG[2] += glCoarseNodesPerDir[0];
544  glElementCoarseNodeCG[3] += glCoarseNodesPerDir[0];
545  glElementCoarseNodeCG[6] += glCoarseNodesPerDir[0];
546  glElementCoarseNodeCG[7] += glCoarseNodesPerDir[0];
547 
548  glElementCoarseNodeCG[1] += 1;
549  glElementCoarseNodeCG[3] += 1;
550  glElementCoarseNodeCG[5] += 1;
551  glElementCoarseNodeCG[7] += 1;
552 
553  LO numNodesInElement = elementNodesPerDir[0]*elementNodesPerDir[1]*elementNodesPerDir[2];
554  // LO elementOffset = elemInds[2]*coarseRate[2]*glFineNodesPerDir[1]*glFineNodesPerDir[0]
555  // + elemInds[1]*coarseRate[1]*glFineNodesPerDir[0] + elemInds[0]*coarseRate[0];
556 
557  // Compute the element prolongator
558  Teuchos::SerialDenseMatrix<LO,SC> Pi, Pf, Pe;
559  Array<LO> dofType(numNodesInElement*BlkSize), lDofInd(numNodesInElement*BlkSize);
560  ComputeLocalEntries(Aghost, coarseRate, endRate, BlkSize, elemInds, lCoarseElementsPerDir,
561  numDimensions, glFineNodesPerDir, gFineNodesPerDir, gIndices,
562  lCoarseNodesPerDir, ghostInterface, elementFlags, stencilType,
563  blockStrategy, elementNodesPerDir, numNodesInElement, colGIDs,
564  Pi, Pf, Pe, dofType, lDofInd);
565 
566  // Find ghosted LID associated with nodes in the element and eventually which of these
567  // nodes are ghosts, this information is used to fill the local prolongator.
568  Array<LO> lNodeLIDs(numNodesInElement);
569  {
570  Array<LO> lNodeTuple(3), nodeInd(3);
571  for(nodeInd[2] = 0; nodeInd[2] < elementNodesPerDir[2]; ++nodeInd[2]) {
572  for(nodeInd[1] = 0; nodeInd[1] < elementNodesPerDir[1]; ++nodeInd[1]) {
573  for(nodeInd[0] = 0; nodeInd[0] < elementNodesPerDir[0]; ++nodeInd[0]) {
574  int stencilLength = 0;
575  if((nodeInd[0] == 0 || nodeInd[0] == elementNodesPerDir[0] - 1) &&
576  (nodeInd[1] == 0 || nodeInd[1] == elementNodesPerDir[1] - 1) &&
577  (nodeInd[2] == 0 || nodeInd[2] == elementNodesPerDir[2] - 1)) {
578  stencilLength = 1;
579  } else {
580  stencilLength = std::pow(2, numDimensions);
581  }
582  LO nodeElementInd = nodeInd[2]*elementNodesPerDir[1]*elementNodesPerDir[1]
583  + nodeInd[1]*elementNodesPerDir[0] + nodeInd[0];
584  for(int dim = 0; dim < 3; ++dim) {
585  lNodeTuple[dim] = glElementRefTuple[dim] - myOffset[dim] + nodeInd[dim];
586  }
587  if(lNodeTuple[0] < 0 || lNodeTuple[1] < 0 || lNodeTuple[2] < 0 ||
588  lNodeTuple[0] > lFineNodesPerDir[0] - 1 ||
589  lNodeTuple[1] > lFineNodesPerDir[1] - 1 ||
590  lNodeTuple[2] > lFineNodesPerDir[2] - 1) {
591  // This flags the ghosts nodes used for prolongator calculation but for which
592  // we do not own the row, hence we won't fill these values on this rank.
593  lNodeLIDs[nodeElementInd] = -1;
594  } else if ((nodeInd[0] == 0 && elemInds[0] !=0) ||
595  (nodeInd[1] == 0 && elemInds[1] !=0) ||
596  (nodeInd[2] == 0 && elemInds[2] !=0)) {
597  // This flags nodes that are owned but common to two coarse elements and that
598  // were already filled by another element, we don't want to fill them twice so
599  // we skip them
600  lNodeLIDs[nodeElementInd] = -2;
601  } else {
602  // The remaining nodes are locally owned and we need to fill the coresponding
603  // rows of the prolongator
604 
605  // First we need to find which row needs to be filled
606  lNodeLIDs[nodeElementInd] = lNodeTuple[2]*lFineNodesPerDir[1]*lFineNodesPerDir[0]
607  + lNodeTuple[1]*lFineNodesPerDir[0] + lNodeTuple[0];
608 
609  // We also compute the row offset using arithmetic to ensure that we can loop
610  // easily over the nodes in a macro-element as well as facilitate on-node
611  // parallelization. The node serial code could be rewritten with two loops over
612  // the local part of the mesh to avoid the costly integer divisions...
613  Array<LO> refCoarsePointTuple(3);
614  for(int dim = 2; dim > -1; --dim) {
615  if(dim == 0) {
616  refCoarsePointTuple[dim] = (lNodeTuple[dim] + myOffset[dim]) / coarseRate[dim];
617  if(myOffset[dim] == 0) {
618  ++refCoarsePointTuple[dim];
619  }
620  } else {
621  // Note: no need for magnitudeType here, just use double because these things are LO's
622  refCoarsePointTuple[dim] =
623  std::ceil(static_cast<double>(lNodeTuple[dim] + myOffset[dim])
624  / coarseRate[dim]);
625  }
626  if((lNodeTuple[dim] + myOffset[dim]) % coarseRate[dim] > 0) {break;}
627  }
628  const LO numCoarsePoints = refCoarsePointTuple[0]
629  + refCoarsePointTuple[1]*lCoarseNodesPerDir[0]
630  + refCoarsePointTuple[2]*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0];
631  const LO numFinePoints = lNodeLIDs[nodeElementInd] + 1;
632 
633  // The below formula computes the rowPtr for row 'n+BlkSize' and we are about to
634  // fill row 'n' to 'n+BlkSize'.
635  size_t rowPtr = (numFinePoints - numCoarsePoints)*numRowsPerPoint
636  *numCoarseNodesInElement*nnzPerCoarseNode + numCoarsePoints*numRowsPerPoint;
637  if(dofType[nodeElementInd*BlkSize] == 0) {
638  // Check if current node is a coarse point
639  rowPtr = rowPtr - numRowsPerPoint;
640  } else {
641  rowPtr = rowPtr - numRowsPerPoint*numCoarseNodesInElement*nnzPerCoarseNode;
642  }
643 
644  for(int dof = 0; dof < BlkSize; ++dof) {
645 
646  // Now we loop over the stencil and fill the column indices and row values
647  int cornerInd = 0;
648  switch(dofType[nodeElementInd*BlkSize + dof]) {
649  case 0: // Corner node
650  if(nodeInd[2] == elementNodesPerDir[2] - 1) {cornerInd += 4;}
651  if(nodeInd[1] == elementNodesPerDir[1] - 1) {cornerInd += 2;}
652  if(nodeInd[0] == elementNodesPerDir[0] - 1) {cornerInd += 1;}
653  ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1] = rowPtr + dof + 1;
654  ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[cornerInd]]*BlkSize + dof;
655  val[rowPtr + dof] = 1.0;
656  break;
657 
658  case 1: // Edge node
659  ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
660  = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
661  for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
662  if(blockStrategy == "coupled") {
663  for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
664  size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
665  + ind1*BlkSize + ind2;
666  ja[lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
667  val[lRowPtr] = Pe(lDofInd[nodeElementInd*BlkSize + dof],
668  ind1*BlkSize + ind2);
669  }
670  } else if(blockStrategy == "uncoupled") {
671  size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
672  + ind1;
673  ja[rowPtr + dof] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
674  val[lRowPtr] = Pe(lDofInd[nodeElementInd*BlkSize + dof],
675  ind1*BlkSize + dof);
676  }
677  }
678  break;
679 
680  case 2: // Face node
681  ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
682  = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
683  for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
684  if(blockStrategy == "coupled") {
685  for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
686  size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
687  + ind1*BlkSize + ind2;
688  ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
689  val[lRowPtr] = Pf(lDofInd[nodeElementInd*BlkSize + dof],
690  ind1*BlkSize + ind2);
691  }
692  } else if(blockStrategy == "uncoupled") {
693  size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
694  + ind1;
695  // ja [lRowPtr] = colGIDs[glElementCoarseNodeCG[ind1]*BlkSize + dof];
696  ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
697  val[lRowPtr] = Pf(lDofInd[nodeElementInd*BlkSize + dof],
698  ind1*BlkSize + dof);
699  }
700  }
701  break;
702 
703  case 3: // Interior node
704  ia[lNodeLIDs[nodeElementInd]*BlkSize + dof + 1]
705  = rowPtr + (dof + 1)*numCoarseNodesInElement*nnzPerCoarseNode;
706  for(int ind1 = 0; ind1 < stencilLength; ++ind1) {
707  if(blockStrategy == "coupled") {
708  for(int ind2 = 0; ind2 < BlkSize; ++ind2) {
709  size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
710  + ind1*BlkSize + ind2;
711  ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + ind2;
712  val[lRowPtr] = Pi(lDofInd[nodeElementInd*BlkSize + dof],
713  ind1*BlkSize + ind2);
714  }
715  } else if(blockStrategy == "uncoupled") {
716  size_t lRowPtr = rowPtr + dof*numCoarseNodesInElement*nnzPerCoarseNode
717  + ind1;
718  ja [lRowPtr] = ghostedCoarseNodes->colInds[glElementCoarseNodeCG[ind1]]*BlkSize + dof;
719  val[lRowPtr] = Pi(lDofInd[nodeElementInd*BlkSize + dof],
720  ind1*BlkSize + dof);
721  }
722  }
723  break;
724  }
725  }
726  }
727  }
728  }
729  }
730  } // End of scopt for lNodeTuple and nodeInd
731  }
732  }
733  }
734 
735  // Sort all row's column indicies and entries by LID
736  Xpetra::CrsMatrixUtils<SC,LO,GO,NO>::sortCrsEntries(ia, ja, val, rowMapP->lib());
737 
738  // Set the values of the prolongation operators into the CrsMatrix P and call FillComplete
739  PCrs->setAllValues(iaP, jaP, valP);
740  PCrs->expertStaticFillComplete(domainMapP,rowMapP);
741 
742  // set StridingInformation of P
743  if (A->IsView("stridedMaps") == true) {
744  P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), stridedDomainMapP);
745  } else {
746  P->CreateView("stridedMaps", P->getRangeMap(), stridedDomainMapP);
747  }
748 
749  // store the transfer operator and node coordinates on coarse level
750  Set(coarseLevel, "P", P);
751  Set(coarseLevel, "coarseCoordinates", coarseCoordinates);
752  Set<Array<GO> >(coarseLevel, "gCoarseNodesPerDim", gCoarseNodesPerDir);
753  Set<Array<LO> >(coarseLevel, "lCoarseNodesPerDim", lCoarseNodesPerDir);
754 
755  }
756 
757  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
759  GetGeometricData(RCP<Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::magnitudeType,LO,GO,NO> >& coordinates,
760  const Array<LO> coarseRate, const Array<GO> gFineNodesPerDir,
761  const Array<LO> lFineNodesPerDir, const LO BlkSize, Array<GO>& gIndices,
762  Array<LO>& myOffset, Array<bool>& ghostInterface, Array<LO>& endRate,
763  Array<GO>& gCoarseNodesPerDir, Array<LO>& lCoarseNodesPerDir,
764  Array<LO>& glCoarseNodesPerDir, Array<GO>& ghostGIDs, Array<GO>& coarseNodesGIDs,
765  Array<GO>& colGIDs, GO& gNumCoarseNodes, LO& lNumCoarseNodes,
766  ArrayRCP<Array<typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > coarseNodes, Array<int>& boundaryFlags,
767  RCP<NodesIDs> ghostedCoarseNodes) const {
768  // This function is extracting the geometric information from the coordinates
769  // and creates the necessary data/formatting to perform locally the calculation
770  // of the pronlongator.
771  //
772  // inputs:
773 
774  RCP<const Map> coordinatesMap = coordinates->getMap();
775  LO numDimensions = coordinates->getNumVectors();
776 
777  // Using the coarsening rate and the fine level data,
778  // compute coarse level data
779 
780  // Phase 1 //
781  // ------------------------------------------------------------------ //
782  // We first start by finding small informations on the mesh such as //
783  // the number of coarse nodes (local and global) and the number of //
784  // ghost nodes / the end rate of coarsening. //
785  // ------------------------------------------------------------------ //
786  GO minGlobalIndex = coordinatesMap->getMinGlobalIndex();
787  {
788  GO tmp;
789  gIndices[2] = minGlobalIndex / (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
790  tmp = minGlobalIndex % (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
791  gIndices[1] = tmp / gFineNodesPerDir[0];
792  gIndices[0] = tmp % gFineNodesPerDir[0];
793 
794  myOffset[2] = gIndices[2] % coarseRate[2];
795  myOffset[1] = gIndices[1] % coarseRate[1];
796  myOffset[0] = gIndices[0] % coarseRate[0];
797  }
798 
799  for(int dim = 0; dim < 3; ++dim) {
800  if(gIndices[dim] == 0) {
801  boundaryFlags[dim] += 1;
802  }
803  if(gIndices[dim] + lFineNodesPerDir[dim] == gFineNodesPerDir[dim]) {
804  boundaryFlags[dim] += 2;
805  }
806  }
807 
808  // Check whether ghost nodes are needed in each direction
809  for(LO i=0; i < numDimensions; ++i) {
810  if((gIndices[i] != 0) && (gIndices[i] % coarseRate[i] > 0)) {
811  ghostInterface[2*i] = true;
812  }
813  if(((gIndices[i] + lFineNodesPerDir[i]) != gFineNodesPerDir[i])
814  && ((gIndices[i] + lFineNodesPerDir[i] - 1) % coarseRate[i] > 0)) {
815  ghostInterface[2*i + 1] = true;
816  }
817  }
818 
819  for(LO i = 0; i < 3; ++i) {
820  if(i < numDimensions) {
821  lCoarseNodesPerDir[i] = (lFineNodesPerDir[i] + myOffset[i] - 1) / coarseRate[i];
822  if(myOffset[i] == 0) { ++lCoarseNodesPerDir[i]; }
823  gCoarseNodesPerDir[i] = (gFineNodesPerDir[i] - 1) / coarseRate[i];
824  endRate[i] = (gFineNodesPerDir[i] - 1) % coarseRate[i];
825  if(endRate[i] == 0) {
826  ++gCoarseNodesPerDir[i];
827  endRate[i] = coarseRate[i];
828  }
829  } else {
830  // Most quantities need to be set to 1 for extra dimensions
831  // this is rather logical, an x-y plane is like a single layer
832  // of nodes in the z direction...
833  gCoarseNodesPerDir[i] = 1;
834  lCoarseNodesPerDir[i] = 1;
835  endRate[i] = 1;
836  }
837  }
838 
839  gNumCoarseNodes = gCoarseNodesPerDir[0]*gCoarseNodesPerDir[1]*gCoarseNodesPerDir[2];
840  lNumCoarseNodes = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[2];
841 
842  // For each direction, determine how many ghost points are required.
843  LO lNumGhostNodes = 0;
844  Array<GO> startGhostedCoarseNode(3);
845  {
846  const int complementaryIndices[3][2] = {{1,2}, {0,2}, {0,1}};
847  LO tmp = 0;
848  for(LO i = 0; i < 3; ++i) {
849  // The first branch of this if-statement will be used if the rank contains only one layer
850  // of nodes in direction i, that layer must also coincide with the boundary of the mesh
851  // and coarseRate[i] == endRate[i]...
852  if((gIndices[i] == gFineNodesPerDir[i] - 1) && (gIndices[i] % coarseRate[i] == 0)) {
853  startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i] - 1;
854  } else {
855  startGhostedCoarseNode[i] = gIndices[i] / coarseRate[i];
856  }
857  // First we load the number of locally owned coarse nodes
858  glCoarseNodesPerDir[i] = lCoarseNodesPerDir[i];
859 
860  // Check whether a face in direction i needs ghost nodes
861  if(ghostInterface[2*i] || ghostInterface[2*i+1]) {
862  ++glCoarseNodesPerDir[i];
863  if(i == 0) {tmp = lCoarseNodesPerDir[1]*lCoarseNodesPerDir[2];}
864  if(i == 1) {tmp = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[2];}
865  if(i == 2) {tmp = lCoarseNodesPerDir[0]*lCoarseNodesPerDir[1];}
866  }
867  // If both faces in direction i need nodes, double the number of ghost nodes
868  if(ghostInterface[2*i] && ghostInterface[2*i+1]) {
869  ++glCoarseNodesPerDir[i];
870  tmp = 2*tmp;
871  }
872  lNumGhostNodes += tmp;
873 
874  // The corners and edges need to be checked in 2D / 3D to add more ghosts nodes
875  for(LO j = 0; j < 2; ++j) {
876  for(LO k = 0; k < 2; ++k) {
877  // Check if two adjoining faces need ghost nodes and then add their common edge
878  if(ghostInterface[2*complementaryIndices[i][0]+j]
879  && ghostInterface[2*complementaryIndices[i][1]+k]) {
880  lNumGhostNodes += lCoarseNodesPerDir[i];
881  // Add corners if three adjoining faces need ghost nodes,
882  // but add them only once! Hence when i == 0.
883  if(ghostInterface[2*i] && (i == 0)) { lNumGhostNodes += 1; }
884  if(ghostInterface[2*i+1] && (i == 0)) { lNumGhostNodes += 1; }
885  }
886  }
887  }
888  tmp = 0;
889  }
890  } // end of scope for tmp and complementaryIndices
891 
892  const LO lNumGhostedNodes = glCoarseNodesPerDir[0]*glCoarseNodesPerDir[1]
893  *glCoarseNodesPerDir[2];
894  ghostedCoarseNodes->PIDs.resize(lNumGhostedNodes);
895  ghostedCoarseNodes->LIDs.resize(lNumGhostedNodes);
896  ghostedCoarseNodes->GIDs.resize(lNumGhostedNodes);
897  ghostedCoarseNodes->coarseGIDs.resize(lNumGhostedNodes);
898  ghostedCoarseNodes->colInds.resize(lNumGhostedNodes);
899 
900  // We loop over all ghosted coarse nodes by increasing global lexicographic order
901  Array<LO> coarseNodeCoarseIndices(3), coarseNodeFineIndices(3), ijk(3);
902  LO currentIndex = -1;
903  for(ijk[2] = 0; ijk[2] < glCoarseNodesPerDir[2]; ++ijk[2]) {
904  for(ijk[1] = 0; ijk[1] < glCoarseNodesPerDir[1]; ++ijk[1]) {
905  for(ijk[0] = 0; ijk[0] < glCoarseNodesPerDir[0]; ++ijk[0]) {
906  currentIndex = ijk[2]*glCoarseNodesPerDir[1]*glCoarseNodesPerDir[0]
907  + ijk[1]*glCoarseNodesPerDir[0] + ijk[0];
908  coarseNodeCoarseIndices[0] = startGhostedCoarseNode[0] + ijk[0];
909  coarseNodeFineIndices[0] = coarseNodeCoarseIndices[0]*coarseRate[0];
910  if(coarseNodeFineIndices[0] > gFineNodesPerDir[0] - 1) {
911  coarseNodeFineIndices[0] = gFineNodesPerDir[0] - 1;
912  }
913  coarseNodeCoarseIndices[1] = startGhostedCoarseNode[1] + ijk[1];
914  coarseNodeFineIndices[1] = coarseNodeCoarseIndices[1]*coarseRate[1];
915  if(coarseNodeFineIndices[1] > gFineNodesPerDir[1] - 1) {
916  coarseNodeFineIndices[1] = gFineNodesPerDir[1] - 1;
917  }
918  coarseNodeCoarseIndices[2] = startGhostedCoarseNode[2] + ijk[2];
919  coarseNodeFineIndices[2] = coarseNodeCoarseIndices[2]*coarseRate[2];
920  if(coarseNodeFineIndices[2] > gFineNodesPerDir[2] - 1) {
921  coarseNodeFineIndices[2] = gFineNodesPerDir[2] - 1;
922  }
923  GO myGID = 0, myCoarseGID = -1;
924  GO factor[3] = {};
925  factor[2] = gFineNodesPerDir[1]*gFineNodesPerDir[0];
926  factor[1] = gFineNodesPerDir[0];
927  factor[0] = 1;
928  for(int dim = 0; dim < 3; ++dim) {
929  if(dim < numDimensions) {
930  if(gIndices[dim] - myOffset[dim] + ijk[dim]*coarseRate[dim]
931  < gFineNodesPerDir[dim] - 1) {
932  myGID += (gIndices[dim] - myOffset[dim]
933  + ijk[dim]*coarseRate[dim])*factor[dim];
934  } else {
935  myGID += (gIndices[dim] - myOffset[dim]
936  + (ijk[dim] - 1)*coarseRate[dim] + endRate[dim])*factor[dim];
937  }
938  }
939  }
940  myCoarseGID = coarseNodeCoarseIndices[0]
941  + coarseNodeCoarseIndices[1]*gCoarseNodesPerDir[0]
942  + coarseNodeCoarseIndices[2]*gCoarseNodesPerDir[1]*gCoarseNodesPerDir[0];
943  ghostedCoarseNodes->GIDs[currentIndex] = myGID;
944  ghostedCoarseNodes->coarseGIDs[currentIndex] = myCoarseGID;
945  }
946  }
947  }
948  coordinatesMap->getRemoteIndexList(ghostedCoarseNodes->GIDs(),
949  ghostedCoarseNodes->PIDs(),
950  ghostedCoarseNodes->LIDs());
951 
952  // Phase 2 //
953  // ------------------------------------------------------------------ //
954  // Build a list of GIDs to import the required ghost nodes. //
955  // The ordering of the ghosts nodes will be as natural as possible, //
956  // i.e. it should follow the GID ordering of the mesh. //
957  // ------------------------------------------------------------------ //
958 
959  // Saddly we have to more or less redo what was just done to figure out the value of
960  // lNumGhostNodes, there might be some optimization possibility here...
961  ghostGIDs.resize(lNumGhostNodes);
962  LO countGhosts = 0;
963  // Get the GID of the first point on the current partition.
964  GO startingGID = minGlobalIndex;
965  Array<GO> startingIndices(3);
966  // We still want ghost nodes even if have with a 0 offset,
967  // except when we are on a boundary
968  if(ghostInterface[4] && (myOffset[2] == 0)) {
969  if(gIndices[2] + coarseRate[2] > gFineNodesPerDir[2]) {
970  startingGID -= endRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
971  } else {
972  startingGID -= coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
973  }
974  } else {
975  startingGID -= myOffset[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
976  }
977  if(ghostInterface[2] && (myOffset[1] == 0)) {
978  if(gIndices[1] + coarseRate[1] > gFineNodesPerDir[1]) {
979  startingGID -= endRate[1]*gFineNodesPerDir[0];
980  } else {
981  startingGID -= coarseRate[1]*gFineNodesPerDir[0];
982  }
983  } else {
984  startingGID -= myOffset[1]*gFineNodesPerDir[0];
985  }
986  if(ghostInterface[0] && (myOffset[0] == 0)) {
987  if(gIndices[0] + coarseRate[0] > gFineNodesPerDir[0]) {
988  startingGID -= endRate[0];
989  } else {
990  startingGID -= coarseRate[0];
991  }
992  } else {
993  startingGID -= myOffset[0];
994  }
995 
996  { // scope for tmp
997  GO tmp;
998  startingIndices[2] = startingGID / (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
999  tmp = startingGID % (gFineNodesPerDir[1]*gFineNodesPerDir[0]);
1000  startingIndices[1] = tmp / gFineNodesPerDir[0];
1001  startingIndices[0] = tmp % gFineNodesPerDir[0];
1002  }
1003 
1004  GO ghostOffset[3] = {0, 0, 0};
1005  LO lengthZero = lCoarseNodesPerDir[0];
1006  LO lengthOne = lCoarseNodesPerDir[1];
1007  LO lengthTwo = lCoarseNodesPerDir[2];
1008  if(ghostInterface[0]) {++lengthZero;}
1009  if(ghostInterface[1]) {++lengthZero;}
1010  if(ghostInterface[2]) {++lengthOne;}
1011  if(ghostInterface[3]) {++lengthOne;}
1012  if(ghostInterface[4]) {++lengthTwo;}
1013  if(ghostInterface[5]) {++lengthTwo;}
1014 
1015  // First check the bottom face as it will have the lowest GIDs
1016  if(ghostInterface[4]) {
1017  ghostOffset[2] = startingGID;
1018  for(LO j = 0; j < lengthOne; ++j) {
1019  if( (j == lengthOne-1)
1020  && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1021  ghostOffset[1] = ((j-1)*coarseRate[1] + endRate[1])*gFineNodesPerDir[0];
1022  } else {
1023  ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1024  }
1025  for(LO k = 0; k < lengthZero; ++k) {
1026  if( (k == lengthZero-1)
1027  && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1028  ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1029  } else {
1030  ghostOffset[0] = k*coarseRate[0];
1031  }
1032  // If the partition includes a changed rate at the edge, ghost nodes need to be picked
1033  // carefully.
1034  // This if statement is repeated each time a ghost node is selected.
1035  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1036  ++countGhosts;
1037  }
1038  }
1039  }
1040 
1041  // Sweep over the lCoarseNodesPerDir[2] coarse layers in direction 2 and gather necessary ghost
1042  // nodes located on these layers.
1043  for(LO i = 0; i < lengthTwo; ++i) {
1044  // Exclude the cases where ghost nodes exists on the faces in directions 2, these faces are
1045  // swept seperatly for efficiency.
1046  if( !((i == lengthTwo-1) && ghostInterface[5]) && !((i == 0) && ghostInterface[4]) ) {
1047  // Set the ghostOffset in direction 2 taking into account a possible endRate different
1048  // from the regular coarseRate.
1049  if( (i == lengthTwo-1) && !ghostInterface[5] ) {
1050  ghostOffset[2] = startingGID + ((i-1)*coarseRate[2] + endRate[2])
1051  *gFineNodesPerDir[1]*gFineNodesPerDir[0];
1052  } else {
1053  ghostOffset[2] = startingGID + i*coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1054  }
1055  for(LO j = 0; j < lengthOne; ++j) {
1056  if( (j == 0) && ghostInterface[2] ) {
1057  for(LO k = 0; k < lengthZero; ++k) {
1058  if( (k == lengthZero-1)
1059  && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1060  if(k == 0) {
1061  ghostOffset[0] = endRate[0];
1062  } else {
1063  ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1064  }
1065  } else {
1066  ghostOffset[0] = k*coarseRate[0];
1067  }
1068  // In this case j == 0 so ghostOffset[1] is zero and can be ignored
1069  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[0];
1070  ++countGhosts;
1071  }
1072  } else if( (j == lengthOne-1) && ghostInterface[3] ) {
1073  // Set the ghostOffset in direction 1 taking into account a possible endRate different
1074  // from the regular coarseRate.
1075  if( (j == lengthOne-1)
1076  && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1077  ghostOffset[1] = ((j-1)*coarseRate[1] + endRate[1])*gFineNodesPerDir[0];
1078  } else {
1079  ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1080  }
1081  for(LO k = 0; k < lengthZero; ++k) {
1082  if( (k == lengthZero-1)
1083  && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1084  ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1085  } else {
1086  ghostOffset[0] = k*coarseRate[0];
1087  }
1088  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1089  ++countGhosts;
1090  }
1091  } else {
1092  // Set ghostOffset[1] for side faces sweep
1093  if( (j == lengthOne-1)
1094  && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1095  ghostOffset[1] = ( (j-1)*coarseRate[1] + endRate[1] )*gFineNodesPerDir[0];
1096  } else {
1097  ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1098  }
1099 
1100  // Set ghostOffset[0], ghostGIDs and countGhosts
1101  if(ghostInterface[0]) { // In that case ghostOffset[0]==0, so we can ignore it
1102  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1];
1103  ++countGhosts;
1104  }
1105  if(ghostInterface[1]) { // Grab ghost point at the end of direction 0.
1106  if( (startingIndices[0] + (lengthZero-1)*coarseRate[0]) > gFineNodesPerDir[0] - 1 ) {
1107  if(lengthZero > 2) {
1108  ghostOffset[0] = (lengthZero-2)*coarseRate[0] + endRate[0];
1109  } else {
1110  ghostOffset[0] = endRate[0];
1111  }
1112  } else {
1113  ghostOffset[0] = (lengthZero-1)*coarseRate[0];
1114  }
1115  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1116  ++countGhosts;
1117  }
1118  }
1119  }
1120  }
1121  }
1122 
1123  // Finally check the top face
1124  if(ghostInterface[5]) {
1125  if( startingIndices[2] + (lengthTwo-1)*coarseRate[2] + 1 > gFineNodesPerDir[2] ) {
1126  ghostOffset[2] = startingGID
1127  + ((lengthTwo-2)*coarseRate[2] + endRate[2])*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1128  } else {
1129  ghostOffset[2] = startingGID
1130  + (lengthTwo-1)*coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1131  }
1132  for(LO j = 0; j < lengthOne; ++j) {
1133  if( (j == lengthOne-1)
1134  && (startingIndices[1] + j*coarseRate[1] + 1 > gFineNodesPerDir[1]) ) {
1135  ghostOffset[1] = ( (j-1)*coarseRate[1] + endRate[1] )*gFineNodesPerDir[0];
1136  } else {
1137  ghostOffset[1] = j*coarseRate[1]*gFineNodesPerDir[0];
1138  }
1139  for(LO k = 0; k < lengthZero; ++k) {
1140  if( (k == lengthZero-1)
1141  && (startingIndices[0] + k*coarseRate[0] + 1 > gFineNodesPerDir[0]) ) {
1142  ghostOffset[0] = (k-1)*coarseRate[0] + endRate[0];
1143  } else {
1144  ghostOffset[0] = k*coarseRate[0];
1145  }
1146  ghostGIDs[countGhosts] = ghostOffset[2] + ghostOffset[1] + ghostOffset[0];
1147  ++countGhosts;
1148  }
1149  }
1150  }
1151 
1152  // Phase 3 //
1153  // ------------------------------------------------------------------ //
1154  // Final phase of this function, lists are being built to create the //
1155  // column and domain maps of the projection as well as the map and //
1156  // arrays for the coarse coordinates multivector. //
1157  // ------------------------------------------------------------------ //
1158 
1159  Array<GO> gCoarseNodesGIDs(lNumCoarseNodes);
1160  LO currentNode, offset2, offset1, offset0;
1161  // Find the GIDs of the coarse nodes on the partition.
1162  for(LO ind2 = 0; ind2 < lCoarseNodesPerDir[2]; ++ind2) {
1163  if(myOffset[2] == 0) {
1164  offset2 = startingIndices[2] + myOffset[2];
1165  } else {
1166  if(startingIndices[2] + endRate[2] == gFineNodesPerDir[2] - 1) {
1167  offset2 = startingIndices[2] + endRate[2];
1168  } else {
1169  offset2 = startingIndices[2] + coarseRate[2];
1170  }
1171  }
1172  if(offset2 + ind2*coarseRate[2] > gFineNodesPerDir[2] - 1) {
1173  offset2 += (ind2 - 1)*coarseRate[2] + endRate[2];
1174  } else {
1175  offset2 += ind2*coarseRate[2];
1176  }
1177  offset2 = offset2*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1178 
1179  for(LO ind1 = 0; ind1 < lCoarseNodesPerDir[1]; ++ind1) {
1180  if(myOffset[1] == 0) {
1181  offset1 = startingIndices[1] + myOffset[1];
1182  } else {
1183  if(startingIndices[1] + endRate[1] == gFineNodesPerDir[1] - 1) {
1184  offset1 = startingIndices[1] + endRate[1];
1185  } else {
1186  offset1 = startingIndices[1] + coarseRate[1];
1187  }
1188  }
1189  if(offset1 + ind1*coarseRate[1] > gFineNodesPerDir[1] - 1) {
1190  offset1 += (ind1 - 1)*coarseRate[1] + endRate[1];
1191  } else {
1192  offset1 += ind1*coarseRate[1];
1193  }
1194  offset1 = offset1*gFineNodesPerDir[0];
1195  for(LO ind0 = 0; ind0 < lCoarseNodesPerDir[0]; ++ind0) {
1196  offset0 = startingIndices[0];
1197  if(myOffset[0] == 0) {
1198  offset0 += ind0*coarseRate[0];
1199  } else {
1200  offset0 += (ind0 + 1)*coarseRate[0];
1201  }
1202  if(offset0 > gFineNodesPerDir[0] - 1) {offset0 += endRate[0] - coarseRate[0];}
1203 
1204  currentNode = ind2*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]
1205  + ind1*lCoarseNodesPerDir[0]
1206  + ind0;
1207  gCoarseNodesGIDs[currentNode] = offset2 + offset1 + offset0;
1208  }
1209  }
1210  }
1211 
1212  // Actual loop over all the coarse/ghost nodes to find their index on the coarse mesh
1213  // and the corresponding dofs that will need to be added to colMapP.
1214  colGIDs.resize(BlkSize*(lNumCoarseNodes+lNumGhostNodes));
1215  coarseNodesGIDs.resize(lNumCoarseNodes);
1216  for(LO i = 0; i < numDimensions; ++i) {coarseNodes[i].resize(lNumCoarseNodes);}
1217  GO fineNodesPerCoarseSlab = coarseRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1218  GO fineNodesEndCoarseSlab = endRate[2]*gFineNodesPerDir[1]*gFineNodesPerDir[0];
1219  GO fineNodesPerCoarsePlane = coarseRate[1]*gFineNodesPerDir[0];
1220  GO fineNodesEndCoarsePlane = endRate[1]*gFineNodesPerDir[0];
1221  GO coarseNodesPerCoarseLayer = gCoarseNodesPerDir[1]*gCoarseNodesPerDir[0];
1222  GO gCoarseNodeOnCoarseGridGID;
1223  LO gInd[3], lCol;
1224  Array<int> ghostPIDs (lNumGhostNodes);
1225  Array<LO> ghostLIDs (lNumGhostNodes);
1226  Array<LO> ghostPermut(lNumGhostNodes);
1227  for(LO k = 0; k < lNumGhostNodes; ++k) {ghostPermut[k] = k;}
1228  coordinatesMap->getRemoteIndexList(ghostGIDs, ghostPIDs, ghostLIDs);
1229  sh_sort_permute(ghostPIDs.begin(),ghostPIDs.end(), ghostPermut.begin(),ghostPermut.end());
1230 
1231  { // scope for tmpInds, tmpVars and tmp.
1232  GO tmpInds[3], tmpVars[2];
1233  LO tmp;
1234  // Loop over the coarse nodes of the partition and add them to colGIDs
1235  // that will be used to construct the column and domain maps of P as well
1236  // as to construct the coarse coordinates map.
1237  // for(LO col = 0; col < lNumCoarseNodes; ++col) { // This should most likely be replaced
1238  // by loops of lCoarseNodesPerDir[] to simplify arithmetics
1239  LO col = 0;
1240  LO firstCoarseNodeInds[3], currentCoarseNode;
1241  for(LO dim = 0; dim < 3; ++dim) {
1242  if(myOffset[dim] == 0) {
1243  firstCoarseNodeInds[dim] = 0;
1244  } else {
1245  firstCoarseNodeInds[dim] = coarseRate[dim] - myOffset[dim];
1246  }
1247  }
1248  Array<ArrayRCP<const typename Teuchos::ScalarTraits<Scalar>::magnitudeType> > fineNodes(numDimensions);
1249  for(LO dim = 0; dim < numDimensions; ++dim) {fineNodes[dim] = coordinates->getData(dim);}
1250  for(LO k = 0; k < lCoarseNodesPerDir[2]; ++k) {
1251  for(LO j = 0; j < lCoarseNodesPerDir[1]; ++j) {
1252  for(LO i = 0; i < lCoarseNodesPerDir[0]; ++i) {
1253  col = k*lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0] + j*lCoarseNodesPerDir[0] + i;
1254 
1255  // Check for endRate
1256  currentCoarseNode = 0;
1257  if(firstCoarseNodeInds[0] + i*coarseRate[0] > lFineNodesPerDir[0] - 1) {
1258  currentCoarseNode += firstCoarseNodeInds[0] + (i-1)*coarseRate[0] + endRate[0];
1259  } else {
1260  currentCoarseNode += firstCoarseNodeInds[0] + i*coarseRate[0];
1261  }
1262  if(firstCoarseNodeInds[1] + j*coarseRate[1] > lFineNodesPerDir[1] - 1) {
1263  currentCoarseNode += (firstCoarseNodeInds[1] + (j-1)*coarseRate[1] + endRate[1])
1264  *lFineNodesPerDir[0];
1265  } else {
1266  currentCoarseNode += (firstCoarseNodeInds[1] + j*coarseRate[1])*lFineNodesPerDir[0];
1267  }
1268  if(firstCoarseNodeInds[2] + k*coarseRate[2] > lFineNodesPerDir[2] - 1) {
1269  currentCoarseNode += (firstCoarseNodeInds[2] + (k-1)*coarseRate[2] + endRate[2])
1270  *lFineNodesPerDir[1]*lFineNodesPerDir[0];
1271  } else {
1272  currentCoarseNode += (firstCoarseNodeInds[2] + k*coarseRate[2])
1273  *lFineNodesPerDir[1]*lFineNodesPerDir[0];
1274  }
1275  // Load coordinates
1276  for(LO dim = 0; dim < numDimensions; ++dim) {
1277  coarseNodes[dim][col] = fineNodes[dim][currentCoarseNode];
1278  }
1279 
1280  if((endRate[2] != coarseRate[2])
1281  && (gCoarseNodesGIDs[col] > (gCoarseNodesPerDir[2] - 2)*fineNodesPerCoarseSlab
1282  + fineNodesEndCoarseSlab - 1)) {
1283  tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab + 1;
1284  tmpVars[0] = gCoarseNodesGIDs[col] - (tmpInds[2] - 1)*fineNodesPerCoarseSlab
1285  - fineNodesEndCoarseSlab;
1286  } else {
1287  tmpInds[2] = gCoarseNodesGIDs[col] / fineNodesPerCoarseSlab;
1288  tmpVars[0] = gCoarseNodesGIDs[col] % fineNodesPerCoarseSlab;
1289  }
1290  if((endRate[1] != coarseRate[1])
1291  && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2)*fineNodesPerCoarsePlane
1292  + fineNodesEndCoarsePlane - 1)) {
1293  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1294  tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1)*fineNodesPerCoarsePlane
1295  - fineNodesEndCoarsePlane;
1296  } else {
1297  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1298  tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1299  }
1300  if(tmpVars[1] == gFineNodesPerDir[0] - 1) {
1301  tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1302  } else {
1303  tmpInds[0] = tmpVars[1] / coarseRate[0];
1304  }
1305  gInd[2] = col / (lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]);
1306  tmp = col % (lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0]);
1307  gInd[1] = tmp / lCoarseNodesPerDir[0];
1308  gInd[0] = tmp % lCoarseNodesPerDir[0];
1309  lCol = gInd[2]*(lCoarseNodesPerDir[1]*lCoarseNodesPerDir[0])
1310  + gInd[1]*lCoarseNodesPerDir[0] + gInd[0];
1311  gCoarseNodeOnCoarseGridGID = tmpInds[2]*coarseNodesPerCoarseLayer
1312  + tmpInds[1]*gCoarseNodesPerDir[0] + tmpInds[0];
1313  coarseNodesGIDs[lCol] = gCoarseNodeOnCoarseGridGID;
1314  for(LO dof = 0; dof < BlkSize; ++dof) {
1315  colGIDs[BlkSize*lCol + dof] = BlkSize*gCoarseNodeOnCoarseGridGID + dof;
1316  }
1317  }
1318  }
1319  }
1320  // Now loop over the ghost nodes of the partition to add them to colGIDs
1321  // since they will need to be included in the column map of P
1322  for(col = lNumCoarseNodes; col < lNumCoarseNodes + lNumGhostNodes; ++col) {
1323  if((endRate[2] != coarseRate[2])
1324  && (ghostGIDs[ghostPermut[col - lNumCoarseNodes]] > (gCoarseNodesPerDir[2] - 2)
1325  *fineNodesPerCoarseSlab + fineNodesEndCoarseSlab - 1)) {
1326  tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab + 1;
1327  tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]]
1328  - (tmpInds[2] - 1)*fineNodesPerCoarseSlab - fineNodesEndCoarseSlab;
1329  } else {
1330  tmpInds[2] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] / fineNodesPerCoarseSlab;
1331  tmpVars[0] = ghostGIDs[ghostPermut[col - lNumCoarseNodes]] % fineNodesPerCoarseSlab;
1332  }
1333  if((endRate[1] != coarseRate[1])
1334  && (tmpVars[0] > (gCoarseNodesPerDir[1] - 2)*fineNodesPerCoarsePlane
1335  + fineNodesEndCoarsePlane - 1)) {
1336  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane + 1;
1337  tmpVars[1] = tmpVars[0] - (tmpInds[1] - 1)*fineNodesPerCoarsePlane
1338  - fineNodesEndCoarsePlane;
1339  } else {
1340  tmpInds[1] = tmpVars[0] / fineNodesPerCoarsePlane;
1341  tmpVars[1] = tmpVars[0] % fineNodesPerCoarsePlane;
1342  }
1343  if(tmpVars[1] == gFineNodesPerDir[0] - 1) {
1344  tmpInds[0] = gCoarseNodesPerDir[0] - 1;
1345  } else {
1346  tmpInds[0] = tmpVars[1] / coarseRate[0];
1347  }
1348  gCoarseNodeOnCoarseGridGID = tmpInds[2]*coarseNodesPerCoarseLayer
1349  + tmpInds[1]*gCoarseNodesPerDir[0] + tmpInds[0];
1350  for(LO dof = 0; dof < BlkSize; ++dof) {
1351  colGIDs[BlkSize*col + dof] = BlkSize*gCoarseNodeOnCoarseGridGID + dof;
1352  }
1353  }
1354  } // End of scope for tmpInds, tmpVars and tmp
1355 
1356  } // GetGeometricData()
1357 
1358  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1360  ComputeLocalEntries(const RCP<const Matrix>& Aghost, const Array<LO> coarseRate,
1361  const Array<LO> /* endRate */, const LO BlkSize, const Array<LO> elemInds,
1362  const Array<LO> /* lCoarseElementsPerDir */, const LO numDimensions,
1363  const Array<LO> lFineNodesPerDir, const Array<GO> /* gFineNodesPerDir */,
1364  const Array<GO> /* gIndices */, const Array<LO> /* lCoarseNodesPerDir */,
1365  const Array<bool> ghostInterface, const Array<int> elementFlags,
1366  const std::string stencilType, const std::string /* blockStrategy */,
1367  const Array<LO> elementNodesPerDir, const LO numNodesInElement,
1368  const Array<GO> /* colGIDs */,
1369  Teuchos::SerialDenseMatrix<LO,SC>& Pi, Teuchos::SerialDenseMatrix<LO,SC>& Pf,
1370  Teuchos::SerialDenseMatrix<LO,SC>& Pe, Array<LO>& dofType,
1371  Array<LO>& lDofInd) const {
1372 
1373  // First extract data from Aghost and move it to the corresponding dense matrix
1374  // This step requires to compute the number of nodes (resp dofs) in the current
1375  // coarse element, then allocate storage for local dense matrices, finally loop
1376  // over the dofs and extract the corresponding row in Aghost before dispactching
1377  // its data to the dense matrices.
1378  // At the same time we want to compute a mapping from the element indexing to
1379  // group indexing. The groups are the following: corner, edge, face and interior
1380  // nodes. We uses these groups to operate on the dense matrices but need to
1381  // store the nodes in their original order after groupd operations are completed.
1382  LO countInterior=0, countFace=0, countEdge=0, countCorner =0;
1383  Array<LO> collapseDir(numNodesInElement*BlkSize);
1384  for(LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1385  for(LO je = 0; je < elementNodesPerDir[1]; ++je) {
1386  for(LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1387  // Check for corner node
1388  if((ke == 0 || ke == elementNodesPerDir[2]-1)
1389  && (je == 0 || je == elementNodesPerDir[1]-1)
1390  && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1391  for(LO dof = 0; dof < BlkSize; ++dof) {
1392  dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1393  + je*elementNodesPerDir[0] + ie) + dof] = 0;
1394  lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1395  + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countCorner + dof;
1396  }
1397  ++countCorner;
1398 
1399  // Check for edge node
1400  } else if (((ke == 0 || ke == elementNodesPerDir[2]-1)
1401  && (je == 0 || je == elementNodesPerDir[1]-1))
1402  || ((ke == 0 || ke == elementNodesPerDir[2]-1)
1403  && (ie == 0 || ie == elementNodesPerDir[0]-1))
1404  || ((je == 0 || je == elementNodesPerDir[1]-1)
1405  && (ie == 0 || ie == elementNodesPerDir[0]-1))) {
1406  for(LO dof = 0; dof < BlkSize; ++dof) {
1407  dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1408  + je*elementNodesPerDir[0] + ie) + dof] = 1;
1409  lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1410  + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countEdge + dof;
1411  if((ke == 0 || ke == elementNodesPerDir[2]-1)
1412  && (je == 0 || je == elementNodesPerDir[1]-1)) {
1413  collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1414  + je*elementNodesPerDir[0] + ie) + dof] = 0;
1415  } else if((ke == 0 || ke == elementNodesPerDir[2]-1)
1416  && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1417  collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1418  + je*elementNodesPerDir[0] + ie) + dof] = 1;
1419  } else if((je == 0 || je == elementNodesPerDir[1]-1
1420  ) && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1421  collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1422  + je*elementNodesPerDir[0] + ie) + dof] = 2;
1423  }
1424  }
1425  ++countEdge;
1426 
1427  // Check for face node
1428  } else if ((ke == 0 || ke == elementNodesPerDir[2]-1)
1429  || (je == 0 || je == elementNodesPerDir[1]-1)
1430  || (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1431  for(LO dof = 0; dof < BlkSize; ++dof) {
1432  dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1433  + je*elementNodesPerDir[0] + ie) + dof] = 2;
1434  lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1435  + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countFace + dof;
1436  if(ke == 0 || ke == elementNodesPerDir[2]-1) {
1437  collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1438  + je*elementNodesPerDir[0] + ie) + dof] = 2;
1439  } else if(je == 0 || je == elementNodesPerDir[1]-1) {
1440  collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1441  + je*elementNodesPerDir[0] + ie) + dof] = 1;
1442  } else if(ie == 0 || ie == elementNodesPerDir[0]-1) {
1443  collapseDir[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1444  + je*elementNodesPerDir[0] + ie) + dof] = 0;
1445  }
1446  }
1447  ++countFace;
1448 
1449  // Otherwise it is an interior node
1450  } else {
1451  for(LO dof = 0; dof < BlkSize; ++dof) {
1452  dofType[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1453  + je*elementNodesPerDir[0] + ie) + dof] = 3;
1454  lDofInd[BlkSize*(ke*elementNodesPerDir[1]*elementNodesPerDir[0]
1455  + je*elementNodesPerDir[0] + ie) + dof] = BlkSize*countInterior +dof;
1456  }
1457  ++countInterior;
1458  }
1459  }
1460  }
1461  }
1462 
1463  LO numInteriorNodes = 0, numFaceNodes = 0, numEdgeNodes = 0, numCornerNodes = 8;
1464  numInteriorNodes = (elementNodesPerDir[0] - 2)*(elementNodesPerDir[1] - 2)
1465  *(elementNodesPerDir[2] - 2);
1466  numFaceNodes = 2*(elementNodesPerDir[0] - 2)*(elementNodesPerDir[1] - 2)
1467  + 2*(elementNodesPerDir[0] - 2)*(elementNodesPerDir[2] - 2)
1468  + 2*(elementNodesPerDir[1] - 2)*(elementNodesPerDir[2] - 2);
1469  numEdgeNodes = 4*(elementNodesPerDir[0] - 2) + 4*(elementNodesPerDir[1] - 2)
1470  + 4*(elementNodesPerDir[2] - 2);
1471  // Diagonal blocks
1472  Teuchos::SerialDenseMatrix<LO,SC> Aii(BlkSize*numInteriorNodes, BlkSize*numInteriorNodes);
1473  Teuchos::SerialDenseMatrix<LO,SC> Aff(BlkSize*numFaceNodes, BlkSize*numFaceNodes);
1474  Teuchos::SerialDenseMatrix<LO,SC> Aee(BlkSize*numEdgeNodes, BlkSize*numEdgeNodes);
1475  // Upper triangular blocks
1476  Teuchos::SerialDenseMatrix<LO,SC> Aif(BlkSize*numInteriorNodes, BlkSize*numFaceNodes);
1477  Teuchos::SerialDenseMatrix<LO,SC> Aie(BlkSize*numInteriorNodes, BlkSize*numEdgeNodes);
1478  Teuchos::SerialDenseMatrix<LO,SC> Afe(BlkSize*numFaceNodes, BlkSize*numEdgeNodes);
1479  // Coarse nodes "right hand sides" and local prolongators
1480  Teuchos::SerialDenseMatrix<LO,SC> Aic(BlkSize*numInteriorNodes, BlkSize*numCornerNodes);
1481  Teuchos::SerialDenseMatrix<LO,SC> Afc(BlkSize*numFaceNodes, BlkSize*numCornerNodes);
1482  Teuchos::SerialDenseMatrix<LO,SC> Aec(BlkSize*numEdgeNodes, BlkSize*numCornerNodes);
1483  Pi.shape( BlkSize*numInteriorNodes, BlkSize*numCornerNodes);
1484  Pf.shape( BlkSize*numFaceNodes, BlkSize*numCornerNodes);
1485  Pe.shape( BlkSize*numEdgeNodes, BlkSize*numCornerNodes);
1486 
1487  ArrayView<const LO> rowIndices;
1488  ArrayView<const SC> rowValues;
1489  LO idof, iInd, jInd;
1490  int iType = 0, jType = 0;
1491  int orientation = -1;
1492  int collapseFlags[3] = {};
1493  Array<SC> stencil((std::pow(3,numDimensions))*BlkSize);
1494  // LBV Note: we could skip the extraction of rows corresponding to coarse nodes
1495  // we might want to fuse the first three loops and do integer arithmetic
1496  // to have more optimization during compilation...
1497  for(LO ke = 0; ke < elementNodesPerDir[2]; ++ke) {
1498  for(LO je = 0; je < elementNodesPerDir[1]; ++je) {
1499  for(LO ie = 0; ie < elementNodesPerDir[0]; ++ie) {
1500  collapseFlags[0] = 0; collapseFlags[1] = 0; collapseFlags[2] = 0;
1501  if((elementFlags[0] == 1 || elementFlags[0] == 3) && ie == 0) {
1502  collapseFlags[0] += 1;
1503  }
1504  if((elementFlags[0] == 2 || elementFlags[0] == 3) && ie == elementNodesPerDir[0] - 1) {
1505  collapseFlags[0] += 2;
1506  }
1507  if((elementFlags[1] == 1 || elementFlags[1] == 3) && je == 0) {
1508  collapseFlags[1] += 1;
1509  }
1510  if((elementFlags[1] == 2 || elementFlags[1] == 3) && je == elementNodesPerDir[1] - 1) {
1511  collapseFlags[1] += 2;
1512  }
1513  if((elementFlags[2] == 1 || elementFlags[2] == 3) && ke == 0) {
1514  collapseFlags[2] += 1;
1515  }
1516  if((elementFlags[2] == 2 || elementFlags[2] == 3) && ke == elementNodesPerDir[2] - 1) {
1517  collapseFlags[2] += 2;
1518  }
1519 
1520  // Based on (ie, je, ke) we detect the type of node we are dealing with
1521  GetNodeInfo(ie, je, ke, elementNodesPerDir, &iType, iInd, &orientation);
1522  for(LO dof0 = 0; dof0 < BlkSize; ++dof0) {
1523  // Compute the dof index for each dof at point (ie, je, ke)
1524  idof = ((elemInds[2]*coarseRate[2] + ke)*lFineNodesPerDir[1]*lFineNodesPerDir[0]
1525  + (elemInds[1]*coarseRate[1] + je)*lFineNodesPerDir[0]
1526  + elemInds[0]*coarseRate[0] + ie)*BlkSize + dof0;
1527  Aghost->getLocalRowView(idof, rowIndices, rowValues);
1528  FormatStencil(BlkSize, ghostInterface, ie, je, ke, rowValues,
1529  elementNodesPerDir, collapseFlags, stencilType, stencil);
1530  LO io, jo, ko;
1531  if(iType == 3) {// interior node, no stencil collapse needed
1532  for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1533  // Find (if, jf, kf) the indices associated with the interacting node
1534  ko = ke + (interactingNode / 9 - 1);
1535  {
1536  LO tmp = interactingNode % 9;
1537  jo = je + (tmp / 3 - 1);
1538  io = ie + (tmp % 3 - 1);
1539  int dummy;
1540  GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1541  }
1542  if((io > -1 && io < elementNodesPerDir[0]) &&
1543  (jo > -1 && jo < elementNodesPerDir[1]) &&
1544  (ko > -1 && ko < elementNodesPerDir[2])) {
1545  for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1546  if (jType == 3) {
1547  Aii(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1548  stencil[interactingNode*BlkSize + dof1];
1549  } else if(jType == 2) {
1550  Aif(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1551  stencil[interactingNode*BlkSize + dof1];
1552  } else if(jType == 1) {
1553  Aie(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1554  stencil[interactingNode*BlkSize + dof1];
1555  } else if(jType == 0) {
1556  Aic(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1557  -stencil[interactingNode*BlkSize + dof1];
1558  }
1559  }
1560  }
1561  }
1562  } else if(iType == 2) {// Face node, collapse stencil along face normal (*orientation)
1563  CollapseStencil(2, orientation, collapseFlags, stencil);
1564  for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1565  // Find (if, jf, kf) the indices associated with the interacting node
1566  ko = ke + (interactingNode / 9 - 1);
1567  {
1568  LO tmp = interactingNode % 9;
1569  jo = je + (tmp / 3 - 1);
1570  io = ie + (tmp % 3 - 1);
1571  int dummy;
1572  GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1573  }
1574  if((io > -1 && io < elementNodesPerDir[0]) &&
1575  (jo > -1 && jo < elementNodesPerDir[1]) &&
1576  (ko > -1 && ko < elementNodesPerDir[2])) {
1577  for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1578  if(jType == 2) {
1579  Aff(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1580  stencil[interactingNode*BlkSize + dof1];
1581  } else if(jType == 1) {
1582  Afe(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1583  stencil[interactingNode*BlkSize + dof1];
1584  } else if(jType == 0) {
1585  Afc(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1586  -stencil[interactingNode*BlkSize + dof1];
1587  }
1588  }
1589  }
1590  }
1591  } else if(iType == 1) {// Edge node, collapse stencil perpendicular to edge direction
1592  CollapseStencil(1, orientation, collapseFlags, stencil);
1593  for(LO interactingNode = 0; interactingNode < 27; ++interactingNode) {
1594  // Find (if, jf, kf) the indices associated with the interacting node
1595  ko = ke + (interactingNode / 9 - 1);
1596  {
1597  LO tmp = interactingNode % 9;
1598  jo = je + (tmp / 3 - 1);
1599  io = ie + (tmp % 3 - 1);
1600  int dummy;
1601  GetNodeInfo(io, jo, ko, elementNodesPerDir, &jType, jInd, &dummy);
1602  }
1603  if((io > -1 && io < elementNodesPerDir[0]) &&
1604  (jo > -1 && jo < elementNodesPerDir[1]) &&
1605  (ko > -1 && ko < elementNodesPerDir[2])) {
1606  for(LO dof1 = 0; dof1 < BlkSize; ++dof1) {
1607  if(jType == 1) {
1608  Aee(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1609  stencil[interactingNode*BlkSize + dof1];
1610  } else if(jType == 0) {
1611  Aec(iInd*BlkSize + dof0, jInd*BlkSize + dof1) =
1612  -stencil[interactingNode*BlkSize + dof1];
1613  }
1614  } // Check if interacting node is in element or not
1615  } // Pc is the identity so no need to treat iType == 0
1616  } // Loop over interacting nodes within a row
1617  } // Check for current node type
1618  } // Loop over degrees of freedom associated to a given node
1619  } // Loop over i
1620  } // Loop over j
1621  } // Loop over k
1622 
1623  // Compute the projection operators for edge and interior nodes
1624  //
1625  // [P_i] = [A_{ii} & A_{if} & A_{ie}]^{-1} [A_{ic}]
1626  // [P_f] = [ 0 & A_{ff} & A_{fe}] [A_{fc}]
1627  // [P_e] = [ 0 & 0 & A_{ee}] [A_{ec}]
1628  // [P_c] = I_c
1629  //
1630  { // Compute P_e
1631  // We need to solve for P_e in: A_{ee}*P_e = A_{fc}
1632  // So we simple do P_e = A_{ee}^{-1)*A_{ec}
1633  Teuchos::SerialDenseSolver<LO,SC> problem;
1634  problem.setMatrix(Teuchos::rcp(&Aee, false));
1635  problem.setVectors(Teuchos::rcp(&Pe, false), Teuchos::rcp(&Aec, false));
1636  problem.factorWithEquilibration(true);
1637  problem.solve();
1638  problem.unequilibrateLHS();
1639  }
1640 
1641  { // Compute P_f
1642  // We need to solve for P_f in: A_{ff}*P_f + A_{fe}*P_e = A_{fc}
1643  // Step one: A_{fc} = 1.0*A_{fc} + (-1.0)*A_{fe}*P_e
1644  Afc.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Afe, Pe, 1.0);
1645  // Step two: P_f = A_{ff}^{-1}*A_{fc}
1646  Teuchos::SerialDenseSolver<LO,SC> problem;
1647  problem.setMatrix(Teuchos::rcp(&Aff, false));
1648  problem.setVectors(Teuchos::rcp(&Pf, false), Teuchos::rcp(&Afc, false));
1649  problem.factorWithEquilibration(true);
1650  problem.solve();
1651  problem.unequilibrateLHS();
1652  }
1653 
1654  { // Compute P_i
1655  // We need to solve for P_i in: A_{ii}*P_i + A_{if}*P_f + A_{ie}*P_e = A_{ic}
1656  // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{ie}*P_e
1657  Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aie, Pe, 1.0);
1658  // Step one: A_{ic} = 1.0*A_{ic} + (-1.0)*A_{if}*P_f
1659  Aic.multiply(Teuchos::NO_TRANS, Teuchos::NO_TRANS, -1.0, Aif, Pf, 1.0);
1660  // Step two: P_i = A_{ii}^{-1}*A_{ic}
1661  Teuchos::SerialDenseSolver<LO,SC> problem;
1662  problem.setMatrix(Teuchos::rcp(&Aii, false));
1663  problem.setVectors(Teuchos::rcp(&Pi, false), Teuchos::rcp(&Aic, false));
1664  problem.factorWithEquilibration(true);
1665  problem.solve();
1666  problem.unequilibrateLHS();
1667  }
1668 
1669  } // ComputeLocalEntries()
1670 
1671  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1673  CollapseStencil(const int type, const int orientation, const int /* collapseFlags */[3],
1674  Array<SC>& stencil) const {
1675 
1676  if(type == 2) {// Face stencil collapse
1677  // Let's hope things vectorize well inside this if statement
1678  if(orientation == 0) {
1679  for(LO i = 0; i < 9; ++i) {
1680  stencil[3*i + 1] = stencil[3*i] + stencil[3*i + 1] + stencil[3*i + 2];
1681  stencil[3*i] = 0;
1682  stencil[3*i + 2] = 0;
1683  }
1684  } else if(orientation == 1) {
1685  for(LO i = 0; i < 3; ++i) {
1686  stencil[9*i + 3] = stencil[9*i + 0] + stencil[9*i + 3] + stencil[9*i + 6];
1687  stencil[9*i + 0] = 0;
1688  stencil[9*i + 6] = 0;
1689  stencil[9*i + 4] = stencil[9*i + 1] + stencil[9*i + 4] + stencil[9*i + 7];
1690  stencil[9*i + 1] = 0;
1691  stencil[9*i + 7] = 0;
1692  stencil[9*i + 5] = stencil[9*i + 2] + stencil[9*i + 5] + stencil[9*i + 8];
1693  stencil[9*i + 2] = 0;
1694  stencil[9*i + 8] = 0;
1695  }
1696  } else if(orientation == 2) {
1697  for(LO i = 0; i < 9; ++i) {
1698  stencil[i + 9] = stencil[i] + stencil[i + 9] + stencil[i + 18];
1699  stencil[i] = 0;
1700  stencil[i + 18] = 0;
1701  }
1702  }
1703  } else if(type == 1) {// Edge stencil collapse
1704  SC tmp1 = 0, tmp2 = 0, tmp3 = 0;
1705  if(orientation == 0) {
1706  for(LO i = 0; i < 9; ++i) {
1707  tmp1 += stencil[0 + 3*i];
1708  stencil[0 + 3*i] = 0;
1709  tmp2 += stencil[1 + 3*i];
1710  stencil[1 + 3*i] = 0;
1711  tmp3 += stencil[2 + 3*i];
1712  stencil[2 + 3*i] = 0;
1713  }
1714  stencil[12] = tmp1;
1715  stencil[13] = tmp2;
1716  stencil[14] = tmp3;
1717  } else if(orientation == 1) {
1718  for(LO i = 0; i < 3; ++i) {
1719  tmp1 += stencil[0 + i] + stencil[9 + i] + stencil[18 + i];
1720  stencil[ 0 + i] = 0;
1721  stencil[ 9 + i] = 0;
1722  stencil[18 + i] = 0;
1723  tmp2 += stencil[3 + i] + stencil[12 + i] + stencil[21 + i];
1724  stencil[ 3 + i] = 0;
1725  stencil[12 + i] = 0;
1726  stencil[21 + i] = 0;
1727  tmp3 += stencil[6 + i] + stencil[15 + i] + stencil[24 + i];
1728  stencil[ 6 + i] = 0;
1729  stencil[15 + i] = 0;
1730  stencil[24 + i] = 0;
1731  }
1732  stencil[10] = tmp1;
1733  stencil[13] = tmp2;
1734  stencil[16] = tmp3;
1735  } else if(orientation == 2) {
1736  for(LO i = 0; i < 9; ++i) {
1737  tmp1 += stencil[i];
1738  stencil[i] = 0;
1739  tmp2 += stencil[i + 9];
1740  stencil[i + 9] = 0;
1741  tmp3 += stencil[i + 18];
1742  stencil[i + 18] = 0;
1743  }
1744  stencil[ 4] = tmp1;
1745  stencil[13] = tmp2;
1746  stencil[22] = tmp3;
1747  }
1748  }
1749  } // CollapseStencil
1750 
1751  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1753  FormatStencil(const LO BlkSize, const Array<bool> /* ghostInterface */, const LO /* ie */, const LO /* je */,
1754  const LO /* ke */, const ArrayView<const SC> rowValues,const Array<LO> /* elementNodesPerDir */,
1755  const int collapseFlags[3], const std::string stencilType, Array<SC>& stencil)
1756  const {
1757 
1758  if(stencilType == "reduced") {
1759  Array<int> fullStencilInds(7);
1760  fullStencilInds[0] = 4; fullStencilInds[1] = 10; fullStencilInds[2] = 12;
1761  fullStencilInds[3] = 13; fullStencilInds[4] = 14; fullStencilInds[5] = 16;
1762  fullStencilInds[6] = 22;
1763 
1764  // Create a mask array to automate mesh boundary processing
1765  Array<int> mask(7);
1766  int stencilSize = rowValues.size();
1767  if(collapseFlags[0] + collapseFlags[1] + collapseFlags[2] > 0) {
1768  if(stencilSize == 1) {
1769  // The assumption is made that if only one non-zero exists in the row
1770  // then this represent a Dirichlet BC being enforced.
1771  // One might want to zero out the corresponding row in the prolongator.
1772  mask[0] = 1; mask[1] = 1; mask[2] = 1;
1773  mask[4] = 1; mask[5] = 1; mask[6] = 1;
1774  } else {
1775  // Here we are looking at Neumann like BC where only a flux is prescribed
1776  // and less non-zeros will be present in the corresponding row.
1777  if(collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1778  mask[0] = 1;
1779  }
1780  if(collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1781  mask[6] = 1;
1782  }
1783  if(collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1784  mask[1] = 1;
1785  }
1786  if(collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1787  mask[5] = 1;
1788  }
1789  if(collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1790  mask[2] = 1;
1791  }
1792  if(collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1793  mask[4] = 1;
1794  }
1795  }
1796  }
1797 
1798  int offset = 0;
1799  for(int ind = 0; ind < 7; ++ind) {
1800  if(mask[ind] == 1) {
1801  for(int dof = 0; dof < BlkSize; ++dof) {
1802  stencil[BlkSize*fullStencilInds[ind] + dof] = 0.0;
1803  }
1804  ++offset; // The offset is used to stay within rowValues bounds
1805  } else {
1806  for(int dof = 0; dof < BlkSize; ++dof) {
1807  stencil[BlkSize*fullStencilInds[ind] + dof] = rowValues[BlkSize*(ind - offset) + dof];
1808  }
1809  }
1810  }
1811  } else if(stencilType == "full") {
1812  // Create a mask array to automate mesh boundary processing
1813  Array<int> mask(27);
1814  if(collapseFlags[2] == 1 || collapseFlags[2] == 3) {
1815  for(int ind = 0; ind < 9; ++ind) {
1816  mask[ind] = 1;
1817  }
1818  }
1819  if(collapseFlags[2] == 2 || collapseFlags[2] == 3) {
1820  for(int ind = 0; ind < 9; ++ind) {
1821  mask[18 + ind] = 1;
1822  }
1823  }
1824  if(collapseFlags[1] == 1 || collapseFlags[1] == 3) {
1825  for(int ind1 = 0; ind1 < 3; ++ind1) {
1826  for(int ind2 = 0; ind2 < 3; ++ind2) {
1827  mask[ind1*9 + ind2] = 1;
1828  }
1829  }
1830  }
1831  if(collapseFlags[1] == 2 || collapseFlags[1] == 3) {
1832  for(int ind1 = 0; ind1 < 3; ++ind1) {
1833  for(int ind2 = 0; ind2 < 3; ++ind2) {
1834  mask[ind1*9 + ind2 + 6] = 1;
1835  }
1836  }
1837  }
1838  if(collapseFlags[0] == 1 || collapseFlags[0] == 3) {
1839  for(int ind = 0; ind < 9; ++ind) {
1840  mask[3*ind] = 1;
1841  }
1842  }
1843  if(collapseFlags[0] == 2 || collapseFlags[0] == 3) {
1844  for(int ind = 0; ind < 9; ++ind) {
1845  mask[3*ind + 2] = 1;
1846  }
1847  }
1848 
1849  // Now the stencil is extracted and formated
1850  int offset = 0;
1851  for(LO ind = 0; ind < 27; ++ind) {
1852  if(mask[ind] == 0) {
1853  for(int dof = 0; dof < BlkSize; ++dof) {
1854  stencil[BlkSize*ind + dof] = 0.0;
1855  }
1856  ++offset; // The offset is used to stay within rowValues bounds
1857  } else {
1858  for(int dof = 0; dof < BlkSize; ++dof) {
1859  stencil[BlkSize*ind + dof] = rowValues[BlkSize*(ind - offset) + dof];
1860  }
1861  }
1862  }
1863  } // stencilTpye
1864  } // FormatStencil()
1865 
1866  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1868  const LO ie, const LO je, const LO ke,
1869  const Array<LO> elementNodesPerDir,
1870  int* type, LO& ind, int* orientation) const {
1871 
1872  // Initialize flags with -1 to be able to catch issues easily
1873  *type = -1, ind = 0, *orientation = -1;
1874  if((ke == 0 || ke == elementNodesPerDir[2]-1)
1875  && (je == 0 || je == elementNodesPerDir[1]-1)
1876  && (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1877  // Corner node
1878  *type = 0;
1879  ind = 4*ke / (elementNodesPerDir[2]-1) + 2*je / (elementNodesPerDir[1]-1)
1880  + ie / (elementNodesPerDir[0]-1);
1881  } else if(((ke == 0 || ke == elementNodesPerDir[2]-1)
1882  && (je == 0 || je == elementNodesPerDir[1]-1))
1883  || ((ke == 0 || ke == elementNodesPerDir[2]-1)
1884  && (ie == 0 || ie == elementNodesPerDir[0]-1))
1885  || ((je == 0 || je == elementNodesPerDir[1]-1)
1886  && (ie == 0 || ie == elementNodesPerDir[0]-1))) {
1887  // Edge node
1888  *type = 1;
1889  if(ke > 0) {ind += 2*(elementNodesPerDir[0] - 2 + elementNodesPerDir[1] - 2);}
1890  if(ke == elementNodesPerDir[2] - 1) {ind += 4*(elementNodesPerDir[2] - 2);}
1891  if((ke == 0) || (ke == elementNodesPerDir[2] - 1)) { // je or ie edges
1892  if(je == 0) {// jlo ie edge
1893  *orientation = 0;
1894  ind += ie - 1;
1895  } else if(je == elementNodesPerDir[1] - 1) {// jhi ie edge
1896  *orientation = 0;
1897  ind += 2*(elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1898  } else {// ilo or ihi je edge
1899  *orientation = 1;
1900  ind += elementNodesPerDir[0] - 2 + 2*(je - 1) + ie / (elementNodesPerDir[0] - 1);
1901  }
1902  } else {// ke edges
1903  *orientation = 2;
1904  ind += 4*(ke - 1) + 2*(je/(elementNodesPerDir[1] - 1)) + ie / (elementNodesPerDir[0] - 1);
1905  }
1906  } else if ((ke == 0 || ke == elementNodesPerDir[2]-1)
1907  || (je == 0 || je == elementNodesPerDir[1]-1)
1908  || (ie == 0 || ie == elementNodesPerDir[0]-1)) {
1909  // Face node
1910  *type = 2;
1911  if(ke == 0) {// current node is on "bottom" face
1912  *orientation = 2;
1913  ind = (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1914  } else {
1915  // add nodes from "bottom" face
1916  ind += (elementNodesPerDir[1] - 2)*(elementNodesPerDir[0] - 2);
1917  // add nodes from side faces
1918  ind += 2*(ke - 1)*(elementNodesPerDir[1] - 2 + elementNodesPerDir[0] - 2);
1919  if(ke == elementNodesPerDir[2]-1) {// current node is on "top" face
1920  *orientation = 2;
1921  ind += (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1922  } else {// current node is on a side face
1923  if(je == 0) {
1924  *orientation = 1;
1925  ind += ie - 1;
1926  } else if(je == elementNodesPerDir[1] - 1) {
1927  *orientation = 1;
1928  ind += 2*(elementNodesPerDir[1] - 2) + elementNodesPerDir[0] - 2 + ie - 1;
1929  } else {
1930  *orientation = 0;
1931  ind += elementNodesPerDir[0] - 2 + 2*(je - 1) + ie / (elementNodesPerDir[0]-1);
1932  }
1933  }
1934  }
1935  } else {
1936  // Interior node
1937  *type = 3;
1938  ind = (ke - 1)*(elementNodesPerDir[1] - 2)*(elementNodesPerDir[0] - 2)
1939  + (je - 1)*(elementNodesPerDir[0] - 2) + ie - 1;
1940  }
1941  } // GetNodeInfo()
1942 
1943  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
1945  const typename Teuchos::Array<LocalOrdinal>::iterator& first1,
1946  const typename Teuchos::Array<LocalOrdinal>::iterator& last1,
1947  const typename Teuchos::Array<LocalOrdinal>::iterator& first2,
1948  const typename Teuchos::Array<LocalOrdinal>::iterator& /* last2 */) const
1949  {
1950  typedef typename std::iterator_traits<typename Teuchos::Array<LocalOrdinal>::iterator>::difference_type DT;
1951  DT n = last1 - first1;
1952  DT m = n / 2;
1953  DT z = Teuchos::OrdinalTraits<DT>::zero();
1954  while (m > z)
1955  {
1956  DT max = n - m;
1957  for (DT j = 0; j < max; j++)
1958  {
1959  for (DT k = j; k >= 0; k-=m)
1960  {
1961  if (first1[first2[k+m]] >= first1[first2[k]])
1962  break;
1963  std::swap(first2[k+m], first2[k]);
1964  }
1965  }
1966  m = m/2;
1967  }
1968  }
1969 
1970 } //namespace MueLu
1971 
1972 #define MUELU_BLACKBOXPFACTORY_SHORT
1973 #endif // MUELU_BLACKBOXPFACTORY_DEF_HPP
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void FormatStencil(const LO BlkSize, const Array< bool > ghostInterface, const LO ie, const LO je, const LO ke, const ArrayView< const SC > rowValues, const Array< LO > elementNodesPerDir, const int collapseFlags[3], const std::string stencilType, Array< SC > &stencil) const
void GetNodeInfo(const LO ie, const LO je, const LO ke, const Array< LO > elementNodesPerDir, int *type, LO &ind, int *orientation) const
void sh_sort_permute(const typename Teuchos::Array< LocalOrdinal >::iterator &first1, const typename Teuchos::Array< LocalOrdinal >::iterator &last1, const typename Teuchos::Array< LocalOrdinal >::iterator &first2, const typename Teuchos::Array< LocalOrdinal >::iterator &last2) const
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
void ComputeLocalEntries(const RCP< const Matrix > &Aghost, const Array< LO > coarseRate, const Array< LO > endRate, const LO BlkSize, const Array< LO > elemInds, const Array< LO > lCoarseElementsPerDir, const LO numDimensions, const Array< LO > lFineNodesPerDir, const Array< GO > gFineNodesPerDir, const Array< GO > gIndices, const Array< LO > lCoarseNodesPerDir, const Array< bool > ghostInterface, const Array< int > elementFlags, const std::string stencilType, const std::string blockStrategy, const Array< LO > elementNodesPerDir, const LO numNodesInElement, const Array< GO > colGIDs, Teuchos::SerialDenseMatrix< LO, SC > &Pi, Teuchos::SerialDenseMatrix< LO, SC > &Pf, Teuchos::SerialDenseMatrix< LO, SC > &Pe, Array< LO > &dofType, Array< LO > &lDofInd) const
void CollapseStencil(const int type, const int orientation, const int collapseFlags[3], Array< SC > &stencil) const
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void GetGeometricData(RCP< Xpetra::MultiVector< typename Teuchos::ScalarTraits< Scalar >::magnitudeType, LO, GO, NO > > &coordinates, const Array< LO > coarseRate, const Array< GO > gFineNodesPerDir, const Array< LO > lFineNodesPerDir, const LO BlkSize, Array< GO > &gIndices, Array< LO > &myOffset, Array< bool > &ghostInterface, Array< LO > &endRate, Array< GO > &gCoarseNodesPerDir, Array< LO > &lCoarseNodesPerDir, Array< LO > &glCoarseNodesPerDir, Array< GO > &ghostGIDs, Array< GO > &coarseNodesGIDs, Array< GO > &colGIDs, GO &gNumCoarseNodes, LO &lNumCoarseNodes, ArrayRCP< Array< typename Teuchos::ScalarTraits< Scalar >::magnitudeType > > coarseNodes, Array< int > &boundaryFlags, RCP< NodesIDs > ghostedCoarseNodes) const
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static const NoFactory * get()
Namespace for MueLu classes and methods.
@ Errors
Errors.
@ Runtime0
One-liner description of what is happening.