MueLu  Version of the Day
MueLu_CoalesceDropFactory_kokkos_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // MueLu: A package for multigrid based preconditioning
6 // Copyright 2012 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact
39 // Jonathan Hu (jhu@sandia.gov)
40 // Andrey Prokopenko (aprokop@sandia.gov)
41 // Ray Tuminaro (rstumin@sandia.gov)
42 //
43 // ***********************************************************************
44 //
45 // @HEADER
46 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
47 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
48 
49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR
50 #include <Kokkos_Core.hpp>
51 #include <Kokkos_CrsMatrix.hpp>
52 
53 #include "Xpetra_Matrix.hpp"
54 
56 
57 #include "MueLu_AmalgamationInfo.hpp"
58 #include "MueLu_Exceptions.hpp"
59 #include "MueLu_Level.hpp"
60 #include "MueLu_LWGraph_kokkos.hpp"
61 #include "MueLu_MasterList.hpp"
62 #include "MueLu_Monitor.hpp"
63 #include "MueLu_Utilities_kokkos.hpp"
64 
65 namespace MueLu {
66 
67 
68  namespace { // anonymous
69 
70  template<class LO, class RowType>
71  class ScanFunctor {
72  public:
73  ScanFunctor(RowType rows_) : rows(rows_) { }
74 
75  KOKKOS_INLINE_FUNCTION
76  void operator()(const LO i, LO& upd, const bool& final) const {
77  upd += rows(i);
78  if (final)
79  rows(i) = upd;
80  }
81 
82  private:
83  RowType rows;
84  };
85 
86  template<class MatrixType, class GhostedDiagType, class RowType>
87  class Stage1ScalarFunctor {
88  private:
89  typedef typename MatrixType::ordinal_type LO;
90  typedef typename MatrixType::value_type SC;
91  typedef Kokkos::ArithTraits<SC> ATS;
92  typedef typename ATS::magnitudeType magnitudeType;
93 
94  public:
95  Stage1ScalarFunctor(MatrixType kokkosMatrix_, double threshold_, GhostedDiagType ghostedDiag_, RowType rows_) :
96  kokkosMatrix(kokkosMatrix_),
97  threshold(threshold_),
98  ghostedDiag(ghostedDiag_),
99  rows(rows_)
100  { }
101 
102  KOKKOS_INLINE_FUNCTION
103  void operator()(const LO row, LO& nnz) const {
104  auto rowView = kokkosMatrix.row (row);
105  auto length = rowView.length;
106 
107  LO rownnz = 0;
108  for (decltype(length) colID = 0; colID < length; colID++) {
109  LO col = rowView.colidx(colID);
110 
111  // Avoid square root by using squared values
112  magnitudeType aiiajj = threshold*threshold * ATS::magnitude(ghostedDiag(row, 0))*ATS::magnitude(ghostedDiag(col, 0)); // eps^2*|a_ii|*|a_jj|
113  magnitudeType aij2 = ATS::magnitude(rowView.value(colID)) * ATS::magnitude(rowView.value(colID)); // |a_ij|^2
114 
115  if (aij2 > aiiajj || row == col)
116  rownnz++;
117  }
118  rows(row+1) = rownnz;
119  nnz += rownnz;
120  }
121 
122  private:
123  MatrixType kokkosMatrix;
124  double threshold;
125  GhostedDiagType ghostedDiag;
126  RowType rows;
127  };
128 
129  template<class MatrixType, class GhostedDiagType, class RowType, class ColType, class BndNodesType>
130  class Stage2ScalarFunctor {
131  private:
132  typedef typename MatrixType::ordinal_type LO;
133  typedef typename MatrixType::size_type GO;
134  typedef typename MatrixType::value_type SC;
135  typedef Kokkos::ArithTraits<SC> ATS;
136  typedef typename ATS::magnitudeType magnitudeType;
137 
138  public:
139  Stage2ScalarFunctor(MatrixType kokkosMatrix_, GhostedDiagType ghostedDiag_, RowType rows_, ColType cols_, BndNodesType bndNodes_, double threshold_) :
140  kokkosMatrix(kokkosMatrix_),
141  ghostedDiag(ghostedDiag_),
142  rows(rows_),
143  cols(cols_),
144  bndNodes(bndNodes_),
145  threshold(threshold_)
146  { }
147 
148  KOKKOS_INLINE_FUNCTION
149  void operator()(const LO row, GO& dropped) const {
150  auto rowView = kokkosMatrix.row (row);
151  auto length = rowView.length;
152 
153  LO rownnz = 0;
154  for (decltype(length) colID = 0; colID < length; colID++) {
155  LO col = rowView.colidx(colID);
156 
157  // Avoid square root by using squared values
158  magnitudeType aiiajj = threshold*threshold * ATS::magnitude(ghostedDiag(row, 0))*ATS::magnitude(ghostedDiag(col, 0)); // eps^2*|a_ii|*|a_jj|
159  magnitudeType aij2 = ATS::magnitude(rowView.value(colID)) * ATS::magnitude(rowView.value(colID)); // |a_ij|^2
160 
161  if (aij2 > aiiajj || row == col) {
162  cols(rows(row) + rownnz) = col;
163  rownnz++;
164  } else {
165  dropped++;
166  }
167  if (rownnz == 1) {
168  // If the only element remaining after filtering is diagonal, mark node as boundary
169  // FIXME: this should really be replaced by the following
170  // if (indices.size() == 1 && indices[0] == row)
171  // boundaryNodes[row] = true;
172  // We do not do it this way now because there is no framework for distinguishing isolated
173  // and boundary nodes in the aggregation algorithms
174  bndNodes(row) = true;
175  }
176  }
177  }
178 
179  private:
180  MatrixType kokkosMatrix;
181  GhostedDiagType ghostedDiag;
182  RowType rows;
183  ColType cols;
184  BndNodesType bndNodes;
185  double threshold;
186  };
187 
188  // collect number nonzeros of blkSize rows in nnz_(row+1)
189  template<class MatrixType, class NnzType, class blkSizeType>
190  class Stage1aVectorFunctor {
191  private:
192  typedef typename MatrixType::ordinal_type LO;
193 
194  public:
195  Stage1aVectorFunctor(MatrixType kokkosMatrix_, NnzType nnz_, blkSizeType blkSize_) :
196  kokkosMatrix(kokkosMatrix_),
197  nnz(nnz_),
198  blkSize(blkSize_) { }
199 
200  KOKKOS_INLINE_FUNCTION
201  void operator()(const LO row, LO& totalnnz) const {
202 
203  // the following code is more or less what MergeRows is doing
204  // count nonzero entries in all dof rows associated with node row
205  LO nodeRowMaxNonZeros = 0;
206  for (LO j = 0; j < blkSize; j++) {
207  auto rowView = kokkosMatrix.row(row * blkSize + j);
208  nodeRowMaxNonZeros += rowView.length;
209  }
210  nnz(row + 1) = nodeRowMaxNonZeros;
211  totalnnz += nodeRowMaxNonZeros;
212  }
213 
214 
215  private:
216  MatrixType kokkosMatrix; //< local matrix part
217  NnzType nnz; //< View containing number of nonzeros for current row
218  blkSizeType blkSize; //< block size (or partial block size in strided maps)
219  };
220 
221 
222  // build the dof-based column map containing the local dof ids belonging to blkSize rows in matrix
223  // the DofIds may not be sorted.
224  template<class MatrixType, class NnzType, class blkSizeType, class ColDofType>
225  class Stage1bVectorFunctor {
226  private:
227  typedef typename MatrixType::ordinal_type LO;
228  //typedef typename MatrixType::value_type SC;
229  //typedef typename MatrixType::device_type NO;
230 
231  private:
232  MatrixType kokkosMatrix; //< local matrix part
233  NnzType nnz; //< View containing dof offsets for dof columns
234  blkSizeType blkSize; //< block size (or partial block size in strided maps)
235  ColDofType coldofs; //< view containing the local dof ids associated with columns for the blkSize rows (not sorted)
236 
237  public:
238  Stage1bVectorFunctor(MatrixType kokkosMatrix_,
239  NnzType nnz_,
240  blkSizeType blkSize_,
241  ColDofType coldofs_) :
242  kokkosMatrix(kokkosMatrix_),
243  nnz(nnz_),
244  blkSize(blkSize_),
245  coldofs(coldofs_) {
246  }
247 
248  KOKKOS_INLINE_FUNCTION
249  void operator()(const LO rowNode) const {
250 
251  LO pos = nnz(rowNode);
252  for (LO j = 0; j < blkSize; j++) {
253  auto rowView = kokkosMatrix.row(rowNode * blkSize + j);
254  auto numIndices = rowView.length;
255 
256  for (decltype(numIndices) k = 0; k < numIndices; k++) {
257  auto dofID = rowView.colidx(k);
258  coldofs(pos) = dofID;
259  pos ++;
260  }
261  }
262  }
263 
264  };
265 
266  // sort column ids
267  // translate them into (unique) node ids
268  // count the node column ids per node row
269  template<class MatrixType, class ColDofNnzType, class ColDofType, class Dof2NodeTranslationType, class BdryNodeType>
270  class Stage1cVectorFunctor {
271  private:
272  typedef typename MatrixType::ordinal_type LO;
273 
274  private:
275  ColDofType coldofs; //< view containing the local dof ids associated with columns for the blkSize rows (not sorted)
276  ColDofNnzType coldofnnz; //< view containing start and stop indices for subviews
277  Dof2NodeTranslationType dof2node; //< view containing the local node id associated with the local dof id
278  ColDofNnzType colnodennz; //< view containing number of column nodes for each node row
279  BdryNodeType bdrynode; //< view containing with numNodes booleans. True if node is (full) dirichlet boundardy node.
280 
281  public:
282  Stage1cVectorFunctor(ColDofNnzType coldofnnz_, ColDofType coldofs_, Dof2NodeTranslationType dof2node_, ColDofNnzType colnodennz_, BdryNodeType bdrynode_) :
283  coldofnnz(coldofnnz_),
284  coldofs(coldofs_),
285  dof2node(dof2node_),
286  colnodennz(colnodennz_),
287  bdrynode(bdrynode_) {
288  }
289 
290  KOKKOS_INLINE_FUNCTION
291  void operator()(const LO rowNode, LO& nnz) const {
292  LO begin = coldofnnz(rowNode);
293  LO end = coldofnnz(rowNode+1);
294  LO n = end - begin;
295  for (LO i = 0; i < (n-1); i++) {
296  for (LO j = 0; j < (n-i-1); j++) {
297  if (coldofs(j+begin) > coldofs(j+begin+1)) {
298  LO temp = coldofs(j+begin);
299  coldofs(j+begin) = coldofs(j+begin+1);
300  coldofs(j+begin+1) = temp;
301  }
302  }
303  }
304  size_t cnt = 0;
305  LO lastNodeID = -1;
306  for (LO i = 0; i < n; i++) {
307  LO dofID = coldofs(begin + i);
308  LO nodeID = dof2node(dofID);
309  if(nodeID != lastNodeID) {
310  lastNodeID = nodeID;
311  coldofs(begin+cnt) = nodeID;
312  cnt++;
313  }
314  }
315  if(cnt == 1)
316  bdrynode(rowNode) = true;
317  else
318  bdrynode(rowNode) = false;
319  colnodennz(rowNode+1) = cnt;
320  nnz += cnt;
321  }
322 
323  };
324 
325  // fill column node id view
326  template<class MatrixType, class ColDofNnzType, class ColDofType, class ColNodeNnzType, class ColNodeType>
327  class Stage1dVectorFunctor {
328  private:
329  typedef typename MatrixType::ordinal_type LO;
330  typedef typename MatrixType::value_type SC;
331 
332  private:
333  ColDofType coldofs; //< view containing mixed node and dof indices (only input)
334  ColDofNnzType coldofnnz; //< view containing the start and stop indices for subviews (dofs)
335  ColNodeType colnodes; //< view containing the local node ids associated with columns
336  ColNodeNnzType colnodennz; //< view containing start and stop indices for subviews
337 
338  public:
339  Stage1dVectorFunctor(ColDofType coldofs_, ColDofNnzType coldofnnz_, ColNodeType colnodes_, ColNodeNnzType colnodennz_) :
340  coldofs(coldofs_),
341  coldofnnz(coldofnnz_),
342  colnodes(colnodes_),
343  colnodennz(colnodennz_) {
344  }
345 
346  KOKKOS_INLINE_FUNCTION
347  void operator()(const LO rowNode) const {
348  auto dofbegin = coldofnnz(rowNode);
349  auto nodebegin = colnodennz(rowNode);
350  auto nodeend = colnodennz(rowNode+1);
351  auto n = nodeend - nodebegin;
352 
353  for (decltype(nodebegin) i = 0; i < n; i++) {
354  colnodes(nodebegin + i) = coldofs(dofbegin + i);
355  }
356  }
357  };
358 
359 
360  } // namespace
361 
362  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class DeviceType>
363  RCP<const ParameterList> CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetValidParameterList() const {
364  RCP<ParameterList> validParamList = rcp(new ParameterList());
365 
366 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
367  SET_VALID_ENTRY("aggregation: drop tol");
368  SET_VALID_ENTRY("aggregation: Dirichlet threshold");
369  SET_VALID_ENTRY("aggregation: drop scheme");
370  {
371  typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
372  validParamList->getEntry("aggregation: drop scheme").setValidator(
373  rcp(new validatorType(Teuchos::tuple<std::string>("classical", "distance laplacian"), "aggregation: drop scheme")));
374  }
375 #undef SET_VALID_ENTRY
376  validParamList->set< bool > ("lightweight wrap", true, "Experimental option for lightweight graph access");
377 
378  validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
379  validParamList->set< RCP<const FactoryBase> >("UnAmalgamationInfo", Teuchos::null, "Generating factory for UnAmalgamationInfo");
380  validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for Coordinates");
381 
382  return validParamList;
383  }
384 
385  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class DeviceType>
386  void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::DeclareInput(Level &currentLevel) const {
387  Input(currentLevel, "A");
388  Input(currentLevel, "UnAmalgamationInfo");
389 
390  const ParameterList& pL = GetParameterList();
391  if (pL.get<bool>("lightweight wrap") == true) {
392  if (pL.get<std::string>("aggregation: drop scheme") == "distance laplacian")
393  Input(currentLevel, "Coordinates");
394  }
395  }
396 
397  template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class DeviceType>
398  void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::Build(Level& currentLevel) const {
399  FactoryMonitor m(*this, "Build", currentLevel);
400 
401  typedef Teuchos::ScalarTraits<SC> STS;
402  SC zero = STS::zero();
403 
404  auto A = Get< RCP<Matrix> >(currentLevel, "A");
405  LO blkSize = A->GetFixedBlockSize();
406  GO indexBase = A->getRowMap()->getIndexBase();
407 
408  auto amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel, "UnAmalgamationInfo");
409 
410  const ParameterList& pL = GetParameterList();
411 
412  // bool doLightWeightWrap = pL.get<bool>("lightweight wrap");
413  GetOStream(Warnings0) << "lightweight wrap is deprecated" << std::endl;
414 
415  std::string algo = pL.get<std::string>("aggregation: drop scheme");
416 
417  double threshold = pL.get<double>("aggregation: drop tol");
418  GetOStream(Runtime0) << "algorithm = \"" << algo << "\": threshold = " << threshold << ", blocksize = " << A->GetFixedBlockSize() << std::endl;
419 
420  Set(currentLevel, "Filtering", (threshold != zero));
421 
422  const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<double>("aggregation: Dirichlet threshold")));
423 
424  GO numDropped = 0, numTotal = 0;
425 
426  RCP<LWGraph_kokkos> graph;
427  LO dofsPerNode = -1;
428 
429  typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type;
430  boundary_nodes_type boundaryNodes;
431 
432  if (blkSize == 1 && threshold == zero) {
433  // Case 1: scalar problem without dropping
434 
435  graph = rcp(new LWGraph_kokkos(A->getLocalMatrix().graph, A->getRowMap(), A->getColMap(), "graph of A"));
436 
437  // Detect and record rows that correspond to Dirichlet boundary conditions
438  boundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
439  graph->SetBoundaryNodeMap(boundaryNodes);
440  numTotal = A->getNodeNumEntries();
441  dofsPerNode = 1;
442  } else if (blkSize > 1 && threshold == zero) {
443  // Case 3: block problem without filtering
444 
445  TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getNodeNumElements() % blkSize != 0, MueLu::Exceptions::RuntimeError, "MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getNodeNumElements() << " but should be a multiply of " << blkSize);
446 
447  const RCP<const Map> rowMap = A->getRowMap();
448  const RCP<const Map> colMap = A->getColMap();
449 
450  // build a node row map (uniqueMap = non-overlapping) and a node column map
451  // (nonUniqueMap = overlapping). The arrays rowTranslation and colTranslation
452  // stored in the AmalgamationInfo class container contain the local node id
453  // given a local dof id. The data is calculated in the AmalgamationFactory and
454  // stored in the variable "UnAmalgamationInfo" (which is of type AmalagamationInfo)
455  const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
456  const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
457  Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation()); // TAW should be transform that into a View?
458  Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
459 
460  // get number of local nodes
461  LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
462  typedef typename Kokkos::View<LocalOrdinal*, DeviceType> id_translation_type;
463  id_translation_type rowTranslation("dofId2nodeId",rowTranslationArray.size());
464  id_translation_type colTranslation("ov_dofId2nodeId",colTranslationArray.size());
465 
466  // TODO change this to lambdas
467  for (decltype(rowTranslationArray.size()) i = 0; i < rowTranslationArray.size(); ++i)
468  rowTranslation(i) = rowTranslationArray[i];
469  for (decltype(colTranslationArray.size()) i = 0; i < colTranslationArray.size(); ++i)
470  colTranslation(i) = colTranslationArray[i];
471  // extract striding information
472  blkSize = A->GetFixedBlockSize(); //< the full block size (number of dofs per node in strided map)
473  LocalOrdinal blkId = -1; //< the block id within a strided map or -1 if it is a full block map
474  LocalOrdinal blkPartSize = A->GetFixedBlockSize(); //< stores block size of part blkId (or the full block size)
475  if(A->IsView("stridedMaps") == true) {
476  const RCP<const Map> myMap = A->getRowMap("stridedMaps");
477  const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<const StridedMap>(myMap);
478  TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() == true, Exceptions::RuntimeError, "Map is not of type stridedMap");
479  blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
480  blkId = strMap->getStridedBlockId();
481  if (blkId > -1)
482  blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
483  }
484  auto kokkosMatrix = A->getLocalMatrix(); // access underlying kokkos data
485 
486  //
487  typedef typename Matrix::local_matrix_type local_matrix_type;
488  typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type;
489  typedef typename kokkos_graph_type::row_map_type row_map_type;
490  //typedef typename row_map_type::HostMirror row_map_type_h;
491  typedef typename kokkos_graph_type::entries_type entries_type;
492 
493  // Stage 1c: get number of dof-nonzeros per blkSize node rows
494  typename row_map_type::non_const_type dofNnz("nnz_map", numNodes + 1);
495  LO numDofCols = 0;
496  Stage1aVectorFunctor<decltype(kokkosMatrix), decltype(dofNnz), decltype(blkPartSize)> stage1aFunctor(kokkosMatrix, dofNnz, blkPartSize);
497  Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1a", range_type(0,numNodes), stage1aFunctor, numDofCols);
498  // parallel_scan (exclusive)
499  ScanFunctor<LO,decltype(dofNnz)> scanFunctor(dofNnz);
500  Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanFunctor);
501 
502  typename entries_type::non_const_type dofcols("dofcols", numDofCols/*dofNnz(numNodes)*/); // why does dofNnz(numNodes) work? should be a parallel reduce, i guess
503  Stage1bVectorFunctor <decltype(kokkosMatrix), decltype(dofNnz), decltype(blkPartSize), decltype(dofcols)> stage1bFunctor(kokkosMatrix, dofNnz, blkPartSize, dofcols);
504  Kokkos::parallel_for("MueLu:CoalesceDropF:Build:scalar_filter:stage1b", range_type(0,numNodes), stage1bFunctor);
505 
506  // we have dofcols and dofids from Stage1dVectorFunctor
507  LO numNodeCols = 0;
508  typename row_map_type::non_const_type rows("nnz_nodemap", numNodes + 1);
509  typename boundary_nodes_type::non_const_type bndNodes("boundaryNodes", numNodes);
510  Stage1cVectorFunctor <decltype(kokkosMatrix), decltype(dofNnz), decltype(dofcols), decltype(colTranslation), decltype(bndNodes)> stage1cFunctor(dofNnz, dofcols, colTranslation,rows,bndNodes);
511  Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1cFunctor,numNodeCols);
512 
513  // parallel_scan (exclusive)
514  ScanFunctor<LO,decltype(rows)> scanNodeFunctor(rows);
515  Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanNodeFunctor);
516 
517  // create column node view
518  typename entries_type::non_const_type cols("nodecols", numNodeCols);
519 
520 
521  Stage1dVectorFunctor <decltype(kokkosMatrix), decltype(dofNnz), decltype(dofcols), decltype(rows), decltype(cols)> stage1dFunctor(dofcols, dofNnz, cols, rows);
522  Kokkos::parallel_for("MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1dFunctor);
523  kokkos_graph_type kokkosGraph(cols, rows);
524 
525  // create LW graph
526  graph = rcp(new LWGraph_kokkos(kokkosGraph, uniqueMap, nonUniqueMap, "amalgamated graph of A"));
527 
528  boundaryNodes = bndNodes;
529  graph->SetBoundaryNodeMap(boundaryNodes);
530  numTotal = A->getNodeNumEntries();
531 
532  dofsPerNode = blkSize;
533  } else if (algo == "classical") {
534 
535  if (blkSize == 1 && threshold != zero) {
536  // Case 2: scalar problem with filtering
537 
538  RCP<Vector> ghostedDiagVec = Utilities_kokkos::GetMatrixOverlappedDiagonal(*A);
539  auto ghostedDiag = ghostedDiagVec->template getLocalView<typename NO::device_type>();
540 
541  typedef typename Matrix::local_matrix_type local_matrix_type;
542  typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type;
543  typedef typename kokkos_graph_type::row_map_type row_map_type;
544  typedef typename kokkos_graph_type::entries_type entries_type;
545 
546  LO numRows = A->getNodeNumRows();
547  auto kokkosMatrix = A->getLocalMatrix();
548 
549  typedef Kokkos::ArithTraits<SC> ATS;
550  typedef typename ATS::magnitudeType magnitudeType;
551 
552  // Stage 1: calculate the number of remaining entries per row
553  typename row_map_type::non_const_type rows("row_map", numRows+1); // rows(0) = 0 automatically
554 
555  LO realnnz = 0;
556 
557  Stage1ScalarFunctor<decltype(kokkosMatrix), decltype(ghostedDiag), decltype(rows)> stage1Functor(kokkosMatrix, threshold, ghostedDiag, rows);
558  Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage1_reduce", range_type(0,numRows), stage1Functor, realnnz);
559 
560  // parallel_scan (exclusive)
561  ScanFunctor<LO,decltype(rows)> scanFunctor(rows);
562  Kokkos::parallel_scan("MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numRows+1), scanFunctor);
563 
564 
565  // Stage 2: fill in the column indices
566  typename boundary_nodes_type::non_const_type bndNodes("boundaryNodes", numRows);
567  typename entries_type::non_const_type cols ("entries", realnnz);
568 
569  typename decltype(kokkosMatrix)::size_type kokkos_numDropped;
570 
571  Stage2ScalarFunctor<decltype(kokkosMatrix), decltype(ghostedDiag), decltype(rows), decltype(cols), decltype(bndNodes)>
572  stage2Functor(kokkosMatrix, ghostedDiag, rows, cols, bndNodes, threshold);
573  Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:scalar_filter:stage2_reduce", range_type(0,numRows), stage2Functor, kokkos_numDropped);
574  numDropped = kokkos_numDropped;
575 
576  boundaryNodes = bndNodes;
577 
578  kokkos_graph_type kokkosGraph(cols, rows);
579 
580  graph = rcp(new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(), "filtered graph of A"));
581  graph->SetBoundaryNodeMap(boundaryNodes);
582 
583  numTotal = A->getNodeNumEntries();
584 
585  dofsPerNode = 1;
586 
587  } else if (blkSize > 1 && threshold != zero) {
588  // Case 4: block problem with filtering
589 
590  throw Exceptions::RuntimeError("Block systems with filtering are not implemented");
591 
592  // Detect and record rows that correspond to Dirichlet boundary conditions
593  // boundary_nodes_type pointBoundaryNodes = Utilities_kokkos::DetectDirichletRows(*A, dirichletThreshold);
594  }
595 
596  } else if (algo == "distance laplacian") {
597  typedef Xpetra::MultiVector<double,LO,GO,NO> doubleMultiVector;
598 
599  auto coords = Get<RCP<doubleMultiVector> >(currentLevel, "Coordinates");
600 
601  if (blkSize == 1 && threshold != zero) {
602  // Case 2: scalar problem with filtering
603 
604  throw Exceptions::RuntimeError("not implemented");
605 
606  } else if (blkSize > 1 && threshold != zero) {
607  // Case 4: block problem with filtering
608 
609  throw Exceptions::RuntimeError("Block systems with filtering are not implemented");
610  }
611  }
612  if (GetVerbLevel() & Statistics1) {
613  GO numLocalBoundaryNodes = 0;
614  GO numGlobalBoundaryNodes = 0;
615 #if 0
616  // Convert to functors later
617  Kokkos::parallel_reduce("MueLu:CoalesceDropF:Build:bnd", range_type(0,boundaryNodes.dimension_0()), KOKKOS_LAMBDA(const LO i, GO& n) {
618  if (boundaryNodes(i))
619  n++;
620  }, numLocalBoundaryNodes);
621 #endif
622 
623  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
624  MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
625  GetOStream(Statistics1) << "Detected " << numGlobalBoundaryNodes << " Dirichlet nodes" << std::endl;
626  }
627  if ((GetVerbLevel() & Statistics1) && threshold != zero) {
628  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
629  GO numGlobalTotal, numGlobalDropped;
630  MueLu_sumAll(comm, numTotal, numGlobalTotal);
631  MueLu_sumAll(comm, numDropped, numGlobalDropped);
632  if (numGlobalTotal != 0) {
633  GetOStream(Statistics1) << "Number of dropped entries: "
634  << numGlobalDropped << "/" << numGlobalTotal
635  << " (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) << "%)" << std::endl;
636  }
637  }
638  Set(currentLevel, "DofsPerNode", dofsPerNode);
639  Set(currentLevel, "Graph", graph);
640  }
641 }
642 #endif // HAVE_MUELU_KOKKOS_REFACTOR
643 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP
Important warning messages (one line)
#define MueLu_sumAll(rcpComm, in, out)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
Print more statistics.
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type *=0)
Exception throws to report errors in the internal logical of the program.
#define SET_VALID_ENTRY(name)