46 #ifndef MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP 47 #define MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP 49 #ifdef HAVE_MUELU_KOKKOS_REFACTOR 50 #include <Kokkos_Core.hpp> 51 #include <Kokkos_CrsMatrix.hpp> 53 #include "Xpetra_Matrix.hpp" 57 #include "MueLu_AmalgamationInfo.hpp" 60 #include "MueLu_LWGraph_kokkos.hpp" 63 #include "MueLu_Utilities_kokkos.hpp" 70 template<
class LO,
class RowType>
73 ScanFunctor(RowType rows_) : rows(rows_) { }
75 KOKKOS_INLINE_FUNCTION
76 void operator()(
const LO i, LO& upd,
const bool&
final)
const {
86 template<
class MatrixType,
class GhostedDiagType,
class RowType>
87 class Stage1ScalarFunctor {
89 typedef typename MatrixType::ordinal_type LO;
90 typedef typename MatrixType::value_type SC;
91 typedef Kokkos::ArithTraits<SC> ATS;
92 typedef typename ATS::magnitudeType magnitudeType;
95 Stage1ScalarFunctor(MatrixType kokkosMatrix_,
double threshold_, GhostedDiagType ghostedDiag_, RowType rows_) :
96 kokkosMatrix(kokkosMatrix_),
97 threshold(threshold_),
98 ghostedDiag(ghostedDiag_),
102 KOKKOS_INLINE_FUNCTION
103 void operator()(
const LO row, LO& nnz)
const {
104 auto rowView = kokkosMatrix.row (row);
105 auto length = rowView.length;
108 for (decltype(length) colID = 0; colID < length; colID++) {
109 LO col = rowView.colidx(colID);
112 magnitudeType aiiajj = threshold*threshold * ATS::magnitude(ghostedDiag(row, 0))*ATS::magnitude(ghostedDiag(col, 0));
113 magnitudeType aij2 = ATS::magnitude(rowView.value(colID)) * ATS::magnitude(rowView.value(colID));
115 if (aij2 > aiiajj || row == col)
118 rows(row+1) = rownnz;
123 MatrixType kokkosMatrix;
125 GhostedDiagType ghostedDiag;
129 template<
class MatrixType,
class GhostedDiagType,
class RowType,
class ColType,
class BndNodesType>
130 class Stage2ScalarFunctor {
132 typedef typename MatrixType::ordinal_type LO;
133 typedef typename MatrixType::size_type GO;
134 typedef typename MatrixType::value_type SC;
135 typedef Kokkos::ArithTraits<SC> ATS;
136 typedef typename ATS::magnitudeType magnitudeType;
139 Stage2ScalarFunctor(MatrixType kokkosMatrix_, GhostedDiagType ghostedDiag_, RowType rows_, ColType cols_, BndNodesType bndNodes_,
double threshold_) :
140 kokkosMatrix(kokkosMatrix_),
141 ghostedDiag(ghostedDiag_),
145 threshold(threshold_)
148 KOKKOS_INLINE_FUNCTION
149 void operator()(
const LO row, GO& dropped)
const {
150 auto rowView = kokkosMatrix.row (row);
151 auto length = rowView.length;
154 for (decltype(length) colID = 0; colID < length; colID++) {
155 LO col = rowView.colidx(colID);
158 magnitudeType aiiajj = threshold*threshold * ATS::magnitude(ghostedDiag(row, 0))*ATS::magnitude(ghostedDiag(col, 0));
159 magnitudeType aij2 = ATS::magnitude(rowView.value(colID)) * ATS::magnitude(rowView.value(colID));
161 if (aij2 > aiiajj || row == col) {
162 cols(rows(row) + rownnz) = col;
174 bndNodes(row) =
true;
180 MatrixType kokkosMatrix;
181 GhostedDiagType ghostedDiag;
184 BndNodesType bndNodes;
189 template<
class MatrixType,
class NnzType,
class blkSizeType>
190 class Stage1aVectorFunctor {
192 typedef typename MatrixType::ordinal_type LO;
195 Stage1aVectorFunctor(MatrixType kokkosMatrix_, NnzType nnz_, blkSizeType blkSize_) :
196 kokkosMatrix(kokkosMatrix_),
198 blkSize(blkSize_) { }
200 KOKKOS_INLINE_FUNCTION
201 void operator()(
const LO row, LO& totalnnz)
const {
205 LO nodeRowMaxNonZeros = 0;
206 for (LO j = 0; j < blkSize; j++) {
207 auto rowView = kokkosMatrix.row(row * blkSize + j);
208 nodeRowMaxNonZeros += rowView.length;
210 nnz(row + 1) = nodeRowMaxNonZeros;
211 totalnnz += nodeRowMaxNonZeros;
216 MatrixType kokkosMatrix;
224 template<
class MatrixType,
class NnzType,
class blkSizeType,
class ColDofType>
225 class Stage1bVectorFunctor {
227 typedef typename MatrixType::ordinal_type LO;
232 MatrixType kokkosMatrix;
238 Stage1bVectorFunctor(MatrixType kokkosMatrix_,
240 blkSizeType blkSize_,
241 ColDofType coldofs_) :
242 kokkosMatrix(kokkosMatrix_),
248 KOKKOS_INLINE_FUNCTION
249 void operator()(
const LO rowNode)
const {
251 LO pos = nnz(rowNode);
252 for (LO j = 0; j < blkSize; j++) {
253 auto rowView = kokkosMatrix.row(rowNode * blkSize + j);
254 auto numIndices = rowView.length;
256 for (decltype(numIndices) k = 0; k < numIndices; k++) {
257 auto dofID = rowView.colidx(k);
258 coldofs(pos) = dofID;
269 template<
class MatrixType,
class ColDofNnzType,
class ColDofType,
class Dof2NodeTranslationType,
class BdryNodeType>
270 class Stage1cVectorFunctor {
272 typedef typename MatrixType::ordinal_type LO;
276 ColDofNnzType coldofnnz;
277 Dof2NodeTranslationType dof2node;
278 ColDofNnzType colnodennz;
279 BdryNodeType bdrynode;
282 Stage1cVectorFunctor(ColDofNnzType coldofnnz_, ColDofType coldofs_, Dof2NodeTranslationType dof2node_, ColDofNnzType colnodennz_, BdryNodeType bdrynode_) :
283 coldofnnz(coldofnnz_),
286 colnodennz(colnodennz_),
287 bdrynode(bdrynode_) {
290 KOKKOS_INLINE_FUNCTION
291 void operator()(
const LO rowNode, LO& nnz)
const {
292 LO begin = coldofnnz(rowNode);
293 LO end = coldofnnz(rowNode+1);
295 for (LO i = 0; i < (n-1); i++) {
296 for (LO j = 0; j < (n-i-1); j++) {
297 if (coldofs(j+begin) > coldofs(j+begin+1)) {
298 LO temp = coldofs(j+begin);
299 coldofs(j+begin) = coldofs(j+begin+1);
300 coldofs(j+begin+1) = temp;
306 for (LO i = 0; i < n; i++) {
307 LO dofID = coldofs(begin + i);
308 LO nodeID = dof2node(dofID);
309 if(nodeID != lastNodeID) {
311 coldofs(begin+cnt) = nodeID;
316 bdrynode(rowNode) =
true;
318 bdrynode(rowNode) =
false;
319 colnodennz(rowNode+1) = cnt;
326 template<
class MatrixType,
class ColDofNnzType,
class ColDofType,
class ColNodeNnzType,
class ColNodeType>
327 class Stage1dVectorFunctor {
329 typedef typename MatrixType::ordinal_type LO;
330 typedef typename MatrixType::value_type SC;
334 ColDofNnzType coldofnnz;
335 ColNodeType colnodes;
336 ColNodeNnzType colnodennz;
339 Stage1dVectorFunctor(ColDofType coldofs_, ColDofNnzType coldofnnz_, ColNodeType colnodes_, ColNodeNnzType colnodennz_) :
341 coldofnnz(coldofnnz_),
343 colnodennz(colnodennz_) {
346 KOKKOS_INLINE_FUNCTION
347 void operator()(
const LO rowNode)
const {
348 auto dofbegin = coldofnnz(rowNode);
349 auto nodebegin = colnodennz(rowNode);
350 auto nodeend = colnodennz(rowNode+1);
351 auto n = nodeend - nodebegin;
353 for (decltype(nodebegin) i = 0; i < n; i++) {
354 colnodes(nodebegin + i) = coldofs(dofbegin + i);
362 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
363 RCP<const ParameterList> CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetValidParameterList()
const {
364 RCP<ParameterList> validParamList = rcp(
new ParameterList());
366 #define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name)) 371 typedef Teuchos::StringToIntegralParameterEntryValidator<int> validatorType;
372 validParamList->getEntry(
"aggregation: drop scheme").setValidator(
373 rcp(
new validatorType(Teuchos::tuple<std::string>(
"classical",
"distance laplacian"),
"aggregation: drop scheme")));
375 #undef SET_VALID_ENTRY 376 validParamList->set<
bool > (
"lightweight wrap",
true,
"Experimental option for lightweight graph access");
378 validParamList->set< RCP<const FactoryBase> >(
"A", Teuchos::null,
"Generating factory of the matrix A");
379 validParamList->set< RCP<const FactoryBase> >(
"UnAmalgamationInfo", Teuchos::null,
"Generating factory for UnAmalgamationInfo");
380 validParamList->set< RCP<const FactoryBase> >(
"Coordinates", Teuchos::null,
"Generating factory for Coordinates");
382 return validParamList;
385 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
386 void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::DeclareInput(Level ¤tLevel)
const {
387 Input(currentLevel,
"A");
388 Input(currentLevel,
"UnAmalgamationInfo");
390 const ParameterList& pL = GetParameterList();
391 if (pL.get<
bool>(
"lightweight wrap") ==
true) {
392 if (pL.get<std::string>(
"aggregation: drop scheme") ==
"distance laplacian")
393 Input(currentLevel,
"Coordinates");
397 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
398 void CoalesceDropFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::Build(Level& currentLevel)
const {
399 FactoryMonitor m(*
this,
"Build", currentLevel);
401 typedef Teuchos::ScalarTraits<SC> STS;
402 SC zero = STS::zero();
404 auto A = Get< RCP<Matrix> >(currentLevel,
"A");
405 LO blkSize = A->GetFixedBlockSize();
406 GO indexBase = A->getRowMap()->getIndexBase();
408 auto amalInfo = Get< RCP<AmalgamationInfo> >(currentLevel,
"UnAmalgamationInfo");
410 const ParameterList& pL = GetParameterList();
413 GetOStream(
Warnings0) <<
"lightweight wrap is deprecated" << std::endl;
415 std::string algo = pL.get<std::string>(
"aggregation: drop scheme");
417 double threshold = pL.get<
double>(
"aggregation: drop tol");
418 GetOStream(
Runtime0) <<
"algorithm = \"" << algo <<
"\": threshold = " << threshold <<
", blocksize = " << A->GetFixedBlockSize() << std::endl;
420 Set(currentLevel,
"Filtering", (threshold != zero));
422 const typename STS::magnitudeType dirichletThreshold = STS::magnitude(as<SC>(pL.get<
double>(
"aggregation: Dirichlet threshold")));
424 GO numDropped = 0, numTotal = 0;
426 RCP<LWGraph_kokkos> graph;
429 typedef typename LWGraph_kokkos::boundary_nodes_type boundary_nodes_type;
430 boundary_nodes_type boundaryNodes;
432 if (blkSize == 1 && threshold == zero) {
435 graph = rcp(
new LWGraph_kokkos(A->getLocalMatrix().graph, A->getRowMap(), A->getColMap(),
"graph of A"));
439 graph->SetBoundaryNodeMap(boundaryNodes);
440 numTotal = A->getNodeNumEntries();
442 }
else if (blkSize > 1 && threshold == zero) {
445 TEUCHOS_TEST_FOR_EXCEPTION(A->getRowMap()->getNodeNumElements() % blkSize != 0,
MueLu::Exceptions::RuntimeError,
"MueLu::CoalesceDropFactory: Number of local elements is " << A->getRowMap()->getNodeNumElements() <<
" but should be a multiply of " << blkSize);
447 const RCP<const Map> rowMap = A->getRowMap();
448 const RCP<const Map> colMap = A->getColMap();
455 const RCP<const Map> uniqueMap = amalInfo->getNodeRowMap();
456 const RCP<const Map> nonUniqueMap = amalInfo->getNodeColMap();
457 Array<LO> rowTranslationArray = *(amalInfo->getRowTranslation());
458 Array<LO> colTranslationArray = *(amalInfo->getColTranslation());
461 LO numNodes = Teuchos::as<LocalOrdinal>(uniqueMap->getNodeNumElements());
463 id_translation_type rowTranslation(
"dofId2nodeId",rowTranslationArray.size());
464 id_translation_type colTranslation(
"ov_dofId2nodeId",colTranslationArray.size());
467 for (decltype(rowTranslationArray.size()) i = 0; i < rowTranslationArray.size(); ++i)
468 rowTranslation(i) = rowTranslationArray[i];
469 for (decltype(colTranslationArray.size()) i = 0; i < colTranslationArray.size(); ++i)
470 colTranslation(i) = colTranslationArray[i];
472 blkSize = A->GetFixedBlockSize();
473 LocalOrdinal blkId = -1;
474 LocalOrdinal blkPartSize = A->GetFixedBlockSize();
475 if(A->IsView(
"stridedMaps") ==
true) {
476 const RCP<const Map> myMap = A->getRowMap(
"stridedMaps");
477 const RCP<const StridedMap> strMap = Teuchos::rcp_dynamic_cast<
const StridedMap>(myMap);
478 TEUCHOS_TEST_FOR_EXCEPTION(strMap.is_null() ==
true, Exceptions::RuntimeError,
"Map is not of type stridedMap");
479 blkSize = Teuchos::as<const LocalOrdinal>(strMap->getFixedBlockSize());
480 blkId = strMap->getStridedBlockId();
482 blkPartSize = Teuchos::as<LocalOrdinal>(strMap->getStridingData()[blkId]);
484 auto kokkosMatrix = A->getLocalMatrix();
487 typedef typename Matrix::local_matrix_type local_matrix_type;
488 typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type;
489 typedef typename kokkos_graph_type::row_map_type row_map_type;
491 typedef typename kokkos_graph_type::entries_type entries_type;
494 typename row_map_type::non_const_type dofNnz(
"nnz_map", numNodes + 1);
496 Stage1aVectorFunctor<decltype(kokkosMatrix), decltype(dofNnz), decltype(blkPartSize)> stage1aFunctor(kokkosMatrix, dofNnz, blkPartSize);
497 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1a", range_type(0,numNodes), stage1aFunctor, numDofCols);
499 ScanFunctor<LO,decltype(dofNnz)> scanFunctor(dofNnz);
500 Kokkos::parallel_scan(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanFunctor);
502 typename entries_type::non_const_type dofcols(
"dofcols", numDofCols);
503 Stage1bVectorFunctor <decltype(kokkosMatrix), decltype(dofNnz), decltype(blkPartSize), decltype(dofcols)> stage1bFunctor(kokkosMatrix, dofNnz, blkPartSize, dofcols);
504 Kokkos::parallel_for(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1b", range_type(0,numNodes), stage1bFunctor);
508 typename row_map_type::non_const_type rows(
"nnz_nodemap", numNodes + 1);
509 typename boundary_nodes_type::non_const_type bndNodes(
"boundaryNodes", numNodes);
510 Stage1cVectorFunctor <decltype(kokkosMatrix), decltype(dofNnz), decltype(dofcols), decltype(colTranslation), decltype(bndNodes)> stage1cFunctor(dofNnz, dofcols, colTranslation,rows,bndNodes);
511 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1cFunctor,numNodeCols);
514 ScanFunctor<LO,decltype(rows)> scanNodeFunctor(rows);
515 Kokkos::parallel_scan(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numNodes+1), scanNodeFunctor);
518 typename entries_type::non_const_type cols(
"nodecols", numNodeCols);
521 Stage1dVectorFunctor <decltype(kokkosMatrix), decltype(dofNnz), decltype(dofcols), decltype(rows), decltype(cols)> stage1dFunctor(dofcols, dofNnz, cols, rows);
522 Kokkos::parallel_for(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1c", range_type(0,numNodes), stage1dFunctor);
523 kokkos_graph_type kokkosGraph(cols, rows);
526 graph = rcp(
new LWGraph_kokkos(kokkosGraph, uniqueMap, nonUniqueMap,
"amalgamated graph of A"));
528 boundaryNodes = bndNodes;
529 graph->SetBoundaryNodeMap(boundaryNodes);
530 numTotal = A->getNodeNumEntries();
532 dofsPerNode = blkSize;
533 }
else if (algo ==
"classical") {
535 if (blkSize == 1 && threshold != zero) {
538 RCP<Vector> ghostedDiagVec = Utilities_kokkos::GetMatrixOverlappedDiagonal(*A);
539 auto ghostedDiag = ghostedDiagVec->template getLocalView<typename NO::device_type>();
541 typedef typename Matrix::local_matrix_type local_matrix_type;
542 typedef typename LWGraph_kokkos::local_graph_type kokkos_graph_type;
543 typedef typename kokkos_graph_type::row_map_type row_map_type;
544 typedef typename kokkos_graph_type::entries_type entries_type;
546 LO numRows = A->getNodeNumRows();
547 auto kokkosMatrix = A->getLocalMatrix();
549 typedef Kokkos::ArithTraits<SC> ATS;
550 typedef typename ATS::magnitudeType magnitudeType;
553 typename row_map_type::non_const_type rows(
"row_map", numRows+1);
557 Stage1ScalarFunctor<decltype(kokkosMatrix), decltype(ghostedDiag), decltype(rows)> stage1Functor(kokkosMatrix, threshold, ghostedDiag, rows);
558 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1_reduce", range_type(0,numRows), stage1Functor, realnnz);
561 ScanFunctor<LO,decltype(rows)> scanFunctor(rows);
562 Kokkos::parallel_scan(
"MueLu:CoalesceDropF:Build:scalar_filter:stage1_scan", range_type(0,numRows+1), scanFunctor);
566 typename boundary_nodes_type::non_const_type bndNodes(
"boundaryNodes", numRows);
567 typename entries_type::non_const_type cols (
"entries", realnnz);
569 typename decltype(kokkosMatrix)::size_type kokkos_numDropped;
571 Stage2ScalarFunctor<decltype(kokkosMatrix), decltype(ghostedDiag), decltype(rows), decltype(cols), decltype(bndNodes)>
572 stage2Functor(kokkosMatrix, ghostedDiag, rows, cols, bndNodes, threshold);
573 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:scalar_filter:stage2_reduce", range_type(0,numRows), stage2Functor, kokkos_numDropped);
574 numDropped = kokkos_numDropped;
576 boundaryNodes = bndNodes;
578 kokkos_graph_type kokkosGraph(cols, rows);
580 graph = rcp(
new LWGraph_kokkos(kokkosGraph, A->getRowMap(), A->getColMap(),
"filtered graph of A"));
581 graph->SetBoundaryNodeMap(boundaryNodes);
583 numTotal = A->getNodeNumEntries();
587 }
else if (blkSize > 1 && threshold != zero) {
590 throw Exceptions::RuntimeError(
"Block systems with filtering are not implemented");
596 }
else if (algo ==
"distance laplacian") {
597 typedef Xpetra::MultiVector<double,LO,GO,NO> doubleMultiVector;
599 auto coords = Get<RCP<doubleMultiVector> >(currentLevel,
"Coordinates");
601 if (blkSize == 1 && threshold != zero) {
604 throw Exceptions::RuntimeError(
"not implemented");
606 }
else if (blkSize > 1 && threshold != zero) {
609 throw Exceptions::RuntimeError(
"Block systems with filtering are not implemented");
613 GO numLocalBoundaryNodes = 0;
614 GO numGlobalBoundaryNodes = 0;
617 Kokkos::parallel_reduce(
"MueLu:CoalesceDropF:Build:bnd", range_type(0,boundaryNodes.dimension_0()), KOKKOS_LAMBDA(
const LO i, GO& n) {
618 if (boundaryNodes(i))
620 }, numLocalBoundaryNodes);
623 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
624 MueLu_sumAll(comm, numLocalBoundaryNodes, numGlobalBoundaryNodes);
625 GetOStream(
Statistics1) <<
"Detected " << numGlobalBoundaryNodes <<
" Dirichlet nodes" << std::endl;
627 if ((GetVerbLevel() &
Statistics1) && threshold != zero) {
628 RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
629 GO numGlobalTotal, numGlobalDropped;
632 if (numGlobalTotal != 0) {
633 GetOStream(
Statistics1) <<
"Number of dropped entries: " 634 << numGlobalDropped <<
"/" << numGlobalTotal
635 <<
" (" << 100*Teuchos::as<double>(numGlobalDropped)/Teuchos::as<double>(numGlobalTotal) <<
"%)" << std::endl;
638 Set(currentLevel,
"DofsPerNode", dofsPerNode);
639 Set(currentLevel,
"Graph", graph);
642 #endif // HAVE_MUELU_KOKKOS_REFACTOR 643 #endif // MUELU_COALESCEDROPFACTORY_KOKKOS_DEF_HPP Important warning messages (one line)
#define MueLu_sumAll(rcpComm, in, out)
void parallel_reduce(const std::string &label, const PolicyType &policy, const FunctorType &functor, ReturnType &return_value, typename Impl::enable_if< Kokkos::Impl::is_execution_policy< PolicyType >::value >::type *=0)
One-liner description of what is happening.
Namespace for MueLu classes and methods.
Kokkos::View< const bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol)
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type *=0)
Exception throws to report errors in the internal logical of the program.
#define SET_VALID_ENTRY(name)