246#ifdef HAVE_MUELU_CUDA
247 if (parameterList_.get<
bool>(
"refmaxwell: cuda profile setup",
false)) cudaProfilerStart();
250 std::string timerLabel;
252 timerLabel =
"MueLu RefMaxwell: compute (reuse)";
254 timerLabel =
"MueLu RefMaxwell: compute";
255 RCP<Teuchos::TimeMonitor> tmCompute = getTimer(timerLabel);
262 RCP<ParameterList> params = rcp(
new ParameterList());;
263 params->set(
"printLoadBalancingInfo",
true);
264 params->set(
"printCommInfo",
true);
271 magnitudeType rowSumTol = parameterList_.get(
"refmaxwell: row sum drop tol (1,1)",-1.0);
274 useKokkos_,BCrowsKokkos_,BCcolsKokkos_,BCdomainKokkos_,
276 BCedges_,BCnodes_,BCrows_,BCcols_,BCdomain_,
277 allEdgesBoundary_,allNodesBoundary_);
279 GetOStream(
Statistics2) <<
"MueLu::RefMaxwell::compute(): Detected " << BCedges_ <<
" BC rows and " << BCnodes_ <<
" BC columns." << std::endl;
283 if (allEdgesBoundary_) {
286 GetOStream(
Warnings0) <<
"All edges are detected as boundary edges!" << std::endl;
288 setFineLevelSmoother();
295 if(Nullspace_ != null) {
297 TEUCHOS_ASSERT(Nullspace_->getMap()->isCompatible(*(SM_Matrix_->getRowMap())));
299 else if(Nullspace_ == null && Coords_ != null) {
300#ifdef HAVE_MUELU_KOKKOS_REFACTOR
301 RCP<MultiVector> CoordsSC;
303 CoordsSC = Utilities_kokkos::RealValuedToScalarMultiVector(Coords_);
309 Nullspace_ = MultiVectorFactory::Build(SM_Matrix_->getRowMap(),Coords_->getNumVectors());
310 D0_Matrix_->apply(*CoordsSC,*Nullspace_);
312 bool normalize = parameterList_.get<
bool>(
"refmaxwell: normalize nullspace", MasterList::getDefault<bool>(
"refmaxwell: normalize nullspace"));
317 ArrayRCP<ArrayRCP<const Scalar> > localNullspace(Nullspace_->getNumVectors());
318 for (
size_t i = 0; i < Nullspace_->getNumVectors(); i++)
319 localNullspace[i] = Nullspace_->getData(i);
320 coordinateType localMinLen = Teuchos::ScalarTraits<coordinateType>::rmax();
321 coordinateType localMeanLen = Teuchos::ScalarTraits<coordinateType>::zero();
322 coordinateType localMaxLen = Teuchos::ScalarTraits<coordinateType>::zero();
323 for (
size_t j=0; j < Nullspace_->getMap()->getNodeNumElements(); j++) {
324 Scalar lenSC = Teuchos::ScalarTraits<Scalar>::zero();
325 for (
size_t i=0; i < Nullspace_->getNumVectors(); i++)
326 lenSC += localNullspace[i][j]*localNullspace[i][j];
327 coordinateType len = sqrt(Teuchos::ScalarTraits<Scalar>::real(lenSC));
328 localMinLen = std::min(localMinLen, len);
329 localMaxLen = std::max(localMaxLen, len);
333 RCP<const Teuchos::Comm<int> > comm = Nullspace_->getMap()->getComm();
337 meanLen /= Nullspace_->getMap()->getGlobalNumElements();
341 GetOStream(
Statistics0) <<
"Edge length (min/mean/max): " << minLen <<
" / " << meanLen <<
" / " << maxLen << std::endl;
346 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): normalizing nullspace" << std::endl;
348 const Scalar one = Teuchos::ScalarTraits<Scalar>::one();
350 Array<Scalar> normsSC(Coords_->getNumVectors(), one / Teuchos::as<Scalar>(meanLen));
351 Nullspace_->scale(normsSC());
355 GetOStream(
Errors) <<
"MueLu::RefMaxwell::compute(): either the nullspace or the nodal coordinates must be provided." << std::endl;
358 if (!reuse && skipFirstLevel_) {
360#ifdef HAVE_MUELU_KOKKOS_REFACTOR
362 Utilities_kokkos::ZeroDirichletRows(Nullspace_,BCrowsKokkos_);
368 dump(*Nullspace_,
"nullspace.m");
375 if (skipFirstLevel_) {
377 Level fineLevel, coarseLevel;
383 fineLevel.
Set(
"A",Ms_Matrix_);
384 coarseLevel.
Set(
"P",D0_Matrix_);
385 coarseLevel.
setlib(Ms_Matrix_->getDomainMap()->lib());
386 fineLevel.
setlib(Ms_Matrix_->getDomainMap()->lib());
387 coarseLevel.setObjectLabel(
"RefMaxwell (1,1) A_nodal");
388 fineLevel.setObjectLabel(
"RefMaxwell (1,1) A_nodal");
390 RCP<RAPFactory> rapFact = rcp(
new RAPFactory());
391 ParameterList rapList = *(rapFact->GetValidParameterList());
392 rapList.set(
"transpose: use implicit",
true);
393 rapList.set(
"rap: fix zero diagonals", parameterList_.get<
bool>(
"rap: fix zero diagonals",
true));
394 rapList.set(
"rap: fix zero diagonals threshold", parameterList_.get<
double>(
"rap: fix zero diagonals threshold", Teuchos::ScalarTraits<double>::eps()));
395 rapList.set(
"rap: triple product", parameterList_.get<
bool>(
"rap: triple product",
false));
396 rapFact->SetParameterList(rapList);
399 coarseLevel.
Request(
"A", rapFact.get());
401 A_nodal_Matrix_ = coarseLevel.
Get< RCP<Matrix> >(
"A", rapFact.get());
403 if (applyBCsToAnodal_) {
405#ifdef HAVE_MUELU_KOKKOS_REFACTOR
407 Utilities_kokkos::ApplyOAZToMatrixRows(A_nodal_Matrix_,BCdomainKokkos_);
412 dump(*A_nodal_Matrix_,
"A_nodal.m");
416 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): building special prolongator" << std::endl;
421 bool doRebalancing =
false;
422 doRebalancing = parameterList_.get<
bool>(
"refmaxwell: subsolves on subcommunicators", MasterList::getDefault<bool>(
"refmaxwell: subsolves on subcommunicators"));
423 int rebalanceStriding = parameterList_.get<
int>(
"refmaxwell: subsolves striding", -1);
424 int numProcsAH, numProcsA22;
425 int numProcs = SM_Matrix_->getDomainMap()->getComm()->getSize();
427 doRebalancing =
false;
446 ParameterList repartheurParams;
447 repartheurParams.set(
"repartition: start level", 0);
449 int defaultTargetRows = 10000;
450 repartheurParams.set(
"repartition: min rows per proc", precList11_.get<
int>(
"repartition: target rows per proc", defaultTargetRows));
451 repartheurParams.set(
"repartition: target rows per proc", precList11_.get<
int>(
"repartition: target rows per proc", defaultTargetRows));
452 repartheurParams.set(
"repartition: min rows per thread", precList11_.get<
int>(
"repartition: target rows per thread", defaultTargetRows));
453 repartheurParams.set(
"repartition: target rows per thread", precList11_.get<
int>(
"repartition: target rows per thread", defaultTargetRows));
454 repartheurParams.set(
"repartition: max imbalance", precList11_.get<
double>(
"repartition: max imbalance", 1.1));
455 repartheurFactory->SetParameterList(repartheurParams);
457 level.
Request(
"number of partitions", repartheurFactory.get());
458 repartheurFactory->Build(level);
459 numProcsAH = level.
Get<
int>(
"number of partitions", repartheurFactory.get());
460 numProcsAH = std::min(numProcsAH,numProcs);
470 level.
Set(
"Map",D0_Matrix_->getDomainMap());
473 ParameterList repartheurParams;
474 repartheurParams.set(
"repartition: start level", 0);
475 repartheurParams.set(
"repartition: use map",
true);
477 int defaultTargetRows = 10000;
478 repartheurParams.set(
"repartition: min rows per proc", precList22_.get<
int>(
"repartition: target rows per proc", defaultTargetRows));
479 repartheurParams.set(
"repartition: target rows per proc", precList22_.get<
int>(
"repartition: target rows per proc", defaultTargetRows));
480 repartheurParams.set(
"repartition: min rows per thread", precList22_.get<
int>(
"repartition: target rows per thread", defaultTargetRows));
481 repartheurParams.set(
"repartition: target rows per thread", precList22_.get<
int>(
"repartition: target rows per thread", defaultTargetRows));
483 repartheurFactory->SetParameterList(repartheurParams);
485 level.
Request(
"number of partitions", repartheurFactory.get());
486 repartheurFactory->Build(level);
487 numProcsA22 = level.
Get<
int>(
"number of partitions", repartheurFactory.get());
488 numProcsA22 = std::min(numProcsA22,numProcs);
491 if (rebalanceStriding >= 1) {
492 TEUCHOS_ASSERT(rebalanceStriding*numProcsAH<=numProcs);
493 TEUCHOS_ASSERT(rebalanceStriding*numProcsA22<=numProcs);
494 if (rebalanceStriding*(numProcsAH+numProcsA22)>numProcs) {
495 GetOStream(
Warnings0) <<
"RefMaxwell::compute(): Disabling striding = " << rebalanceStriding <<
", since AH needs " << numProcsAH
496 <<
" procs and A22 needs " << numProcsA22 <<
" procs."<< std::endl;
497 rebalanceStriding = -1;
505 RCP<Teuchos::TimeMonitor> tm = getTimer(
"MueLu RefMaxwell: Rebalance AH");
507 Level fineLevel, coarseLevel;
513 coarseLevel.
Set(
"A",AH_);
514 coarseLevel.
Set(
"P",P11_);
515 coarseLevel.
Set(
"Coordinates",CoordsH_);
516 if (!NullspaceH_.is_null())
517 coarseLevel.
Set(
"Nullspace",NullspaceH_);
518 coarseLevel.
Set(
"number of partitions", numProcsAH);
519 coarseLevel.
Set(
"repartition: heuristic target rows per process", 1000);
521 coarseLevel.
setlib(AH_->getDomainMap()->lib());
522 fineLevel.
setlib(AH_->getDomainMap()->lib());
523 coarseLevel.setObjectLabel(
"RefMaxwell coarse (1,1)");
524 fineLevel.setObjectLabel(
"RefMaxwell coarse (1,1)");
526 std::string partName = precList11_.get<std::string>(
"repartition: partitioner",
"zoltan2");
527 RCP<Factory> partitioner;
528 if (partName ==
"zoltan") {
529#ifdef HAVE_MUELU_ZOLTAN
536 }
else if (partName ==
"zoltan2") {
537#ifdef HAVE_MUELU_ZOLTAN2
539 ParameterList partParams;
540 RCP<const ParameterList> partpartParams = rcp(
new ParameterList(precList11_.sublist(
"repartition: params",
false)));
541 partParams.set(
"ParameterList", partpartParams);
542 partitioner->SetParameterList(partParams);
550 ParameterList repartParams;
551 repartParams.set(
"repartition: print partition distribution", precList11_.get<
bool>(
"repartition: print partition distribution",
false));
552 repartParams.set(
"repartition: remap parts", precList11_.get<
bool>(
"repartition: remap parts",
true));
553 if (rebalanceStriding >= 1) {
554 bool acceptPart = (SM_Matrix_->getDomainMap()->getComm()->getRank() % rebalanceStriding) == 0;
555 if (SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsAH*rebalanceStriding)
557 repartParams.set(
"repartition: remap accept partition", acceptPart);
559 repartFactory->SetParameterList(repartParams);
561 repartFactory->SetFactory(
"Partition", partitioner);
564 ParameterList newPparams;
565 newPparams.set(
"type",
"Interpolation");
566 newPparams.set(
"repartition: rebalance P and R", precList11_.get<
bool>(
"repartition: rebalance P and R",
false));
567 newPparams.set(
"repartition: use subcommunicators",
true);
568 newPparams.set(
"repartition: rebalance Nullspace", !NullspaceH_.is_null());
570 if (!NullspaceH_.is_null())
572 newP->SetParameterList(newPparams);
573 newP->SetFactory(
"Importer", repartFactory);
576 ParameterList rebAcParams;
577 rebAcParams.set(
"repartition: use subcommunicators",
true);
578 newA->SetParameterList(rebAcParams);
579 newA->SetFactory(
"Importer", repartFactory);
581 coarseLevel.
Request(
"P", newP.get());
582 coarseLevel.
Request(
"Importer", repartFactory.get());
583 coarseLevel.
Request(
"A", newA.get());
584 coarseLevel.
Request(
"Coordinates", newP.get());
585 if (!NullspaceH_.is_null())
586 coarseLevel.
Request(
"Nullspace", newP.get());
587 repartFactory->Build(coarseLevel);
589 if (!precList11_.get<
bool>(
"repartition: rebalance P and R",
false))
590 ImporterH_ = coarseLevel.
Get< RCP<const Import> >(
"Importer", repartFactory.get());
591 P11_ = coarseLevel.
Get< RCP<Matrix> >(
"P", newP.get());
592 AH_ = coarseLevel.
Get< RCP<Matrix> >(
"A", newA.get());
593 CoordsH_ = coarseLevel.
Get< RCP<RealValuedMultiVector> >(
"Coordinates", newP.get());
594 if (!NullspaceH_.is_null())
595 NullspaceH_ = coarseLevel.
Get< RCP<MultiVector> >(
"Nullspace", newP.get());
597 AH_AP_reuse_data_ = Teuchos::null;
598 AH_RAP_reuse_data_ = Teuchos::null;
600 if (!disable_addon_ && enable_reuse_) {
602 RCP<const Import> ImporterH = coarseLevel.
Get< RCP<const Import> >(
"Importer", repartFactory.get());
603 RCP<const Map> targetMap = ImporterH->getTargetMap();
604 ParameterList XpetraList;
605 XpetraList.set(
"Restrict Communicator",
true);
606 Addon_Matrix_ = MatrixFactory::Build(Addon_Matrix_, *ImporterH, *ImporterH, targetMap, targetMap, rcp(&XpetraList,
false));
611#ifdef HAVE_MUELU_KOKKOS_REFACTOR
615 if (useKokkos_ && precList11_.isParameter(
"aggregation: drop tol") && precList11_.get<
double>(
"aggregation: drop tol") != 0.0) {
616 GetOStream(
Warnings0) <<
"RefMaxwell::compute(): Setting \"aggregation: drop tol\". to 0.0, since CoalesceDropFactory_kokkos does not "
617 <<
"support BlockSize > 1 and drop tol != 0.0" << std::endl;
618 precList11_.set(
"aggregation: drop tol", 0.0);
621 dump(*P11_,
"P11.m");
623 if (!implicitTranspose_) {
624#ifdef HAVE_MUELU_KOKKOS_REFACTOR
626 R11_ = Utilities_kokkos::Transpose(*P11_);
632 dump(*R11_,
"R11.m");
637 if (!AH_.is_null()) {
639 size_t dim = Nullspace_->getNumVectors();
640 AH_->SetFixedBlockSize(dim);
641 AH_->setObjectLabel(
"RefMaxwell coarse (1,1)");
643 int oldRank = SetProcRankVerbose(AH_->getDomainMap()->getComm()->getRank());
645 RCP<ParameterList> params = rcp(
new ParameterList());;
646 params->set(
"printLoadBalancingInfo",
true);
647 params->set(
"printCommInfo",
true);
650#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
651 if (precList11_.isType<std::string>(
"Preconditioner Type")) {
653 if (precList11_.get<std::string>(
"Preconditioner Type") ==
"MueLu") {
654 ParameterList& userParamList = precList11_.sublist(
"Preconditioner Types").sublist(
"MueLu").sublist(
"user data");
655 userParamList.set<RCP<RealValuedMultiVector> >(
"Coordinates", CoordsH_);
664 dumpCoords(*CoordsH_,
"coordsH.m");
665 if (!NullspaceH_.is_null())
666 dump(*NullspaceH_,
"NullspaceH.m");
667 ParameterList& userParamList = precList11_.sublist(
"user data");
668 userParamList.set<RCP<RealValuedMultiVector> >(
"Coordinates", CoordsH_);
669 if (!NullspaceH_.is_null())
670 userParamList.set<RCP<MultiVector> >(
"Nullspace", NullspaceH_);
673 RCP<MueLu::Level> level0 = HierarchyH_->GetLevel(0);
674 level0->Set(
"A", AH_);
675 HierarchyH_->SetupRe();
678 SetProcRankVerbose(oldRank);
684 if(!reuse && applyBCsTo22_) {
685 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): nuking BC nodes of D0" << std::endl;
687 D0_Matrix_->resumeFill();
689 if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
690 replaceWith= Teuchos::ScalarTraits<SC>::eps();
692 replaceWith = Teuchos::ScalarTraits<SC>::zero();
693#ifdef HAVE_MUELU_KOKKOS_REFACTOR
695 Utilities_kokkos::ZeroDirichletCols(D0_Matrix_,BCcolsKokkos_,replaceWith);
702 D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
707 if (!allNodesBoundary_) {
708 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): building MG for (2,2)-block" << std::endl;
711 RCP<Teuchos::TimeMonitor> tm = getTimer(
"MueLu RefMaxwell: Build A22");
713 Level fineLevel, coarseLevel;
719 fineLevel.
Set(
"A",SM_Matrix_);
720 coarseLevel.
Set(
"P",D0_Matrix_);
721 coarseLevel.
Set(
"Coordinates",Coords_);
723 coarseLevel.
setlib(SM_Matrix_->getDomainMap()->lib());
724 fineLevel.
setlib(SM_Matrix_->getDomainMap()->lib());
725 coarseLevel.setObjectLabel(
"RefMaxwell (2,2)");
726 fineLevel.setObjectLabel(
"RefMaxwell (2,2)");
728 RCP<RAPFactory> rapFact = rcp(
new RAPFactory());
729 ParameterList rapList = *(rapFact->GetValidParameterList());
730 rapList.set(
"transpose: use implicit",
true);
731 rapList.set(
"rap: fix zero diagonals", parameterList_.get<
bool>(
"rap: fix zero diagonals",
true));
732 rapList.set(
"rap: fix zero diagonals threshold", parameterList_.get<
double>(
"rap: fix zero diagonals threshold", Teuchos::ScalarTraits<double>::eps()));
733 rapList.set(
"rap: triple product", parameterList_.get<
bool>(
"rap: triple product",
false));
734 rapFact->SetParameterList(rapList);
736 if (!A22_AP_reuse_data_.is_null()) {
737 coarseLevel.
AddKeepFlag(
"AP reuse data", rapFact.get());
738 coarseLevel.
Set<Teuchos::RCP<Teuchos::ParameterList> >(
"AP reuse data", A22_AP_reuse_data_, rapFact.get());
740 if (!A22_RAP_reuse_data_.is_null()) {
741 coarseLevel.
AddKeepFlag(
"RAP reuse data", rapFact.get());
742 coarseLevel.
Set<Teuchos::RCP<Teuchos::ParameterList> >(
"RAP reuse data", A22_RAP_reuse_data_, rapFact.get());
748 coarseLevel.
Set(
"number of partitions", numProcsA22);
749 coarseLevel.
Set(
"repartition: heuristic target rows per process", 1000);
751 std::string partName = precList22_.get<std::string>(
"repartition: partitioner",
"zoltan2");
752 RCP<Factory> partitioner;
753 if (partName ==
"zoltan") {
754#ifdef HAVE_MUELU_ZOLTAN
756 partitioner->SetFactory(
"A", rapFact);
762 }
else if (partName ==
"zoltan2") {
763#ifdef HAVE_MUELU_ZOLTAN2
765 ParameterList partParams;
766 RCP<const ParameterList> partpartParams = rcp(
new ParameterList(precList22_.sublist(
"repartition: params",
false)));
767 partParams.set(
"ParameterList", partpartParams);
768 partitioner->SetParameterList(partParams);
769 partitioner->SetFactory(
"A", rapFact);
777 ParameterList repartParams;
778 repartParams.set(
"repartition: print partition distribution", precList22_.get<
bool>(
"repartition: print partition distribution",
false));
779 repartParams.set(
"repartition: remap parts", precList22_.get<
bool>(
"repartition: remap parts",
true));
780 if (rebalanceStriding >= 1) {
781 bool acceptPart = ((SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank()) % rebalanceStriding) == 0;
782 if (SM_Matrix_->getDomainMap()->getComm()->getSize()-1-SM_Matrix_->getDomainMap()->getComm()->getRank() >= numProcsA22*rebalanceStriding)
785 TEUCHOS_ASSERT(AH_.is_null());
786 repartParams.set(
"repartition: remap accept partition", acceptPart);
788 repartParams.set(
"repartition: remap accept partition", AH_.is_null());
789 repartFactory->SetParameterList(repartParams);
790 repartFactory->SetFactory(
"A", rapFact);
792 repartFactory->SetFactory(
"Partition", partitioner);
795 ParameterList newPparams;
796 newPparams.set(
"type",
"Interpolation");
797 newPparams.set(
"repartition: rebalance P and R", precList22_.get<
bool>(
"repartition: rebalance P and R",
false));
798 newPparams.set(
"repartition: use subcommunicators",
true);
799 newPparams.set(
"repartition: rebalance Nullspace",
false);
801 newP->SetParameterList(newPparams);
802 newP->SetFactory(
"Importer", repartFactory);
805 ParameterList rebAcParams;
806 rebAcParams.set(
"repartition: use subcommunicators",
true);
807 newA->SetParameterList(rebAcParams);
808 newA->SetFactory(
"A", rapFact);
809 newA->SetFactory(
"Importer", repartFactory);
811 coarseLevel.
Request(
"P", newP.get());
812 coarseLevel.
Request(
"Importer", repartFactory.get());
813 coarseLevel.
Request(
"A", newA.get());
814 coarseLevel.
Request(
"Coordinates", newP.get());
815 rapFact->Build(fineLevel,coarseLevel);
816 repartFactory->Build(coarseLevel);
818 if (!precList22_.get<
bool>(
"repartition: rebalance P and R",
false))
819 Importer22_ = coarseLevel.
Get< RCP<const Import> >(
"Importer", repartFactory.get());
820 D0_Matrix_ = coarseLevel.
Get< RCP<Matrix> >(
"P", newP.get());
821 A22_ = coarseLevel.
Get< RCP<Matrix> >(
"A", newA.get());
822 Coords_ = coarseLevel.
Get< RCP<RealValuedMultiVector> >(
"Coordinates", newP.get());
827 coarseLevel.
Request(
"A", rapFact.get());
829 coarseLevel.
Request(
"AP reuse data", rapFact.get());
830 coarseLevel.
Request(
"RAP reuse data", rapFact.get());
833 A22_ = coarseLevel.
Get< RCP<Matrix> >(
"A", rapFact.get());
836 if (!parameterList_.get<
bool>(
"rap: triple product",
false))
837 A22_AP_reuse_data_ = coarseLevel.
Get< RCP<ParameterList> >(
"AP reuse data", rapFact.get());
838 A22_RAP_reuse_data_ = coarseLevel.
Get< RCP<ParameterList> >(
"RAP reuse data", rapFact.get());
842 RCP<Teuchos::TimeMonitor> tm = getTimer(
"MueLu RefMaxwell: Build A22");
843 if (Importer22_.is_null()) {
845 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,
false,*D0_Matrix_,
false,temp,GetOStream(
Runtime0),
true,
true);
846 if (!implicitTranspose_)
847 A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_T_Matrix_,
false,*temp,
false,A22_,GetOStream(
Runtime0),
true,
true);
849 A22_ = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,
true,*temp,
false,A22_,GetOStream(
Runtime0),
true,
true);
852 RCP<const Import> D0importer = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
853 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(D0origDomainMap_, D0origImporter_);
855 RCP<Matrix> temp, temp2;
856 temp = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*SM_Matrix_,
false,*D0_Matrix_,
false,temp,GetOStream(
Runtime0),
true,
true);
857 if (!implicitTranspose_)
858 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_T_Matrix_,
false,*temp,
false,temp2,GetOStream(
Runtime0),
true,
true);
860 temp2 = Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,
true,*temp,
false,temp2,GetOStream(
Runtime0),
true,
true);
863 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), D0importer);
865 ParameterList XpetraList;
866 XpetraList.set(
"Restrict Communicator",
true);
867 XpetraList.set(
"Timer Label",
"MueLu::RebalanceA22");
868 RCP<const Map> targetMap = Importer22_->getTargetMap();
869 A22_ = MatrixFactory::Build(temp2, *Importer22_, *Importer22_, targetMap, targetMap, rcp(&XpetraList,
false));
873 if (!implicitTranspose_ && !reuse) {
874#ifdef HAVE_MUELU_KOKKOS_REFACTOR
876 D0_T_Matrix_ = Utilities_kokkos::Transpose(*D0_Matrix_);
885 if (!A22_.is_null()) {
886 dump(*A22_,
"A22.m");
887 A22_->setObjectLabel(
"RefMaxwell (2,2)");
888 int oldRank = SetProcRankVerbose(A22_->getDomainMap()->getComm()->getRank());
890 RCP<ParameterList> params = rcp(
new ParameterList());;
891 params->set(
"printLoadBalancingInfo",
true);
892 params->set(
"printCommInfo",
true);
895#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
896 if (precList22_.isType<std::string>(
"Preconditioner Type")) {
898 if (precList22_.get<std::string>(
"Preconditioner Type") ==
"MueLu") {
899 ParameterList& userParamList = precList22_.sublist(
"Preconditioner Types").sublist(
"MueLu").sublist(
"user data");
900 userParamList.set<RCP<RealValuedMultiVector> >(
"Coordinates", Coords_);
908 ParameterList& userParamList = precList22_.sublist(
"user data");
909 userParamList.set<RCP<RealValuedMultiVector> >(
"Coordinates", Coords_);
912 std::string coarseType =
"";
913 if (precList22_.isParameter(
"coarse: type")) {
914 coarseType = precList22_.get<std::string>(
"coarse: type");
916 std::transform(coarseType.begin(), coarseType.end(), coarseType.begin(), ::tolower);
917 std::transform(coarseType.begin(), ++coarseType.begin(), coarseType.begin(), ::toupper);
921 coarseType ==
"Klu" ||
922 coarseType ==
"Klu2") &&
923 (!precList22_.isSublist(
"coarse: params") ||
924 !precList22_.sublist(
"coarse: params").isParameter(
"fix nullspace")))
925 precList22_.sublist(
"coarse: params").set(
"fix nullspace",
true);
928 RCP<MueLu::Level> level0 = Hierarchy22_->GetLevel(0);
929 level0->Set(
"A", A22_);
930 Hierarchy22_->SetupRe();
933 SetProcRankVerbose(oldRank);
939 if(!reuse && !allNodesBoundary_ && applyBCsTo22_) {
940 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): nuking BC edges of D0" << std::endl;
942 D0_Matrix_->resumeFill();
944 if (D0_Matrix_->getRowMap()->lib() == Xpetra::UseEpetra)
945 replaceWith= Teuchos::ScalarTraits<SC>::eps();
947 replaceWith = Teuchos::ScalarTraits<SC>::zero();
948#ifdef HAVE_MUELU_KOKKOS_REFACTOR
950 Utilities_kokkos::ZeroDirichletRows(D0_Matrix_,BCrowsKokkos_,replaceWith);
957 D0_Matrix_->fillComplete(D0_Matrix_->getDomainMap(),D0_Matrix_->getRangeMap());
958 dump(*D0_Matrix_,
"D0_nuked.m");
961 setFineLevelSmoother();
964 if (!ImporterH_.is_null()) {
965 RCP<const Import> ImporterP11 = ImportFactory::Build(ImporterH_->getTargetMap(),P11_->getColMap());
966 rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix()->replaceDomainMapAndImporter(ImporterH_->getTargetMap(), ImporterP11);
969 if (!Importer22_.is_null()) {
971 D0origDomainMap_ = D0_Matrix_->getDomainMap();
972 D0origImporter_ = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter();
974 RCP<const Import> ImporterD0 = ImportFactory::Build(Importer22_->getTargetMap(),D0_Matrix_->getColMap());
975 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->replaceDomainMapAndImporter(Importer22_->getTargetMap(), ImporterD0);
978#ifdef HAVE_MUELU_TPETRA
979 if ((!D0_T_Matrix_.is_null()) &&
981 (!rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
982 (!rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter().is_null()) &&
983 (D0_T_Matrix_->getColMap()->lib() == Xpetra::UseTpetra) &&
984 (R11_->getColMap()->lib() == Xpetra::UseTpetra))
985 D0_T_R11_colMapsMatch_ = D0_T_Matrix_->getColMap()->isSameAs(*R11_->getColMap());
988 D0_T_R11_colMapsMatch_ =
false;
989 if (D0_T_R11_colMapsMatch_)
990 GetOStream(
Runtime0) <<
"RefMaxwell::compute(): D0_T and R11 have matching colMaps" << std::endl;
995 if (parameterList_.isSublist(
"matvec params"))
997 RCP<ParameterList> matvecParams = rcpFromRef(parameterList_.sublist(
"matvec params"));
1002 if (!ImporterH_.is_null()) ImporterH_->setDistributorParameters(matvecParams);
1003 if (!Importer22_.is_null()) Importer22_->setDistributorParameters(matvecParams);
1005 if (!ImporterH_.is_null() && parameterList_.isSublist(
"refmaxwell: ImporterH params")){
1006 RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist(
"refmaxwell: ImporterH params"));
1007 ImporterH_->setDistributorParameters(importerParams);
1009 if (!Importer22_.is_null() && parameterList_.isSublist(
"refmaxwell: Importer22 params")){
1010 RCP<ParameterList> importerParams = rcpFromRef(parameterList_.sublist(
"refmaxwell: Importer22 params"));
1011 Importer22_->setDistributorParameters(importerParams);
1017#ifdef HAVE_MUELU_CUDA
1018 if (parameterList_.get<
bool>(
"refmaxwell: cuda profile setup",
false)) cudaProfilerStop();
1025 if (parameterList_.isType<std::string>(
"smoother: type") &&
1026 parameterList_.get<std::string>(
"smoother: type") ==
"hiptmair" &&
1027 SM_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra &&
1028 A22_->getDomainMap()->lib() == Xpetra::UseTpetra &&
1029 D0_Matrix_->getDomainMap()->lib() == Xpetra::UseTpetra) {
1030#if defined(MUELU_REFMAXWELL_CAN_USE_HIPTMAIR)
1031 ParameterList hiptmairPreList, hiptmairPostList, smootherPreList, smootherPostList;
1033 if (smootherList_.isSublist(
"smoother: pre params"))
1034 smootherPreList = smootherList_.sublist(
"smoother: pre params");
1035 else if (smootherList_.isSublist(
"smoother: params"))
1036 smootherPreList = smootherList_.sublist(
"smoother: params");
1037 hiptmairPreList.set(
"hiptmair: smoother type 1",
1038 smootherPreList.get<std::string>(
"hiptmair: smoother type 1",
"CHEBYSHEV"));
1039 hiptmairPreList.set(
"hiptmair: smoother type 2",
1040 smootherPreList.get<std::string>(
"hiptmair: smoother type 2",
"CHEBYSHEV"));
1041 if(smootherPreList.isSublist(
"hiptmair: smoother list 1"))
1042 hiptmairPreList.set(
"hiptmair: smoother list 1", smootherPreList.sublist(
"hiptmair: smoother list 1"));
1043 if(smootherPreList.isSublist(
"hiptmair: smoother list 2"))
1044 hiptmairPreList.set(
"hiptmair: smoother list 2", smootherPreList.sublist(
"hiptmair: smoother list 2"));
1045 hiptmairPreList.set(
"hiptmair: pre or post",
1046 smootherPreList.get<std::string>(
"hiptmair: pre or post",
"pre"));
1047 hiptmairPreList.set(
"hiptmair: zero starting solution",
1048 smootherPreList.get<
bool>(
"hiptmair: zero starting solution",
true));
1050 if (smootherList_.isSublist(
"smoother: post params"))
1051 smootherPostList = smootherList_.sublist(
"smoother: post params");
1052 else if (smootherList_.isSublist(
"smoother: params"))
1053 smootherPostList = smootherList_.sublist(
"smoother: params");
1054 hiptmairPostList.set(
"hiptmair: smoother type 1",
1055 smootherPostList.get<std::string>(
"hiptmair: smoother type 1",
"CHEBYSHEV"));
1056 hiptmairPostList.set(
"hiptmair: smoother type 2",
1057 smootherPostList.get<std::string>(
"hiptmair: smoother type 2",
"CHEBYSHEV"));
1058 if(smootherPostList.isSublist(
"hiptmair: smoother list 1"))
1059 hiptmairPostList.set(
"hiptmair: smoother list 1", smootherPostList.sublist(
"hiptmair: smoother list 1"));
1060 if(smootherPostList.isSublist(
"hiptmair: smoother list 2"))
1061 hiptmairPostList.set(
"hiptmair: smoother list 2", smootherPostList.sublist(
"hiptmair: smoother list 2"));
1062 hiptmairPostList.set(
"hiptmair: pre or post",
1063 smootherPostList.get<std::string>(
"hiptmair: pre or post",
"post"));
1064 hiptmairPostList.set(
"hiptmair: zero starting solution",
1065 smootherPostList.get<
bool>(
"hiptmair: zero starting solution",
false));
1067 typedef Tpetra::RowMatrix<SC, LO, GO, NO> TROW;
1073 hiptmairPreSmoother_ -> setParameters(hiptmairPreList);
1074 hiptmairPreSmoother_ -> initialize();
1075 hiptmairPreSmoother_ -> compute();
1077 hiptmairPostSmoother_ -> setParameters(hiptmairPostList);
1078 hiptmairPostSmoother_ -> initialize();
1079 hiptmairPostSmoother_ -> compute();
1080 useHiptmairSmoothing_ =
true;
1082 throw(Xpetra::Exceptions::RuntimeError(
"MueLu must be compiled with Ifpack2 for Hiptmair smoothing."));
1087 RCP<MueLu::FactoryManagerBase> factoryHandler = rcp(
new FactoryManager());
1090 level.setObjectLabel(
"RefMaxwell (1,1)");
1091 level.
Set(
"A",SM_Matrix_);
1092 level.
setlib(SM_Matrix_->getDomainMap()->lib());
1094 if (parameterList_.isType<std::string>(
"smoother: pre type") && parameterList_.isType<std::string>(
"smoother: post type")) {
1095 std::string preSmootherType = parameterList_.get<std::string>(
"smoother: pre type");
1096 std::string postSmootherType = parameterList_.get<std::string>(
"smoother: post type");
1098 ParameterList preSmootherList, postSmootherList;
1099 if (parameterList_.isSublist(
"smoother: pre params"))
1100 preSmootherList = parameterList_.sublist(
"smoother: pre params");
1101 if (parameterList_.isSublist(
"smoother: post params"))
1102 postSmootherList = parameterList_.sublist(
"smoother: post params");
1104 RCP<SmootherPrototype> preSmootherPrototype = rcp(
new TrilinosSmoother(preSmootherType, preSmootherList));
1105 RCP<SmootherPrototype> postSmootherPrototype = rcp(
new TrilinosSmoother(postSmootherType, postSmootherList));
1106 RCP<SmootherFactory> smootherFact = rcp(
new SmootherFactory(preSmootherPrototype, postSmootherPrototype));
1108 level.
Request(
"PreSmoother",smootherFact.get());
1109 level.
Request(
"PostSmoother",smootherFact.get());
1110 if (enable_reuse_) {
1111 ParameterList smootherFactoryParams;
1112 smootherFactoryParams.set(
"keep smoother data",
true);
1113 smootherFact->SetParameterList(smootherFactoryParams);
1114 level.
Request(
"PreSmoother data", smootherFact.get());
1115 level.
Request(
"PostSmoother data", smootherFact.get());
1116 if (!PreSmootherData_.is_null())
1117 level.
Set(
"PreSmoother data", PreSmootherData_, smootherFact.get());
1118 if (!PostSmootherData_.is_null())
1119 level.
Set(
"PostSmoother data", PostSmootherData_, smootherFact.get());
1121 smootherFact->Build(level);
1122 PreSmoother_ = level.
Get<RCP<SmootherBase> >(
"PreSmoother",smootherFact.get());
1123 PostSmoother_ = level.
Get<RCP<SmootherBase> >(
"PostSmoother",smootherFact.get());
1124 if (enable_reuse_) {
1125 PreSmootherData_ = level.
Get<RCP<SmootherPrototype> >(
"PreSmoother data",smootherFact.get());
1126 PostSmootherData_ = level.
Get<RCP<SmootherPrototype> >(
"PostSmoother data",smootherFact.get());
1129 std::string smootherType = parameterList_.get<std::string>(
"smoother: type",
"CHEBYSHEV");
1131 RCP<SmootherPrototype> smootherPrototype = rcp(
new TrilinosSmoother(smootherType, smootherList_));
1132 RCP<SmootherFactory> smootherFact = rcp(
new SmootherFactory(smootherPrototype));
1133 level.
Request(
"PreSmoother",smootherFact.get());
1134 if (enable_reuse_) {
1135 ParameterList smootherFactoryParams;
1136 smootherFactoryParams.set(
"keep smoother data",
true);
1137 smootherFact->SetParameterList(smootherFactoryParams);
1138 level.
Request(
"PreSmoother data", smootherFact.get());
1139 if (!PreSmootherData_.is_null())
1140 level.
Set(
"PreSmoother data", PreSmootherData_, smootherFact.get());
1142 smootherFact->Build(level);
1143 PreSmoother_ = level.
Get<RCP<SmootherBase> >(
"PreSmoother",smootherFact.get());
1144 PostSmoother_ = PreSmoother_;
1146 PreSmootherData_ = level.
Get<RCP<SmootherPrototype> >(
"PreSmoother data",smootherFact.get());
1148 useHiptmairSmoothing_ =
false;
1295 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
1296 const SC SC_ONE = Teuchos::ScalarTraits<SC>::one();
1297 const Scalar half = SC_ONE / (SC_ONE + SC_ONE);
1298 size_t dim = Nullspace_->getNumVectors();
1299 size_t numLocalRows = SM_Matrix_->getNodeNumRows();
1301 RCP<Matrix> P_nodal;
1302 RCP<CrsMatrix> P_nodal_imported;
1303 RCP<MultiVector> Nullspace_nodal;
1304 if (skipFirstLevel_) {
1307 bool read_P_from_file = parameterList_.get(
"refmaxwell: read_P_from_file",
false);
1308 if (read_P_from_file) {
1311 std::string P_filename = parameterList_.get(
"refmaxwell: P_filename",std::string(
"P.m"));
1312 std::string domainmap_filename = parameterList_.get(
"refmaxwell: P_domainmap_filename",std::string(
"domainmap_P.m"));
1313 std::string colmap_filename = parameterList_.get(
"refmaxwell: P_colmap_filename",std::string(
"colmap_P.m"));
1314 std::string coords_filename = parameterList_.get(
"refmaxwell: CoordsH",std::string(
"coordsH.m"));
1315 RCP<const Map> colmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(colmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1316 RCP<const Map> domainmap = Xpetra::IO<SC, LO, GO, NO>::ReadMap(domainmap_filename, A_nodal_Matrix_->getDomainMap()->lib(),A_nodal_Matrix_->getDomainMap()->getComm());
1317 P_nodal = Xpetra::IO<SC, LO, GO, NO>::Read(P_filename, A_nodal_Matrix_->getDomainMap(), colmap, domainmap, A_nodal_Matrix_->getDomainMap());
1318 CoordsH_ = Xpetra::IO<typename RealValuedMultiVector::scalar_type, LO, GO, NO>::ReadMultiVector(coords_filename, domainmap);
1320 Level fineLevel, coarseLevel;
1326 fineLevel.
Set(
"A",A_nodal_Matrix_);
1327 fineLevel.
Set(
"Coordinates",Coords_);
1328 fineLevel.
Set(
"DofsPerNode",1);
1329 coarseLevel.
setlib(A_nodal_Matrix_->getDomainMap()->lib());
1330 fineLevel.
setlib(A_nodal_Matrix_->getDomainMap()->lib());
1331 coarseLevel.setObjectLabel(
"RefMaxwell (1,1) A_nodal");
1332 fineLevel.setObjectLabel(
"RefMaxwell (1,1) A_nodal");
1335 RCP<MultiVector> nullSpace = MultiVectorFactory::Build(A_nodal_Matrix_->getRowMap(),NSdim);
1336 nullSpace->putScalar(SC_ONE);
1337 fineLevel.
Set(
"Nullspace",nullSpace);
1339 RCP<Factory> amalgFact, dropFact, UncoupledAggFact, coarseMapFact, TentativePFact, Tfact, SaPFact;
1340#ifdef HAVE_MUELU_KOKKOS_REFACTOR
1342 amalgFact = rcp(
new AmalgamationFactory_kokkos());
1343 dropFact = rcp(
new CoalesceDropFactory_kokkos());
1344 UncoupledAggFact = rcp(
new UncoupledAggregationFactory_kokkos());
1345 coarseMapFact = rcp(
new CoarseMapFactory_kokkos());
1346 TentativePFact = rcp(
new TentativePFactory_kokkos());
1347 if (parameterList_.get(
"multigrid algorithm",
"unsmoothed") ==
"sa")
1348 SaPFact = rcp(
new SaPFactory_kokkos());
1349 Tfact = rcp(
new CoordinatesTransferFactory_kokkos());
1358 if (parameterList_.get(
"multigrid algorithm",
"unsmoothed") ==
"sa")
1362 dropFact->SetFactory(
"UnAmalgamationInfo", amalgFact);
1363 double dropTol = parameterList_.get(
"aggregation: drop tol",0.0);
1364 std::string dropScheme = parameterList_.get(
"aggregation: drop scheme",
"classical");
1365 std::string distLaplAlgo = parameterList_.get(
"aggregation: distance laplacian algo",
"default");
1366 dropFact->SetParameter(
"aggregation: drop tol",Teuchos::ParameterEntry(dropTol));
1367 dropFact->SetParameter(
"aggregation: drop scheme",Teuchos::ParameterEntry(dropScheme));
1369 dropFact->SetParameter(
"aggregation: distance laplacian algo",Teuchos::ParameterEntry(distLaplAlgo));
1371 UncoupledAggFact->SetFactory(
"Graph", dropFact);
1372 int minAggSize = parameterList_.get(
"aggregation: min agg size",2);
1373 UncoupledAggFact->SetParameter(
"aggregation: min agg size",Teuchos::ParameterEntry(minAggSize));
1374 int maxAggSize = parameterList_.get(
"aggregation: max agg size",-1);
1375 UncoupledAggFact->SetParameter(
"aggregation: max agg size",Teuchos::ParameterEntry(maxAggSize));
1377 coarseMapFact->SetFactory(
"Aggregates", UncoupledAggFact);
1379 TentativePFact->SetFactory(
"Aggregates", UncoupledAggFact);
1380 TentativePFact->SetFactory(
"UnAmalgamationInfo", amalgFact);
1381 TentativePFact->SetFactory(
"CoarseMap", coarseMapFact);
1383 Tfact->SetFactory(
"Aggregates", UncoupledAggFact);
1384 Tfact->SetFactory(
"CoarseMap", coarseMapFact);
1386 if (parameterList_.get(
"multigrid algorithm",
"unsmoothed") ==
"sa") {
1387 SaPFact->SetFactory(
"P", TentativePFact);
1388 coarseLevel.
Request(
"P", SaPFact.get());
1390 coarseLevel.
Request(
"P",TentativePFact.get());
1391 coarseLevel.
Request(
"Nullspace",TentativePFact.get());
1392 coarseLevel.
Request(
"Coordinates",Tfact.get());
1394 RCP<AggregationExportFactory> aggExport;
1395 if (parameterList_.get(
"aggregation: export visualization data",
false)) {
1397 ParameterList aggExportParams;
1398 aggExportParams.set(
"aggregation: output filename",
"aggs.vtk");
1399 aggExportParams.set(
"aggregation: output file: agg style",
"Jacks");
1400 aggExport->SetParameterList(aggExportParams);
1402 aggExport->SetFactory(
"Aggregates", UncoupledAggFact);
1403 aggExport->SetFactory(
"UnAmalgamationInfo", amalgFact);
1404 fineLevel.
Request(
"Aggregates",UncoupledAggFact.get());
1405 fineLevel.
Request(
"UnAmalgamationInfo",amalgFact.get());
1408 if (parameterList_.get(
"multigrid algorithm",
"unsmoothed") ==
"sa")
1409 coarseLevel.
Get(
"P",P_nodal,SaPFact.get());
1411 coarseLevel.
Get(
"P",P_nodal,TentativePFact.get());
1412 coarseLevel.
Get(
"Nullspace",Nullspace_nodal,TentativePFact.get());
1413 coarseLevel.
Get(
"Coordinates",CoordsH_,Tfact.get());
1416 if (parameterList_.get(
"aggregation: export visualization data",
false))
1417 aggExport->Build(fineLevel, coarseLevel);
1419 dump(*P_nodal,
"P_nodal.m");
1420 dump(*Nullspace_nodal,
"Nullspace_nodal.m");
1423 RCP<CrsMatrix> D0Crs = rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix();
1426 int numProcs = P_nodal->getDomainMap()->getComm()->getSize();
1428 RCP<CrsMatrixWrap> P_nodal_temp;
1429 RCP<const Map> targetMap = D0Crs->getColMap();
1430 P_nodal_temp = rcp(
new CrsMatrixWrap(targetMap));
1431 RCP<const Import> importer = D0Crs->getCrsGraph()->getImporter();
1432 P_nodal_temp->doImport(*P_nodal, *importer, Xpetra::INSERT);
1433 P_nodal_temp->fillComplete(rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getDomainMap(),
1434 rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix()->getRangeMap());
1435 P_nodal_imported = P_nodal_temp->getCrsMatrix();
1436 dump(*P_nodal_temp,
"P_nodal_imported.m");
1438 P_nodal_imported = rcp_dynamic_cast<CrsMatrixWrap>(P_nodal)->getCrsMatrix();
1442#ifdef HAVE_MUELU_KOKKOS_REFACTOR
1445 using ATS = Kokkos::ArithTraits<SC>;
1446 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
1449 typedef typename Matrix::local_matrix_type KCRS;
1450 typedef typename KCRS::device_type device_t;
1451 typedef typename KCRS::StaticCrsGraphType graph_t;
1452 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1453 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1454 typedef typename KCRS::values_type::non_const_type scalar_view_t;
1460 std::string defaultAlgo =
"mat-mat";
1461 std::string algo = parameterList_.get(
"refmaxwell: prolongator compute algorithm",defaultAlgo);
1463 if (skipFirstLevel_) {
1466 if (algo ==
"mat-mat") {
1467 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1468 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,
false,*P_nodal,
false,*D0_P_nodal,
true,
true);
1470#ifdef HAVE_MUELU_DEBUG
1471 TEUCHOS_ASSERT(D0_P_nodal->getColMap()->isSameAs(*P_nodal_imported->getColMap()));
1475 auto localD0P = D0_P_nodal->getLocalMatrixDevice();
1478 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1479 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1481 size_t nnzEstimate = dim*localD0P.graph.entries.size();
1482 lno_view_t P11rowptr(
"P11_rowptr", numLocalRows+1);
1483 lno_nnz_view_t P11colind(
"P11_colind",nnzEstimate);
1484 scalar_view_t P11vals(
"P11_vals",nnzEstimate);
1487 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1488 KOKKOS_LAMBDA(
const size_t i) {
1489 P11rowptr(i) = dim*localD0P.graph.row_map(i);
1493 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0P.graph.entries.size()),
1494 KOKKOS_LAMBDA(
const size_t jj) {
1495 for (
size_t k = 0; k < dim; k++) {
1496 P11colind(dim*jj+k) = dim*localD0P.graph.entries(jj)+k;
1497 P11vals(dim*jj+k) = SC_ZERO;
1501 auto localNullspace = Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1504 if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1508 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1510 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1512 auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1513 auto localP = P_nodal_imported->getLocalMatrixDevice();
1514 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1515 KOKKOS_LAMBDA(
const size_t i) {
1516 for (
size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1517 LO l = localD0.graph.entries(ll);
1518 SC p = localD0.values(ll);
1519 if (impl_ATS::magnitude(p) < tol)
1521 for (
size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1522 LO j = localP.graph.entries(jj);
1523 SC v = localP.values(jj);
1524 for (
size_t k = 0; k < dim; k++) {
1526 SC n = localNullspace(i,k);
1528 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1529 if (P11colind(m) == jNew)
1531#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1532 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1534 P11vals(m) += half * v * n;
1541 auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1542 auto localP = P_nodal_imported->getLocalMatrixDevice();
1543 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1544 KOKKOS_LAMBDA(
const size_t i) {
1545 for (
size_t ll = localD0.graph.row_map(i); ll < localD0.graph.row_map(i+1); ll++) {
1546 LO l = localD0.graph.entries(ll);
1547 for (
size_t jj = localP.graph.row_map(l); jj < localP.graph.row_map(l+1); jj++) {
1548 LO j = localP.graph.entries(jj);
1549 SC v = localP.values(jj);
1550 for (
size_t k = 0; k < dim; k++) {
1552 SC n = localNullspace(i,k);
1554 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1555 if (P11colind(m) == jNew)
1557#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1558 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1560 P11vals(m) += half * v * n;
1567 P11_ = rcp(
new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1568 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1569 P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1570 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1573 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1575 NullspaceH_ = MultiVectorFactory::Build(P11_->getDomainMap(), dim);
1577 auto localNullspace_nodal = Nullspace_nodal->getDeviceLocalView(Xpetra::Access::ReadOnly);
1578 auto localNullspaceH = NullspaceH_->getDeviceLocalView(Xpetra::Access::ReadWrite);
1579 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_nullspace", range_type(0,Nullspace_nodal->getLocalLength()),
1580 KOKKOS_LAMBDA(
const size_t i) {
1581 Scalar val = localNullspace_nodal(i,0);
1582 for (
size_t j = 0; j < dim; j++)
1583 localNullspaceH(dim*i+j, j) = val;
1588 auto localD0 = D0_Matrix_->getLocalMatrixDevice();
1592 if (algo ==
"mat-mat") {
1595 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getColMap(), dim);
1596 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getDomainMap(), dim);
1598 size_t nnzEstimate = dim*localD0.graph.entries.size();
1599 lno_view_t P11rowptr(
"P11_rowptr", numLocalRows+1);
1600 lno_nnz_view_t P11colind(
"P11_colind",nnzEstimate);
1601 scalar_view_t P11vals(
"P11_vals",nnzEstimate);
1604 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustRowptr", range_type(0,numLocalRows+1),
1605 KOKKOS_LAMBDA(
const size_t i) {
1606 P11rowptr(i) = dim*localD0.graph.row_map(i);
1610 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_adjustColind", range_type(0,localD0.graph.entries.size()),
1611 KOKKOS_LAMBDA(
const size_t jj) {
1612 for (
size_t k = 0; k < dim; k++) {
1613 P11colind(dim*jj+k) = dim*localD0.graph.entries(jj)+k;
1614 P11vals(dim*jj+k) = SC_ZERO;
1618 auto localNullspace = Nullspace_->getDeviceLocalView(Xpetra::Access::ReadOnly);
1621 if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1625 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1627 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1629 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues_D0wZeros", range_type(0,numLocalRows),
1630 KOKKOS_LAMBDA(
const size_t i) {
1631 for (
size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1632 LO j = localD0.graph.entries(jj);
1633 SC p = localD0.values(jj);
1634 if (impl_ATS::magnitude(p) < tol)
1636 for (
size_t k = 0; k < dim; k++) {
1638 SC n = localNullspace(i,k);
1640 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1641 if (P11colind(m) == jNew)
1643#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1644 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1646 P11vals(m) += half * n;
1652 Kokkos::parallel_for(
"MueLu:RefMaxwell::buildProlongator_enterValues", range_type(0,numLocalRows),
1653 KOKKOS_LAMBDA(
const size_t i) {
1654 for (
size_t jj = localD0.graph.row_map(i); jj < localD0.graph.row_map(i+1); jj++) {
1655 LO j = localD0.graph.entries(jj);
1656 for (
size_t k = 0; k < dim; k++) {
1658 SC n = localNullspace(i,k);
1660 for (m = P11rowptr(i); m < P11rowptr(i+1); m++)
1661 if (P11colind(m) == jNew)
1663#if defined(HAVE_MUELU_DEBUG) && !defined(HAVE_MUELU_CUDA) && !defined(HAVE_MUELU_HIP)
1664 TEUCHOS_ASSERT_EQUALITY(P11colind(m),jNew);
1666 P11vals(m) += half * n;
1672 P11_ = rcp(
new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1673 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1674 P11Crs->setAllValues(P11rowptr, P11colind, P11vals);
1675 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1677 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1684 ArrayRCP<ArrayRCP<const SC> > nullspaceRCP(dim);
1685 ArrayRCP<ArrayView<const SC> > nullspace(dim);
1686 for(
size_t i=0; i<dim; i++) {
1687 nullspaceRCP[i] = Nullspace_->getData(i);
1688 nullspace[i] = nullspaceRCP[i]();
1692 ArrayRCP<size_t> P11rowptr_RCP;
1693 ArrayRCP<LO> P11colind_RCP;
1694 ArrayRCP<SC> P11vals_RCP;
1703 std::string defaultAlgo =
"mat-mat";
1704 std::string algo = parameterList_.get(
"refmaxwell: prolongator compute algorithm",defaultAlgo);
1706 if (skipFirstLevel_) {
1708 if (algo ==
"mat-mat") {
1709 RCP<Matrix> D0_P_nodal = MatrixFactory::Build(SM_Matrix_->getRowMap());
1710 Xpetra::MatrixMatrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Multiply(*D0_Matrix_,
false,*P_nodal,
false,*D0_P_nodal,
true,
true);
1713 ArrayRCP<const size_t> D0rowptr_RCP;
1714 ArrayRCP<const LO> D0colind_RCP;
1715 ArrayRCP<const SC> D0vals_RCP;
1716 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1720 ArrayView<const size_t> D0rowptr;
1721 ArrayView<const LO> D0colind;
1722 ArrayView<const SC> D0vals;
1723 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1726 ArrayRCP<const size_t> Prowptr_RCP;
1727 ArrayRCP<const LO> Pcolind_RCP;
1728 ArrayRCP<const SC> Pvals_RCP;
1729 P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1730 ArrayView<const size_t> Prowptr;
1731 ArrayView<const LO> Pcolind;
1732 ArrayView<const SC> Pvals;
1733 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1736 ArrayRCP<const size_t> D0Prowptr_RCP;
1737 ArrayRCP<const LO> D0Pcolind_RCP;
1738 ArrayRCP<const SC> D0Pvals_RCP;
1739 rcp_dynamic_cast<CrsMatrixWrap>(D0_P_nodal)->getCrsMatrix()->getAllValues(D0Prowptr_RCP, D0Pcolind_RCP, D0Pvals_RCP);
1744 ArrayView<const size_t> D0Prowptr;
1745 ArrayView<const LO> D0Pcolind;
1746 D0Prowptr = D0Prowptr_RCP(); D0Pcolind = D0Pcolind_RCP();
1749 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1750 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1751 P11_ = rcp(
new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1752 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1753 size_t nnzEstimate = dim*D0Prowptr[numLocalRows];
1754 P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1756 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1757 ArrayView<LO> P11colind = P11colind_RCP();
1758 ArrayView<SC> P11vals = P11vals_RCP();
1761 for (
size_t i = 0; i < numLocalRows+1; i++) {
1762 P11rowptr[i] = dim*D0Prowptr[i];
1766 for (
size_t jj = 0; jj < (size_t) D0Prowptr[numLocalRows]; jj++)
1767 for (
size_t k = 0; k < dim; k++) {
1768 P11colind[dim*jj+k] = dim*D0Pcolind[jj]+k;
1769 P11vals[dim*jj+k] = SC_ZERO;
1772 RCP<const Map> P_nodal_imported_colmap = P_nodal_imported->getColMap();
1773 RCP<const Map> D0_P_nodal_colmap = D0_P_nodal->getColMap();
1775 if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1779 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1781 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1782 for (
size_t i = 0; i < numLocalRows; i++) {
1783 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1784 LO l = D0colind[ll];
1786 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1788 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1790 j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1792 for (
size_t k = 0; k < dim; k++) {
1794 SC n = nullspace[k][i];
1796 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1797 if (P11colind[m] == jNew)
1799#ifdef HAVE_MUELU_DEBUG
1800 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1802 P11vals[m] += half * v * n;
1809 for (
size_t i = 0; i < numLocalRows; i++) {
1810 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1811 LO l = D0colind[ll];
1812 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1814 j = D0_P_nodal_colmap->getLocalElement(P_nodal_imported_colmap->getGlobalElement(j));
1816 for (
size_t k = 0; k < dim; k++) {
1818 SC n = nullspace[k][i];
1820 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
1821 if (P11colind[m] == jNew)
1823#ifdef HAVE_MUELU_DEBUG
1824 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
1826 P11vals[m] += half * v * n;
1833 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1834 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1836 }
else if (algo ==
"gustavson") {
1837 ArrayRCP<const size_t> D0rowptr_RCP;
1838 ArrayRCP<const LO> D0colind_RCP;
1839 ArrayRCP<const SC> D0vals_RCP;
1840 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1844 ArrayView<const size_t> D0rowptr;
1845 ArrayView<const LO> D0colind;
1846 ArrayView<const SC> D0vals;
1847 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1850 ArrayRCP<const size_t> Prowptr_RCP;
1851 ArrayRCP<const LO> Pcolind_RCP;
1852 ArrayRCP<const SC> Pvals_RCP;
1853 P_nodal_imported->getAllValues(Prowptr_RCP, Pcolind_RCP, Pvals_RCP);
1854 ArrayView<const size_t> Prowptr;
1855 ArrayView<const LO> Pcolind;
1856 ArrayView<const SC> Pvals;
1857 Prowptr = Prowptr_RCP(); Pcolind = Pcolind_RCP(); Pvals = Pvals_RCP();
1859 LO maxP11col = dim * P_nodal_imported->getColMap()->getMaxLocalIndex();
1860 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1861 Array<size_t> P11_status(dim*maxP11col, ST_INVALID);
1863 size_t nnz_alloc = dim*D0vals_RCP.size();
1866 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal_imported->getColMap(), dim);
1867 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(P_nodal->getDomainMap(), dim);
1868 P11_ = rcp(
new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1869 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1870 P11Crs->allocateAllValues(nnz_alloc, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1872 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
1873 ArrayView<LO> P11colind = P11colind_RCP();
1874 ArrayView<SC> P11vals = P11vals_RCP();
1877 if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
1881 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
1883 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
1886 for (
size_t i = 0; i < numLocalRows; i++) {
1888 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1889 LO l = D0colind[ll];
1891 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
1893 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1896 for (
size_t k = 0; k < dim; k++) {
1898 SC n = nullspace[k][i];
1900 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1901 P11_status[jNew] = nnz;
1902 P11colind[nnz] = jNew;
1903 P11vals[nnz] = half * v * n;
1908 P11vals[P11_status[jNew]] += half * v * n;
1917 P11rowptr[numLocalRows] = nnz;
1921 for (
size_t i = 0; i < numLocalRows; i++) {
1923 for (
size_t ll = D0rowptr[i]; ll < D0rowptr[i+1]; ll++) {
1924 LO l = D0colind[ll];
1925 for (
size_t jj = Prowptr[l]; jj < Prowptr[l+1]; jj++) {
1928 for (
size_t k = 0; k < dim; k++) {
1930 SC n = nullspace[k][i];
1932 if (P11_status[jNew] == ST_INVALID || P11_status[jNew] < nnz_old) {
1933 P11_status[jNew] = nnz;
1934 P11colind[nnz] = jNew;
1935 P11vals[nnz] = half * v * n;
1940 P11vals[P11_status[jNew]] += half * v * n;
1949 P11rowptr[numLocalRows] = nnz;
1952 if (blockDomainMap->lib() == Xpetra::UseTpetra) {
1956 P11vals_RCP.resize(nnz);
1957 P11colind_RCP.resize(nnz);
1960 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
1961 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
1963 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
1965 NullspaceH_ = MultiVectorFactory::Build(P11_->getDomainMap(), dim);
1967 ArrayRCP<const Scalar> ns_rcp = Nullspace_nodal->getData(0);
1968 ArrayView<const Scalar> ns_view = ns_rcp();
1969 for (
size_t i = 0; i < Nullspace_nodal->getLocalLength(); i++) {
1971 for (
size_t j = 0; j < dim; j++)
1972 NullspaceH_->replaceLocalValue(dim*i+j, j, val);
1977 ArrayRCP<const size_t> D0rowptr_RCP;
1978 ArrayRCP<const LO> D0colind_RCP;
1979 ArrayRCP<const SC> D0vals_RCP;
1980 rcp_dynamic_cast<CrsMatrixWrap>(D0_Matrix_)->getCrsMatrix()->getAllValues(D0rowptr_RCP, D0colind_RCP, D0vals_RCP);
1984 ArrayView<const size_t> D0rowptr;
1985 ArrayView<const LO> D0colind;
1986 ArrayView<const SC> D0vals;
1987 D0rowptr = D0rowptr_RCP(); D0colind = D0colind_RCP(); D0vals = D0vals_RCP();
1990 if (algo ==
"mat-mat") {
1993 RCP<Map> blockColMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getColMap(), dim);
1994 RCP<Map> blockDomainMap = Xpetra::MapFactory<LO,GO,NO>::Build(D0_Matrix_->getDomainMap(), dim);
1995 P11_ = rcp(
new CrsMatrixWrap(SM_Matrix_->getRowMap(), blockColMap, 0));
1996 RCP<CrsMatrix> P11Crs = rcp_dynamic_cast<CrsMatrixWrap>(P11_)->getCrsMatrix();
1997 size_t nnzEstimate = dim*D0rowptr[numLocalRows];
1998 P11Crs->allocateAllValues(nnzEstimate, P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
2000 ArrayView<size_t> P11rowptr = P11rowptr_RCP();
2001 ArrayView<LO> P11colind = P11colind_RCP();
2002 ArrayView<SC> P11vals = P11vals_RCP();
2005 for (
size_t i = 0; i < numLocalRows+1; i++) {
2006 P11rowptr[i] = dim*D0rowptr[i];
2010 for (
size_t jj = 0; jj < (size_t) D0rowptr[numLocalRows]; jj++)
2011 for (
size_t k = 0; k < dim; k++) {
2012 P11colind[dim*jj+k] = dim*D0colind[jj]+k;
2013 P11vals[dim*jj+k] = SC_ZERO;
2017 if (D0_Matrix_->getNodeMaxNumRowEntries()>2) {
2021 GetOStream(
Warnings0) <<
"RefMaxwell::buildProlongator(): D0 matrix has more than 2 entries per row. Taking inefficient code path." << std::endl;
2023 magnitudeType tol = Teuchos::ScalarTraits<magnitudeType>::eps();
2024 for (
size_t i = 0; i < numLocalRows; i++) {
2025 for (
size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
2026 LO j = D0colind[jj];
2028 if (Teuchos::ScalarTraits<Scalar>::magnitude(p) < tol)
2030 for (
size_t k = 0; k < dim; k++) {
2032 SC n = nullspace[k][i];
2034 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
2035 if (P11colind[m] == jNew)
2037#ifdef HAVE_MUELU_DEBUG
2038 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
2040 P11vals[m] += half * n;
2046 for (
size_t i = 0; i < numLocalRows; i++) {
2047 for (
size_t jj = D0rowptr[i]; jj < D0rowptr[i+1]; jj++) {
2048 LO j = D0colind[jj];
2050 for (
size_t k = 0; k < dim; k++) {
2052 SC n = nullspace[k][i];
2054 for (m = P11rowptr[i]; m < P11rowptr[i+1]; m++)
2055 if (P11colind[m] == jNew)
2057#ifdef HAVE_MUELU_DEBUG
2058 TEUCHOS_ASSERT_EQUALITY(P11colind[m],jNew);
2060 P11vals[m] += half * n;
2066 P11Crs->setAllValues(P11rowptr_RCP, P11colind_RCP, P11vals_RCP);
2067 P11Crs->expertStaticFillComplete(blockDomainMap, SM_Matrix_->getRangeMap());
2070 TEUCHOS_TEST_FOR_EXCEPTION(
false,std::invalid_argument,algo <<
" is not a valid option for \"refmaxwell: prolongator compute algorithm\"");
2229 Scalar one = Teuchos::ScalarTraits<Scalar>::one();
2233 RCP<Teuchos::TimeMonitor> tmRes = getTimer(
"MueLu RefMaxwell: residual calculation");
2239 if (implicitTranspose_) {
2241 RCP<Teuchos::TimeMonitor> tmRes = getTimer(
"MueLu RefMaxwell: restriction coarse (1,1) (implicit)");
2242 P11_->apply(*residual_,*P11res_,Teuchos::TRANS);
2244 if (!allNodesBoundary_) {
2245 RCP<Teuchos::TimeMonitor> tmD0 = getTimer(
"MueLu RefMaxwell: restriction (2,2) (implicit)");
2246 D0_Matrix_->apply(*residual_,*D0res_,Teuchos::TRANS);
2249#ifdef MUELU_HAVE_TPETRA
2250 if (D0_T_R11_colMapsMatch_) {
2253 RCP<Teuchos::TimeMonitor> tmD0 = getTimer(
"MueLu RefMaxwell: restrictions import");
2254 D0TR11Tmp_->doImport(*residual_, *rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix()->getCrsGraph()->getImporter(), Xpetra::INSERT);
2256 if (!allNodesBoundary_) {
2257 RCP<Teuchos::TimeMonitor> tmD0 = getTimer(
"MueLu RefMaxwell: restriction (2,2) (explicit)");
2258 rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(D0_T_Matrix_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*D0res_),Teuchos::NO_TRANS);
2261 RCP<Teuchos::TimeMonitor> tmP11 = getTimer(
"MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2262 rcp_dynamic_cast<TpetraCrsMatrix>(rcp_dynamic_cast<CrsMatrixWrap>(R11_)->getCrsMatrix())->getTpetra_CrsMatrix()->localApply(toTpetra(*D0TR11Tmp_),toTpetra(*P11res_),Teuchos::NO_TRANS);
2268 RCP<Teuchos::TimeMonitor> tmP11 = getTimer(
"MueLu RefMaxwell: restriction coarse (1,1) (explicit)");
2269 R11_->apply(*residual_,*P11res_,Teuchos::NO_TRANS);
2271 if (!allNodesBoundary_) {
2272 RCP<Teuchos::TimeMonitor> tmD0 = getTimer(
"MueLu RefMaxwell: restriction (2,2) (explicit)");
2273 D0_T_Matrix_->apply(*residual_,*D0res_,Teuchos::NO_TRANS);
2280 RCP<Teuchos::TimeMonitor> tmSubSolves = getTimer(
"MueLu RefMaxwell: subsolves");
2284 if (!ImporterH_.is_null() && !implicitTranspose_) {
2285 RCP<Teuchos::TimeMonitor> tmH = getTimer(
"MueLu RefMaxwell: import coarse (1,1)");
2286 P11resTmp_->beginImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2288 if (!allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_) {
2289 RCP<Teuchos::TimeMonitor> tm22 = getTimer(
"MueLu RefMaxwell: import (2,2)");
2290 D0resTmp_->beginImport(*D0res_, *Importer22_, Xpetra::INSERT);
2294 if (!AH_.is_null()) {
2295 if (!ImporterH_.is_null() && !implicitTranspose_)
2296 P11resTmp_->endImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2298 RCP<Teuchos::TimeMonitor> tmH = getTimer(
"MueLu RefMaxwell: solve coarse (1,1)", AH_->getRowMap()->getComm());
2300#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2301 if (!thyraPrecH_.is_null()) {
2302 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
2303 RCP<Thyra::MultiVectorBase<Scalar> > thyraP11x = Teuchos::rcp_const_cast<Thyra::MultiVectorBase<Scalar> >(Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(P11xSubComm_));
2304 RCP<const Thyra::MultiVectorBase<Scalar> > thyraP11res = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(P11resSubComm_);
2305 thyraPrecH_->getUnspecifiedPrecOp()->apply(Thyra::NOTRANS, *thyraP11res, thyraP11x.ptr(), one, zero);
2306 RCP<MultiVector> thyXpP11x = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toXpetra(thyraP11x, P11x_->getMap()->getComm());
2307 *P11xSubComm_ = *thyXpP11x;
2311 HierarchyH_->Iterate(*P11resSubComm_, *P11xSubComm_, numItersH_,
true);
2315 if (!A22_.is_null()) {
2316 if (!allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_)
2317 D0resTmp_->endImport(*D0res_, *Importer22_, Xpetra::INSERT);
2319 RCP<Teuchos::TimeMonitor> tm22 = getTimer(
"MueLu RefMaxwell: solve (2,2)", A22_->getRowMap()->getComm());
2321#if defined(HAVE_MUELU_STRATIMIKOS) && defined(HAVE_MUELU_THYRA)
2322 if (!thyraPrec22_.is_null()) {
2323 Scalar zero = Teuchos::ScalarTraits<Scalar>::zero();
2324 RCP<Thyra::MultiVectorBase<Scalar> > thyraD0x = Teuchos::rcp_const_cast<Thyra::MultiVectorBase<Scalar> >(Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(D0xSubComm_));
2325 RCP<const Thyra::MultiVectorBase<Scalar> > thyraD0res = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toThyraMultiVector(D0resSubComm_);
2326 thyraPrec22_->getUnspecifiedPrecOp()->apply(Thyra::NOTRANS, *thyraD0res, thyraD0x.ptr(), one, zero);
2327 RCP<MultiVector> thyXpD0x = Xpetra::ThyraUtils<Scalar,LocalOrdinal,GlobalOrdinal,Node>::toXpetra(thyraD0x, D0x_->getMap()->getComm());
2328 *D0xSubComm_ = *thyXpD0x;
2332 Hierarchy22_->Iterate(*D0resSubComm_, *D0xSubComm_, numIters22_,
true);
2335 if (AH_.is_null() && !ImporterH_.is_null() && !implicitTranspose_)
2336 P11resTmp_->endImport(*P11res_, *ImporterH_, Xpetra::INSERT);
2337 if (A22_.is_null() && !allNodesBoundary_ && !Importer22_.is_null() && !implicitTranspose_)
2338 D0resTmp_->endImport(*D0res_, *Importer22_, Xpetra::INSERT);
2341 if (fuseProlongationAndUpdate_) {
2343 RCP<Teuchos::TimeMonitor> tmP11 = getTimer(
"MueLu RefMaxwell: prolongation coarse (1,1) (fused)");
2344 P11_->apply(*P11x_,X,Teuchos::NO_TRANS,one,one);
2347 if (!allNodesBoundary_) {
2348 RCP<Teuchos::TimeMonitor> tmD0 = getTimer(
"MueLu RefMaxwell: prolongation (2,2) (fused)");
2349 D0_Matrix_->apply(*D0x_,X,Teuchos::NO_TRANS,one,one);
2353 RCP<Teuchos::TimeMonitor> tmP11 = getTimer(
"MueLu RefMaxwell: prolongation coarse (1,1) (unfused)");
2354 P11_->apply(*P11x_,*residual_,Teuchos::NO_TRANS);
2357 if (!allNodesBoundary_) {
2358 RCP<Teuchos::TimeMonitor> tmD0 = getTimer(
"MueLu RefMaxwell: prolongation (2,2) (unfused)");
2359 D0_Matrix_->apply(*D0x_,*residual_,Teuchos::NO_TRANS,one,one);
2363 RCP<Teuchos::TimeMonitor> tmUpdate = getTimer(
"MueLu RefMaxwell: update");
2364 X.update(one, *residual_, one);