47 #ifndef MUELU_AGGREGATES_KOKKOS_DEF_HPP 48 #define MUELU_AGGREGATES_KOKKOS_DEF_HPP 50 #include <Xpetra_Map.hpp> 51 #include <Xpetra_Vector.hpp> 52 #include <Xpetra_MultiVectorFactory.hpp> 53 #include <Xpetra_VectorFactory.hpp> 55 #include "MueLu_LWGraph_kokkos.hpp" 62 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
// Constructor taking a graph (by value). All per-vertex storage is sized from
// the graph's import map, graph.GetImportMap().
// NOTE(review): the embedded listing numbers jump (63, 66, 69, 72, 75), so
// statements and the closing brace of this constructor are not visible here;
// this comment covers only the statements that are.
63 Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::Aggregates_kokkos(LWGraph_kokkos graph) {
// Vertex -> aggregate-id vector, built on the import map.
66 vertex2AggId_ = LOVectorFactory::Build(graph.GetImportMap());
// Vertex -> winning-process vector, built on the same map.
69 procWinner_ = LOVectorFactory::Build(graph.GetImportMap());
// One flag per local element of the import map, constructed with (size, false).
72 isRoot_ = Teuchos::ArrayRCP<bool>(graph.GetImportMap()->getNodeNumElements(),
false);
// This constructor always sets the flag to true.
75 aggregatesIncludeGhosts_ =
true;
// Constructor taking a map directly. It performs the same visible steps as the
// graph-based constructor, using `map` instead of the graph's import map.
// NOTE(review): the embedded listing numbers jump (80, 83, 86, 89, 92), so
// statements and the closing brace are not visible here.
79 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
80 Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::Aggregates_kokkos(
const RCP<const Map>& map) {
// Vertex -> aggregate-id vector, built on `map`.
83 vertex2AggId_ = LOVectorFactory::Build(map);
// Vertex -> winning-process vector, built on `map`.
86 procWinner_ = LOVectorFactory::Build(map);
// One flag per local element of `map`, constructed with (size, false).
89 isRoot_ = Teuchos::ArrayRCP<bool>(map->getNodeNumElements(),
false);
// This constructor always sets the flag to true.
92 aggregatesIncludeGhosts_ =
true;
// Fragment of a functor template parameterised on the view types for the
// process-winner, vertex-to-aggregate and aggregate-size data, plus an ordinal
// type LO.
// NOTE(review): only the template header, part of the constructor's
// member-initializer list and the KOKKOS_INLINE_FUNCTION marker are visible.
// The struct header, constructor signature, any further initializers and the
// operator() body are missing from this listing. Presumably this is
// ComputeAggregateSizesFunctor, going by the constructor signature quoted in
// the trailing index text of this file -- confirm against the real header.
96 template<
class ProcWinnerType,
class Vertex2AggIdType,
class AggregateSizesType,
class LO>
// Members are copied from the same-named constructor arguments (trailing '_').
106 procWinner(procWinner_),
107 vertex2AggId(vertex2AggId_),
109 aggregateSizes(aggregateSizes_)
// Marks the following member function (not visible here) as callable from
// Kokkos kernels.
112 KOKKOS_INLINE_FUNCTION
// Returns the per-aggregate size array, recomputing it when nothing is cached
// or when `forceRecompute` is true.
//
// forceRecompute: when false and aggregateSizes_ is non-empty, the cached
//                 array is returned immediately.
// cacheSizes:     not referenced in the visible lines.
//
// NOTE(review): the embedded listing numbers jump (122 -> 127 -> 131, and
// 140 -> 150). The declaration of the local `aggregateSizes`, the declared
// type of `computeAggSizesFunctor`, any use of `cacheSizes`, the return
// statement of the recompute path and the closing braces are not visible.
118 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
119 typename Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::aggregates_sizes_type::const_type
120 Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::ComputeAggregateSizes(
bool forceRecompute,
bool cacheSizes)
const {
// Fast path: reuse the cached sizes.
121 if (aggregateSizes_.size() && !forceRecompute) {
122 return aggregateSizes_;
// Replace the cached member with a fresh object constructed with
// ("aggregates", 0). This assignment happens inside a const method, so the
// member is presumably declared mutable -- confirm in the class declaration.
127 aggregateSizes_ = aggregates_sizes_type(
"aggregates", 0);
// Rank of this process in the communicator of the map returned by GetMap().
131 int myPID = GetMap()->getComm()->getRank();
// Local views of the two per-vertex vectors for DeviceType.
133 auto vertex2AggId = vertex2AggId_->template getLocalView<DeviceType>();
134 auto procWinner = procWinner_ ->template getLocalView<DeviceType>();
// Same data as `aggregateSizes`, viewed through a type with the Kokkos::Atomic
// trait appended. `aggregateSizes` is declared on a line not shown here.
136 typename AppendTrait<decltype(aggregateSizes_), Kokkos::Atomic>::type aggregateSizesAtomic =
aggregateSizes;
// Functor construction; its type is declared on a line not shown here.
139 computeAggSizesFunctor(procWinner,
vertex2AggId, myPID, aggregateSizesAtomic);
// Run the functor once per entry of procWinner, i.e. over [0, procWinner.size()).
140 Kokkos::parallel_for(
"MueLu:Aggregates:ComputeAggregateSizes:for", range_type(0,procWinner.size()), computeAggSizesFunctor);
// Builds a local_graph_type from two arrays: `rows`, with one offset per
// aggregate plus a final end offset, and `cols`, holding the local indices of
// the vertices placed in each aggregate. Only vertices whose procWinner entry
// equals this process's rank are inserted.
//
// NOTE(review): every loop here indexes the Kokkos-style objects (`rows`,
// `sizes`, `offsets`, `cols`) element by element from ordinary serial code;
// this presumably requires them to be host-accessible -- confirm for the
// device types in use. The closing brace is not visible in this listing.
150 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
151 typename Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::local_graph_type
152 Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::GetGraph()
const {
153 typedef typename local_graph_type::row_map_type row_map_type;
154 typedef typename local_graph_type::entries_type entries_type;
// Rank of this process; used below to keep only locally-owned vertices.
156 int myPID = GetMap()->getComm()->getRank();
// Access to column 0 of the two per-vertex vectors.
158 ArrayRCP<LO>
vertex2AggId = vertex2AggId_->getDataNonConst(0);
159 ArrayRCP<LO>
procWinner = procWinner_->getDataNonConst(0);
// Per-aggregate sizes (default arguments of ComputeAggregateSizes).
161 typename aggregates_sizes_type::const_type sizes = ComputeAggregateSizes();
163 int numAggregates = nAggregates_;
// Running sum of sizes: rows(i+1) = rows(i) + sizes(i).
// rows(0) is read without an explicit assignment here; presumably the object
// is zero-initialized on construction -- confirm.
165 typename row_map_type::non_const_type rows(
"row_map", numAggregates+1);
166 for (LO i = 0; i < nAggregates_; i++)
167 rows(i+1) = rows(i) + sizes(i);
// Per-aggregate write cursor, starting at the beginning of each row.
169 aggregates_sizes_type offsets(
"offsets", numAggregates);
170 for (LO i = 0; i < numAggregates; i++)
171 offsets(i) = rows(i);
// Column array sized by the final row offset (total number of entries).
173 typename entries_type::non_const_type cols(
"entries", rows(nAggregates_));
// Scatter: write each locally-won vertex index i at its aggregate's cursor,
// then advance that cursor (post-increment).
// NOTE(review): vertex2AggId[i] is used as an index with no range check.
174 for (LO i = 0; i < procWinner.size(); i++)
175 if (procWinner[i] == myPID)
176 cols(offsets(vertex2AggId[i])++) = i;
178 return local_graph_type(cols, rows);
// Returns a std::string description of this object.
// NOTE(review): only the signature is visible in this listing; the function
// body and closing brace are missing, so the string's content cannot be
// documented from here.
181 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
182 std::string Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::description()
const {
// Writes information about this object to a stream.
//
// out:       destination stream.
// verbLevel: verbosity level; not referenced in the visible lines.
//
// NOTE(review): the embedded listing numbers jump from 187 to 191. `out0` is
// not declared in any visible line; presumably it comes from a dropped line
// (possibly the MUELU_DESCRIBE macro) -- confirm. The closing brace is also
// missing.
186 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
187 void Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::print(Teuchos::FancyOStream& out,
const Teuchos::EVerbosityLevel verbLevel)
const {
// Prints the value returned by GetNumGlobalAggregates(), followed by std::endl.
191 out0 <<
"Global number of aggregates: " << GetNumGlobalAggregates() << std::endl;
// Returns the number of aggregates combined across processes: the local count
// from GetNumAggregates() is passed through the MueLu_sumAll macro on the
// communicator of vertex2AggId_'s map.
// NOTE(review): the macro body is not visible here; by its name and its
// (rcpComm, in, out) parameters it presumably performs a sum reduction over
// all ranks, which would make this a collective call -- confirm.
194 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
195 GlobalOrdinal Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::GetNumGlobalAggregates()
const {
196 LO nAggregates = GetNumAggregates();
// Declared without an initializer; it is passed as the macro's `out` argument.
197 GO nGlobalAggregates;
// The local count is cast to GO before being handed to the macro.
198 MueLu_sumAll(vertex2AggId_->getMap()->getComm(), (GO)nAggregates, nGlobalAggregates);
199 return nGlobalAggregates;
// Returns the map of the vertex2AggId_ vector (vertex2AggId_->getMap()).
// NOTE(review): the closing brace is not visible in this listing.
202 template <
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
203 const RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>> >
204 Aggregates_kokkos<LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::GetMap()
const {
205 return vertex2AggId_->getMap();
210 #endif // MUELU_AGGREGATES_KOKKOS_DEF_HPP
#define MueLu_sumAll(rcpComm, in, out)
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
ComputeAggregateSizesFunctor(ProcWinnerType procWinner_, Vertex2AggIdType vertex2AggId_, int myPID_, AggregateSizesType aggregateSizes_)
Namespace for MueLu classes and methods.
Vertex2AggIdType vertex2AggId
#define MUELU_UNAGGREGATED
AggregateSizesType aggregateSizes
KOKKOS_INLINE_FUNCTION void operator()(const LO k) const
void parallel_for(const ExecPolicy &policy, const FunctorType &functor, const std::string &str="", typename Impl::enable_if< ! Impl::is_integral< ExecPolicy >::value >::type *=0)
#define MUELU_DESCRIBE
Helper macro for implementing Describable::describe() for BaseClass objects.
ProcWinnerType procWinner
virtual std::string description() const
Return a simple one-line description of this object.