42#ifndef TPETRA_DETAILS_IDOT_HPP
43#define TPETRA_DETAILS_IDOT_HPP
62#include "Tpetra_MultiVector.hpp"
63#include "Tpetra_Vector.hpp"
64#include "Teuchos_CommHelpers.hpp"
65#include "KokkosBlas1_dot.hpp"
// Const view type of the device side (t_dev) of MV's dual view.
78 using DevView =
typename MV::dual_view_type::t_dev::const_type;
// Const view type of the host side (t_host) of MV's dual view.
79 using HostView =
typename MV::dual_view_type::t_host::const_type;
// Device overload of get(): enabled through std::enable_if only when
// exec_space is exactly MV::execution_space. Returns x's local device view,
// requested with Tpetra::Access::ReadOnly. The trailing defaulted pointer
// parameter exists only to carry the enable_if condition.
81 template<
typename exec_space>
82 static DevView get(
const MV& x,
typename std::enable_if<std::is_same<exec_space, typename MV::execution_space>::value>::type* =
nullptr)
84 return x.getLocalViewDevice(Tpetra::Access::ReadOnly);
// Host overload of get(): enabled through std::enable_if when exec_space is
// any type other than MV::execution_space. Returns x's local host view,
// requested with Tpetra::Access::ReadOnly. The trailing defaulted pointer
// parameter exists only to carry the enable_if condition.
87 template<
typename exec_space>
88 static HostView get(
const MV& x,
typename std::enable_if<!std::is_same<exec_space, typename MV::execution_space>::value>::type* =
nullptr)
90 return x.getLocalViewHost(Tpetra::Access::ReadOnly);
// idotLocal: local part of the dot product.
// Template parameters:
//   MV          - type of the inputs X and Y.
//   ResultView  - type of the view that receives the local result.
//   runOnDevice - compile-time switch that selects the execution space below.
96template<
class MV,
class ResultView,
bool runOnDevice>
// Pair of size_t indices; NOTE(review): presumably a (begin, end) range used
// for subviews -- its use is not visible in these lines, confirm.
101 using pair_type = Kokkos::pair<size_t, size_t>;
// MV's execution space when runOnDevice is true, otherwise
// Kokkos::DefaultHostExecutionSpace.
102 using exec_space =
typename std::conditional<runOnDevice, typename MV::execution_space, Kokkos::DefaultHostExecutionSpace>::type;
// Compile-time check: the chosen execution space must be able to access the
// memory space of ResultView.
104 static_assert(Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible,
105 "idotLocal: Execution space must be able to access localResult");
// Unmanaged view type with ResultView's data_type, placed in exec_space's
// memory space.
108 Kokkos::View<typename ResultView::data_type, typename exec_space::memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
// Local length of X, as reported by getLocalLength().
110 const size_t numRows =
X.getLocalLength ();
// Error path: X and Y report different numbers of vectors and neither count
// is 1. Build the message and throw std::invalid_argument.
120 std::ostringstream
os;
121 os <<
"Tpetra::idot: X.getNumVectors() = " <<
X_numVecs
122 <<
" != Y.getNumVectors() = " <<
Y_numVecs
123 <<
", but neither is 1.";
124 throw std::invalid_argument (
os.str ());
// Helper templated on the input type MV and the result view type ResultView.
165template<
typename MV,
typename ResultView>
// Shorthand for MV's dot_type.
168 using dot_type =
typename MV::dot_type;
// run() overload enabled through std::enable_if when exec_space CAN access
// ResultView's memory space. Returns a shared_ptr to a CommRequest.
171 template<
typename exec_space>
172 static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
174 typename std::enable_if<Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* =
nullptr)
// True exactly when exec_space is MV's own execution space.
176 constexpr bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
// Communicator obtained from X's map.
181 auto comm =
X.getMap()->getComm();
// run() overload enabled through std::enable_if when exec_space can NOT
// access ResultView's memory space. It computes the local dot products into a
// temporary view that lives in exec_space's memory space, then hands that
// temporary and globalResult to iallreduce with REDUCE_SUM.
//
// globalResult - view passed to iallreduce as the destination of the sum.
// X, Y         - inputs of the dot product.
// Returns the CommRequest produced by iallreduce.
186 template<
typename exec_space>
187 static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
188 const ResultView& globalResult,
const MV& X,
const MV& Y,
189 typename std::enable_if<!Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* =
nullptr)
// True exactly when exec_space is MV's own execution space.
191 constexpr bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
// Temporary result buffer in exec_space's memory space, allocated without
// initialization, with X.getNumVectors() entries.
192 Kokkos::View<dot_type*, typename exec_space::memory_space> localResult(Kokkos::ViewAllocateWithoutInitializing(
"idot:localResult"), X.getNumVectors());
// Local dot products of X and Y are written into localResult.
193 idotLocal<MV, decltype(localResult), runOnDevice>(localResult, X, Y);
// Fence exec_space before communicating. NOTE(review): presumably so that
// localResult is fully written before the reduction reads it -- confirm.
196 exec_space().fence();
// Sum-reduce localResult into globalResult over the communicator of X's map.
197 auto comm = X.getMap()->getComm();
198 return iallreduce(localResult, globalResult, ::Teuchos::REDUCE_SUM, *comm);
// idotImpl: common implementation behind the idot overloads, templated on the
// input type MV and the result view type ResultView. Returns a shared_ptr to
// a CommRequest.
204template<
class MV,
class ResultView>
205std::shared_ptr< ::Tpetra::Details::CommRequest>
// Compile-time check: the result view's non-const element type must be
// exactly MV::dot_type.
210 static_assert(std::is_same<typename ResultView::non_const_value_type, typename MV::dot_type>::value,
211 "Tpetra::idot: result view's element type must match MV::dot_type");
// Branch on whether X reports that it needs a sync to device.
214 if(
X.need_sync_device())
// idot overload whose inputs X and Y are Tpetra::MultiVector<SC, LO, GO, NT>.
// Returns a shared_ptr to a CommRequest.
283template<
class SC,
class LO,
class GO,
class NT>
284std::shared_ptr< ::Tpetra::Details::CommRequest>
286 const ::Tpetra::MultiVector<SC, LO, GO, NT>&
X,
287 const ::Tpetra::MultiVector<SC, LO, GO, NT>&
Y)
// dot_type of the Vector specialization with the same template arguments.
289 using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
// Unmanaged rank-1 view type over dot_type in Kokkos::HostSpace.
// NOTE(review): presumably wraps the caller's raw result pointer -- confirm.
293 Kokkos::View<dot_type*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
// idot overload that takes the result as a rank-1 Kokkos::View of the
// MultiVector's dot_type, on the MultiVector's device_type.
//
// result - view of dot_type* that receives the result.
// X, Y   - Tpetra::MultiVector<SC, LO, GO, NT> inputs.
// Returns a shared_ptr to a CommRequest.
360template<
class SC,
class LO,
class GO,
class NT>
361std::shared_ptr< ::Tpetra::Details::CommRequest>
362idot (
const Kokkos::View<typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type*,
363 typename ::Tpetra::MultiVector<SC, LO, GO, NT>::device_type>&
result,
364 const ::Tpetra::MultiVector<SC, LO, GO, NT>&
X,
365 const ::Tpetra::MultiVector<SC, LO, GO, NT>&
Y)
// idot overload for single Tpetra::Vector inputs. The result is a rank-0
// Kokkos::View (one dot_type value) on the Vector's device_type.
//
// result - rank-0 view that receives the result.
// X, Y   - Tpetra::Vector<SC, LO, GO, NT> inputs.
// Returns a shared_ptr to a CommRequest.
411template<
class SC,
class LO,
class GO,
class NT>
412std::shared_ptr< ::Tpetra::Details::CommRequest>
413idot (
const Kokkos::View<typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type,
414 typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type>&
result,
415 const ::Tpetra::Vector<SC, LO, GO, NT>&
X,
416 const ::Tpetra::Vector<SC, LO, GO, NT>&
Y)
418 using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
419 using result_device_t = typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type;
// Re-view the single result entry as an unmanaged rank-1 view of length 1
// over the same storage (result.data()); no new allocation is made.
420 Kokkos::View<dot_type*, result_device_t, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
result1D(
result.data(), 1);
Declaration of Tpetra::iallreduce.
Struct that holds views of the contents of a CrsMatrix.
Implementation details of Tpetra.
std::shared_ptr< ::Tpetra::Details::CommRequest > idotImpl(const ResultView &globalResult, const MV &X, const MV &Y)
Internal (common) version of idot, a global dot product that uses a non-blocking MPI reduction.
void idotLocal(const ResultView &localResult, const MV &X, const MV &Y)
Compute dot product locally. Where the kernel runs is controlled by runOnDevice.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
std::shared_ptr< ::Tpetra::Details::CommRequest > idot(typename ::Tpetra::MultiVector< SC, LO, GO, NT >::dot_type *resultRaw, const ::Tpetra::MultiVector< SC, LO, GO, NT > &X, const ::Tpetra::MultiVector< SC, LO, GO, NT > &Y)
Nonblocking dot product, with either Tpetra::MultiVector or Tpetra::Vector inputs,...