Tpetra parallel linear algebra  Version of the Day
Tpetra_idot.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_IDOT_HPP
43 #define TPETRA_DETAILS_IDOT_HPP
44 
60 
61 #include "Tpetra_iallreduce.hpp"
62 #include "Tpetra_MultiVector.hpp"
63 #include "Tpetra_Vector.hpp"
64 #include "Teuchos_CommHelpers.hpp"
65 #include "KokkosBlas1_dot.hpp"
66 #include <stdexcept>
67 #include <sstream>
68 
69 namespace Tpetra {
70 namespace Details {
71 
72 // Temporary helper to get read-only view from multivector in requested space.
73 // TODO: when https://github.com/kokkos/kokkos/issues/3850 is resolved,
74 // remove this and just use templated getLocalView<Device>(ReadOnly).
75 template<typename MV>
76 struct GetReadOnly
77 {
78  using DevView = typename MV::dual_view_type::t_dev::const_type;
79  using HostView = typename MV::dual_view_type::t_host::const_type;
80 
81  template<typename exec_space>
82  static DevView get(const MV& x, typename std::enable_if<std::is_same<exec_space, typename MV::execution_space>::value>::type* = nullptr)
83  {
84  return x.getLocalViewDevice(Tpetra::Access::ReadOnly);
85  }
86 
87  template<typename exec_space>
88  static HostView get(const MV& x, typename std::enable_if<!std::is_same<exec_space, typename MV::execution_space>::value>::type* = nullptr)
89  {
90  return x.getLocalViewHost(Tpetra::Access::ReadOnly);
91  }
92 };
93 
95 
96 template<class MV, class ResultView, bool runOnDevice>
97 void idotLocal(const ResultView& localResult,
98  const MV& X,
99  const MV& Y)
100 {
101  using pair_type = Kokkos::pair<size_t, size_t>;
102  using exec_space = typename std::conditional<runOnDevice, typename MV::execution_space, Kokkos::DefaultHostExecutionSpace>::type;
103  //if the execution space can access localResult, use it directly. Otherwise need to make a copy.
104  static_assert(Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible,
105  "idotLocal: Execution space must be able to access localResult");
106  //If Dot executes on Serial, it requires the result to be HostSpace. If localResult is CudaUVMSpace,
107  //we can just treat it like HostSpace.
108  Kokkos::View<typename ResultView::data_type, typename exec_space::memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
109  localResultUnmanaged(localResult.data(), localResult.extent(0));
110  const size_t numRows = X.getLocalLength ();
111  const pair_type rowRange (0, numRows);
112  const size_t X_numVecs = X.getNumVectors ();
113  const size_t Y_numVecs = Y.getNumVectors ();
114  const size_t numVecs = X_numVecs > Y_numVecs ? X_numVecs : Y_numVecs;
115  // Check compatibility of number of columns; allow special cases of
116  // a multivector dot a single vector, or vice versa.
117  if (X_numVecs != Y_numVecs &&
118  X_numVecs != size_t (1) &&
119  Y_numVecs != size_t (1)) {
120  std::ostringstream os;
121  os << "Tpetra::idot: X.getNumVectors() = " << X_numVecs
122  << " != Y.getNumVectors() = " << Y_numVecs
123  << ", but neither is 1.";
124  throw std::invalid_argument (os.str ());
125  }
126  auto X_lcl = GetReadOnly<MV>::template get<exec_space>(X);
127  auto Y_lcl = GetReadOnly<MV>::template get<exec_space>(Y);
128  //Can the multivector dot kernel be used?
129  bool useMVDot = X.isConstantStride() && Y.isConstantStride() && X_numVecs == Y_numVecs;
130  if(useMVDot)
131  {
132  if (numVecs == size_t (1)) {
133  auto X_lcl_1d = Kokkos::subview (X_lcl, rowRange, 0);
134  auto Y_lcl_1d = Kokkos::subview (Y_lcl, rowRange, 0);
135  auto result_0d = Kokkos::subview (localResultUnmanaged, 0);
136  KokkosBlas::dot (result_0d, X_lcl_1d, Y_lcl_1d);
137  }
138  else {
139  auto X_lcl_2d = Kokkos::subview (X_lcl, rowRange, pair_type (0, X_numVecs));
140  auto Y_lcl_2d = Kokkos::subview (Y_lcl, rowRange, pair_type (0, Y_numVecs));
141  KokkosBlas::dot (localResultUnmanaged, X_lcl_2d, Y_lcl_2d);
142  }
143  }
144  else
145  {
146  auto XWhichVectors = Tpetra::getMultiVectorWhichVectors(X);
147  auto YWhichVectors = Tpetra::getMultiVectorWhichVectors(Y);
148  //Need to compute each dot individually, using 1D subviews of X_lcl and Y_lcl
149  for(size_t vec = 0; vec < numVecs; vec++) {
150  //Get the Vectors for each column of X and Y that will be dotted together.
151  //Note: "which vectors" is not populated for constant stride MVs (but it's the identity mapping)
152  size_t Xj = (numVecs == X_numVecs) ? vec : 0;
153  Xj = X.isConstantStride() ? Xj : XWhichVectors[Xj];
154  size_t Yj = (numVecs == Y_numVecs) ? vec : 0;
155  Yj = Y.isConstantStride() ? Yj : YWhichVectors[Yj];
156  auto Xcol = Kokkos::subview(X_lcl, rowRange, Xj);
157  auto Ycol = Kokkos::subview(Y_lcl, rowRange, Yj);
158  //Compute the rank-1 dot product, and place the result in an element of localResult
159  KokkosBlas::dot(Kokkos::subview(localResultUnmanaged, vec), Xcol, Ycol);
160  }
161  }
162 }
163 
164 //Helper to avoid extra instantiations of KokkosBlas::dot and iallreduce.
165 template<typename MV, typename ResultView>
166 struct IdotHelper
167 {
168  using dot_type = typename MV::dot_type;
169 
170  //First version: use result directly
171  template<typename exec_space>
172  static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
173  const ResultView& globalResult, const MV& X, const MV& Y,
174  typename std::enable_if<Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* = nullptr)
175  {
176  constexpr bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
177  idotLocal<MV, ResultView, runOnDevice>(globalResult, X, Y);
178  //Fence because we're giving result of device kernel to MPI
179  if(runOnDevice)
180  exec_space().fence();
181  auto comm = X.getMap()->getComm();
182  return iallreduce(globalResult, globalResult, ::Teuchos::REDUCE_SUM, *comm);
183  }
184 
185  //Second version: use a temporary local result, because exec_space can't write to globalResult
186  template<typename exec_space>
187  static std::shared_ptr< ::Tpetra::Details::CommRequest> run(
188  const ResultView& globalResult, const MV& X, const MV& Y,
189  typename std::enable_if<!Kokkos::SpaceAccessibility<exec_space, typename ResultView::memory_space>::accessible>::type* = nullptr)
190  {
191  constexpr bool runOnDevice = std::is_same<exec_space, typename MV::execution_space>::value;
192  Kokkos::View<dot_type*, typename exec_space::memory_space> localResult(Kokkos::ViewAllocateWithoutInitializing("idot:localResult"), X.getNumVectors());
193  idotLocal<MV, decltype(localResult), runOnDevice>(localResult, X, Y);
194  //Fence because we're giving result of device kernel to MPI
195  if(runOnDevice)
196  exec_space().fence();
197  auto comm = X.getMap()->getComm();
198  return iallreduce(localResult, globalResult, ::Teuchos::REDUCE_SUM, *comm);
199  }
200 };
201 
204 template<class MV, class ResultView>
205 std::shared_ptr< ::Tpetra::Details::CommRequest>
206 idotImpl(const ResultView& globalResult,
207  const MV& X,
208  const MV& Y)
209 {
210  static_assert(std::is_same<typename ResultView::non_const_value_type, typename MV::dot_type>::value,
211  "Tpetra::idot: result view's element type must match MV::dot_type");
212 
213  // Execution space to use for dot kernel(s) is whichever has access to up-to-date X.
214  if(X.need_sync_device())
215  {
216  //run on host.
217  return IdotHelper<MV, ResultView>::template run<Kokkos::DefaultHostExecutionSpace>(globalResult, X, Y);
218  }
219  else
220  {
221  //run on device.
222  return IdotHelper<MV, ResultView>::template run<typename MV::execution_space>(globalResult, X, Y);
223  }
224 }
225 } // namespace Details
226 
227 //
228 // SKIP DOWN TO HERE
229 //
230 
283 template<class SC, class LO, class GO, class NT>
284 std::shared_ptr< ::Tpetra::Details::CommRequest>
285 idot (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type* resultRaw,
286  const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
287  const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
288 {
289  using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
290  const size_t X_numVecs = X.getNumVectors ();
291  const size_t Y_numVecs = Y.getNumVectors ();
292  const size_t numVecs = (X_numVecs > Y_numVecs) ? X_numVecs : Y_numVecs;
293  Kokkos::View<dot_type*, Kokkos::HostSpace, Kokkos::MemoryTraits<Kokkos::Unmanaged>>
294  resultView(resultRaw, numVecs);
295  return Details::idotImpl(resultView, X, Y);
296 }
297 
360 template<class SC, class LO, class GO, class NT>
361 std::shared_ptr< ::Tpetra::Details::CommRequest>
362 idot (const Kokkos::View<typename ::Tpetra::MultiVector<SC, LO, GO, NT>::dot_type*,
363  typename ::Tpetra::MultiVector<SC, LO, GO, NT>::device_type>& result,
364  const ::Tpetra::MultiVector<SC, LO, GO, NT>& X,
365  const ::Tpetra::MultiVector<SC, LO, GO, NT>& Y)
366 {
367  return Details::idotImpl(result, X, Y);
368 }
369 
411 template<class SC, class LO, class GO, class NT>
412 std::shared_ptr< ::Tpetra::Details::CommRequest>
413 idot (const Kokkos::View<typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type,
414  typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type>& result,
415  const ::Tpetra::Vector<SC, LO, GO, NT>& X,
416  const ::Tpetra::Vector<SC, LO, GO, NT>& Y)
417 {
418  using dot_type = typename ::Tpetra::Vector<SC, LO, GO, NT>::dot_type;
419  using result_device_t = typename ::Tpetra::Vector<SC, LO, GO, NT>::device_type;
420  Kokkos::View<dot_type*, result_device_t, Kokkos::MemoryTraits<Kokkos::Unmanaged>> result1D(result.data(), 1);
421  return Details::idotImpl(result1D, X, Y);
422 }
423 
424 } // namespace Tpetra
425 
426 #endif // TPETRA_DETAILS_IDOT_HPP
Declaration of Tpetra::iallreduce.
Implementation details of Tpetra.
std::shared_ptr< ::Tpetra::Details::CommRequest > idotImpl(const ResultView &globalResult, const MV &X, const MV &Y)
Internal (common) version of idot, a global dot product that uses a non-blocking MPI reduction.
void idotLocal(const ResultView &localResult, const MV &X, const MV &Y)
Compute dot product locally. Where the kernel runs controlled by runOnDevice.
Definition: Tpetra_idot.hpp:97
Namespace Tpetra contains the class and methods constituting the Tpetra library.
std::shared_ptr< ::Tpetra::Details::CommRequest > idot(typename ::Tpetra::MultiVector< SC, LO, GO, NT >::dot_type *resultRaw, const ::Tpetra::MultiVector< SC, LO, GO, NT > &X, const ::Tpetra::MultiVector< SC, LO, GO, NT > &Y)
Nonblocking dot product, with either Tpetra::MultiVector or Tpetra::Vector inputs,...