Tpetra parallel linear algebra  Version of the Day
Tpetra_BlockMultiVector_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_BLOCKMULTIVECTOR_DEF_HPP
41 #define TPETRA_BLOCKMULTIVECTOR_DEF_HPP
42 
44 #include "Tpetra_BlockView.hpp"
45 #include "Teuchos_OrdinalTraits.hpp"
46 
47 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
48 namespace { // anonymous
49 
61  template<class MultiVectorType>
62  struct RawHostPtrFromMultiVector {
63  typedef typename MultiVectorType::impl_scalar_type impl_scalar_type;
64 
65  static impl_scalar_type* getRawPtr (MultiVectorType& X) {
66  return nullptr;
67  //auto X_view_host = X.getLocalViewHost ();
68  //impl_scalar_type* X_raw = X_view_host.data ();
69  //return X_raw;
70  }
71  };
72 
 // Free-function wrapper around RawHostPtrFromMultiVector::getRawPtr.
 // NOTE(review): the return-type line and the typedef of MV appear to
 // have been lost in extraction; MV is presumably
 // Tpetra::MultiVector<S, LO, GO, N> -- confirm against the original.
 template<class S, class LO, class GO, class N>
 getRawHostPtrFromMultiVector (Tpetra::MultiVector<S, LO, GO, N>& X) {
   return RawHostPtrFromMultiVector<MV>::getRawPtr (X);
 }
91 
92 } // namespace (anonymous)
93 #endif
94 
95 namespace Tpetra {
96 
// Return a Tpetra::MultiVector that views this BlockMultiVector's
// data (shallow: it simply returns mv_).
// NOTE(review): the return-type and class-qualifier lines of this
// definition appear to have been lost in extraction.
template<class Scalar, class LO, class GO, class Node>
getMultiVectorView () const
{
  return mv_;
}
104 
// Cast the source object of an Import/Export to a BlockMultiVector,
// returning a nonowning RCP; throws std::invalid_argument if the
// dynamic_cast fails.
// NOTE(review): the function-name/qualifier line and the BMV typedef
// appear to have been lost in extraction.
template<class Scalar, class LO, class GO, class Node>
Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
{
  const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
  TEUCHOS_TEST_FOR_EXCEPTION(
    src_bmv == nullptr, std::invalid_argument, "Tpetra::"
    "BlockMultiVector: The source object of an Import or Export to a "
    "BlockMultiVector, must also be a BlockMultiVector.");
  return Teuchos::rcp (src_bmv, false); // nonowning RCP
}
118 
// Copy "constructor" with an explicit copy-or-view semantics choice,
// forwarded to the underlying MultiVector's copy constructor.
// NOTE(review): the constructor's opening signature line(s) appear to
// have been lost in extraction; `in` is the BlockMultiVector copied.
template<class Scalar, class LO, class GO, class Node>
  const Teuchos::DataAccess copyOrView) :
  dist_object_type (in),
  meshMap_ (in.meshMap_),
  pointMap_ (in.pointMap_),
  mv_ (in.mv_, copyOrView),
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
#endif // TPETRA_ENABLE_DEPRECATED_CODE
  blockSize_ (in.blockSize_)
{}
132 
// Constructor: build from a mesh Map, a block size, and a number of
// columns; the point Map is derived from the mesh Map via makePointMap.
// NOTE(review): the class-qualifier line of this definition appears
// to have been lost in extraction.
template<class Scalar, class LO, class GO, class Node>
BlockMultiVector (const map_type& meshMap,
                  const LO blockSize,
                  const LO numVecs) :
  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
  meshMap_ (meshMap),
  pointMap_ (makePointMap (meshMap, blockSize)),
  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs), // nonowning RCP is OK, since pointMap_ won't go away
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
#endif // TPETRA_ENABLE_DEPRECATED_CODE
  blockSize_ (blockSize)
{}
147 
// Constructor: like the (meshMap, blockSize, numVecs) constructor,
// but the caller supplies the point Map instead of deriving it.
// NOTE(review): the class-qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
BlockMultiVector (const map_type& meshMap,
                  const map_type& pointMap,
                  const LO blockSize,
                  const LO numVecs) :
  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
  meshMap_ (meshMap),
  pointMap_ (pointMap),
  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
#endif // TPETRA_ENABLE_DEPRECATED_CODE
  blockSize_ (blockSize)
{}
163 
// Constructor: wrap an existing MultiVector X_mv (viewing, never
// copying, its data) as a BlockMultiVector with the given mesh Map
// and block size.  The point Map is taken from X_mv's own Map.
// NOTE(review): the class-qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
BlockMultiVector (const mv_type& X_mv,
                  const map_type& meshMap,
                  const LO blockSize) :
  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
  meshMap_ (meshMap),
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
  mvData_ (nullptr), // just for now; set properly at the end of the body
#endif // TPETRA_ENABLE_DEPRECATED_CODE
  blockSize_ (blockSize)
{
  using Teuchos::RCP;

  if (X_mv.getCopyOrView () == Teuchos::View) {
    // The input MultiVector has view semantics, so assignment just
    // does a shallow copy.
    mv_ = X_mv;
  }
  else if (X_mv.getCopyOrView () == Teuchos::Copy) {
    // The input MultiVector has copy semantics. We can't change
    // that, but we can make a view of the input MultiVector and
    // change the view to have view semantics. (That sounds silly;
    // shouldn't views always have view semantics? but remember that
    // "view semantics" just governs the default behavior of the copy
    // constructor and assignment operator.)
    RCP<const mv_type> X_view_const;
    const size_t numCols = X_mv.getNumVectors ();
    if (numCols == 0) {
      Teuchos::Array<size_t> cols (0); // view will have zero columns
      X_view_const = X_mv.subView (cols ());
    } else { // Range1D is an inclusive range
      X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
    }
    TEUCHOS_TEST_FOR_EXCEPTION(
      X_view_const.is_null (), std::logic_error, "Tpetra::"
      "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
      "should never happen. Please report this bug to the Tpetra developers.");

    // It's perfectly OK to cast away const here. Those view methods
    // should be marked const anyway, because views can't change the
    // allocation (just the entries themselves).
    RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
    TEUCHOS_TEST_FOR_EXCEPTION(
      X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
      "BlockMultiVector constructor: We just set a MultiVector "
      "to have view semantics, but it claims that it doesn't have view "
      "semantics. This should never happen. "
      "Please report this bug to the Tpetra developers.");
    mv_ = *X_view; // MultiVector::operator= does a shallow copy here
  }

  // At this point, mv_ has been assigned, so we can ignore X_mv.
  Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
  if (! pointMap.is_null ()) {
    pointMap_ = *pointMap; // Map::operator= also does a shallow copy
  }
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
  mvData_ = getRawHostPtrFromMultiVector (mv_);
#endif // TPETRA_ENABLE_DEPRECATED_CODE
}
225 
// "Offset view" constructor: view a contiguous row subset of X, with
// caller-provided mesh and point Maps.  The offset is in mesh points
// and is converted to point rows via X's block size.
// NOTE(review): the signature lines (class qualifier and declaration
// of parameter X) appear lost in extraction.
template<class Scalar, class LO, class GO, class Node>
  const map_type& newMeshMap,
  const map_type& newPointMap,
  const size_t offset) :
  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
  meshMap_ (newMeshMap),
  pointMap_ (newPointMap),
  mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
#endif // TPETRA_ENABLE_DEPRECATED_CODE
  blockSize_ (X.getBlockSize ())
{}
241 
// "Offset view" constructor variant: same as above, but the point Map
// is derived from the new mesh Map via makePointMap.
// NOTE(review): the signature lines (class qualifier and declaration
// of parameter X) appear lost in extraction.
template<class Scalar, class LO, class GO, class Node>
  const map_type& newMeshMap,
  const size_t offset) :
  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
  meshMap_ (newMeshMap),
  pointMap_ (makePointMap (newMeshMap, X.getBlockSize ())),
  mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
  mvData_ (getRawHostPtrFromMultiVector (mv_)),
#endif // TPETRA_ENABLE_DEPRECATED_CODE
  blockSize_ (X.getBlockSize ())
{}
256 
// Default constructor: an empty BlockMultiVector with a null Map and
// block size zero.
// NOTE(review): the class-qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
BlockMultiVector () :
  dist_object_type (Teuchos::null),
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
  mvData_ (nullptr),
#endif // TPETRA_ENABLE_DEPRECATED_CODE
  blockSize_ (0)
{}
266 
// Create the point Map corresponding to a mesh Map and block size:
// each mesh point expands to blockSize consecutive point indices.
// NOTE(review): the return-type/class-qualifier lines of this
// definition appear to have been lost in extraction.
template<class Scalar, class LO, class GO, class Node>
makePointMap (const map_type& meshMap, const LO blockSize)
{
  typedef Tpetra::global_size_t GST;
  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;

  const GST gblNumMeshMapInds =
    static_cast<GST> (meshMap.getGlobalNumElements ());
  const size_t lclNumMeshMapIndices =
    static_cast<size_t> (meshMap.getNodeNumElements ());
  const GST gblNumPointMapInds =
    gblNumMeshMapInds * static_cast<GST> (blockSize);
  const size_t lclNumPointMapInds =
    lclNumMeshMapIndices * static_cast<size_t> (blockSize);
  const GO indexBase = meshMap.getIndexBase ();

  if (meshMap.isContiguous ()) {
    // Contiguous mesh Map: the point Map is contiguous too, so only
    // global/local counts and the index base are needed.
    return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
                     meshMap.getComm ());
  }
  else {
    // "Hilbert's Hotel" trick: multiply each process' GIDs by
    // blockSize, and fill in. That ensures correctness even if the
    // mesh Map is overlapping.
    Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getNodeElementList ();
    const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
    Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
    for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
      const GO meshGid = lclMeshGblInds[g];
      // Offset relative to indexBase so the trick works for any base.
      const GO pointGidStart = indexBase +
        (meshGid - indexBase) * static_cast<GO> (blockSize);
      const size_type offset = g * static_cast<size_type> (blockSize);
      for (LO k = 0; k < blockSize; ++k) {
        const GO pointGid = pointGidStart + static_cast<GO> (k);
        lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
      }
    }
    return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
                     meshMap.getComm ());
  }
}
310 
311 
// Implementation of replaceLocalValues: overwrite the block at
// (localRowIndex, colIndex) with vals (blockSize entries).
// Precondition (not checked here): localRowIndex is locally owned.
// NOTE(review): the class-qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
void
replaceLocalValuesImpl (const LO localRowIndex,
                        const LO colIndex,
                        const Scalar vals[])
{
  auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
  // Wrap the caller's raw array in an unmanaged host view of length blockSize.
  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
                                                                getBlockSize ());
  Kokkos::deep_copy (X_dst, X_src);
}
324 
325 
// Replace all values in the block at (localRowIndex, colIndex).
// Returns false (and does nothing) if the row is not locally owned.
// NOTE(review): the class-qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
bool
replaceLocalValues (const LO localRowIndex,
                    const LO colIndex,
                    const Scalar vals[])
{
  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
    return false;
  } else {
    replaceLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}
340 
// Replace all values in the block at (globalRowIndex, colIndex).
// Returns false (and does nothing) if this process does not own the row.
// NOTE(review): the class-qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
bool
replaceGlobalValues (const GO globalRowIndex,
                     const LO colIndex,
                     const Scalar vals[])
{
  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
    return false;
  } else {
    replaceLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}
356 
// Implementation of sumIntoLocalValues: add vals (blockSize entries)
// into the block at (localRowIndex, colIndex) via AXPY with alpha = 1.
// Precondition (not checked here): localRowIndex is locally owned.
// NOTE(review): the class-qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
void
sumIntoLocalValuesImpl (const LO localRowIndex,
                        const LO colIndex,
                        const Scalar vals[])
{
  auto X_dst = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
  // Wrap the caller's raw array in an unmanaged host view of length blockSize.
  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
                                                                getBlockSize ());
  AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
}
369 
// Sum into all values in the block at (localRowIndex, colIndex).
// Returns false (and does nothing) if the row is not locally owned.
// NOTE(review): the class-qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
bool
sumIntoLocalValues (const LO localRowIndex,
                    const LO colIndex,
                    const Scalar vals[])
{
  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
    return false;
  } else {
    sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}
384 
// Sum into all values in the block at (globalRowIndex, colIndex).
// Returns false (and does nothing) if this process does not own the row.
// NOTE(review): the class-qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
bool
sumIntoGlobalValues (const GO globalRowIndex,
                     const LO colIndex,
                     const Scalar vals[])
{
  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
    return false;
  } else {
    sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
    return true;
  }
}
400 
401 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
402 
// Deprecated: expose a nonconst raw pointer (via vals) to the entries
// of the block at (localRowIndex, colIndex).  Returns false if the
// row is not locally owned.
// NOTE(review): the class-qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
bool
// TPETRA_DEPRECATED
getLocalRowView (const LO localRowIndex, const LO colIndex, Scalar*& vals)
{
  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
    return false;
  } else {
    auto X_ij = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
    vals = reinterpret_cast<Scalar*> (X_ij.data ());
    return true;
  }
}
417 
418 template<class Scalar, class LO, class GO, class Node>
419 bool
420 // TPETRA_DEPRECATED
421 BlockMultiVector<Scalar, LO, GO, Node>::
422 getGlobalRowView (const GO globalRowIndex, const LO colIndex, Scalar*& vals)
423 {
424  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
425  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
426  return false;
427  } else {
428  auto X_ij = getLocalBlockHost (localRowIndex, colIndex, Access::ReadWrite);
429  vals = reinterpret_cast<Scalar*> (X_ij.data ());
430  return true;
431  }
432 }
433 
434 template<class Scalar, class LO, class GO, class Node>
435 typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
436 // TPETRA_DEPRECATED
437 BlockMultiVector<Scalar, LO, GO, Node>::
438 getLocalBlock (const LO localRowIndex,
439  const LO colIndex)
440 {
441  if (! isValidLocalMeshIndex (localRowIndex)) {
442  return little_host_vec_type ();
443  } else {
444  const size_t blockSize = getBlockSize ();
445  const size_t offset = colIndex * this->getStrideY () +
446  localRowIndex * blockSize;
447  impl_scalar_type* blockRaw = this->getRawPtr () + offset;
448  return little_host_vec_type (blockRaw, blockSize);
449  }
450 }
451 
452 #endif // TPETRA_ENABLE_DEPRECATED_CODE
453 
454 template<class Scalar, class LO, class GO, class Node>
455 typename BlockMultiVector<Scalar, LO, GO, Node>::const_little_host_vec_type
456 BlockMultiVector<Scalar, LO, GO, Node>::
457 getLocalBlockHost (const LO localRowIndex,
458  const LO colIndex,
459  const Access::ReadOnlyStruct) const
460 {
461  if (!isValidLocalMeshIndex(localRowIndex)) {
462  return const_little_host_vec_type();
463  } else {
464  const size_t blockSize = getBlockSize();
465  auto hostView = mv_.getLocalViewHost(Access::ReadOnly);
466  LO startRow = localRowIndex*blockSize;
467  LO endRow = startRow + blockSize;
468  return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
469  colIndex);
470  }
471 }
472 
// Overwrite-all access: return a writable host view of the block at
// (localRowIndex, colIndex), or an empty view for an invalid row.
// NOTE(review): the class-qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
getLocalBlockHost (const LO localRowIndex,
                   const LO colIndex,
                   const Access::OverwriteAllStruct)
{
  if (!isValidLocalMeshIndex(localRowIndex)) {
    return little_host_vec_type();
  } else {
    // Slice this block's rows out of column colIndex of the host view.
    const size_t blockSize = getBlockSize();
    auto hostView = mv_.getLocalViewHost(Access::OverwriteAll);
    LO startRow = localRowIndex*blockSize;
    LO endRow = startRow + blockSize;
    return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
                           colIndex);
  }
}
491 
// Read-write access: return a writable host view of the block at
// (localRowIndex, colIndex), or an empty view for an invalid row.
// NOTE(review): the class-qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
typename BlockMultiVector<Scalar, LO, GO, Node>::little_host_vec_type
getLocalBlockHost (const LO localRowIndex,
                   const LO colIndex,
                   const Access::ReadWriteStruct)
{
  if (!isValidLocalMeshIndex(localRowIndex)) {
    return little_host_vec_type();
  } else {
    // Slice this block's rows out of column colIndex of the host view.
    const size_t blockSize = getBlockSize();
    auto hostView = mv_.getLocalViewHost(Access::ReadWrite);
    LO startRow = localRowIndex*blockSize;
    LO endRow = startRow + blockSize;
    return Kokkos::subview(hostView, Kokkos::make_pair(startRow, endRow),
                           colIndex);
  }
}
510 
511 template<class Scalar, class LO, class GO, class Node>
512 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
513 BlockMultiVector<Scalar, LO, GO, Node>::
514 getMultiVectorFromSrcDistObject (const Tpetra::SrcDistObject& src)
515 {
516  using Teuchos::rcp;
517  using Teuchos::rcpFromRef;
518 
519  // The source object of an Import or Export must be a
520  // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
521  // them in that order; one must succeed. Note that the target of
522  // the Import or Export calls checkSizes in DistObject's doTransfer.
523  typedef BlockMultiVector<Scalar, LO, GO, Node> this_type;
524  const this_type* srcBlkVec = dynamic_cast<const this_type*> (&src);
525  if (srcBlkVec == nullptr) {
526  const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
527  if (srcMultiVec == nullptr) {
528  // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
529  // currently does a shallow copy by default. This is why we
530  // return by RCP, rather than by value.
531  return rcp (new mv_type ());
532  } else { // src is a MultiVector
533  return rcp (srcMultiVec, false); // nonowning RCP
534  }
535  } else { // src is a BlockMultiVector
536  return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
537  }
538 }
539 
// True if and only if src can serve as the source of an Import or
// Export into *this, i.e., getMultiVectorFromSrcDistObject recognized
// it (BlockMultiVector or MultiVector).
// NOTE(review): the signature lines of this definition (return type,
// class qualifier, name, and parameter) appear lost in extraction.
template<class Scalar, class LO, class GO, class Node>
{
  return ! getMultiVectorFromSrcDistObject (src).is_null ();
}
546 
// Always throws std::logic_error: BlockMultiVector does not support
// direct Import/Export; build a point importer with makePointMap.
// NOTE(review): the signature lines appear lost in extraction, and
// the thrown message is missing "; " after "Do NOT use this"
// (compare the sibling packAndPrepare/unpackAndCombine messages).
template<class Scalar, class LO, class GO, class Node>
(const SrcDistObject& src,
 const size_t numSameIDs,
 const Kokkos::DualView<const local_ordinal_type*,
   buffer_device_type>& permuteToLIDs,
 const Kokkos::DualView<const local_ordinal_type*,
   buffer_device_type>& permuteFromLIDs,
 const CombineMode CM)
{
  TEUCHOS_TEST_FOR_EXCEPTION
    (true, std::logic_error,
     "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this "
     "instead, create a point importer using makePointMap function.");
}
563 
564 template<class Scalar, class LO, class GO, class Node>
565 void BlockMultiVector<Scalar, LO, GO, Node>::
566 packAndPrepare
567 (const SrcDistObject& src,
568  const Kokkos::DualView<const local_ordinal_type*,
569  buffer_device_type>& exportLIDs,
570  Kokkos::DualView<packet_type*,
571  buffer_device_type>& exports,
572  Kokkos::DualView<size_t*,
573  buffer_device_type> numPacketsPerLID,
574  size_t& constantNumPackets)
575 {
576  TEUCHOS_TEST_FOR_EXCEPTION
577  (true, std::logic_error,
578  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
579  "instead, create a point importer using makePointMap function.");
580 }
581 
582 template<class Scalar, class LO, class GO, class Node>
583 void BlockMultiVector<Scalar, LO, GO, Node>::
584 unpackAndCombine
585 (const Kokkos::DualView<const local_ordinal_type*,
586  buffer_device_type>& importLIDs,
587  Kokkos::DualView<packet_type*,
588  buffer_device_type> imports,
589  Kokkos::DualView<size_t*,
590  buffer_device_type> numPacketsPerLID,
591  const size_t constantNumPackets,
592  const CombineMode combineMode)
593 {
594  TEUCHOS_TEST_FOR_EXCEPTION
595  (true, std::logic_error,
596  "Tpetra::BlockMultiVector::copyAndPermute: Do NOT use this; "
597  "instead, create a point importer using makePointMap function.");
598 }
599 
// True iff meshLocalIndex is a valid local index in the mesh Map.
// NOTE(review): the return-type/qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
isValidLocalMeshIndex (const LO meshLocalIndex) const
{
  return meshLocalIndex != Teuchos::OrdinalTraits<LO>::invalid () &&
    meshMap_.isNodeLocalElement (meshLocalIndex);
}
607 
// Fill all entries with val (delegates to the underlying MultiVector).
// NOTE(review): the return-type/qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
putScalar (const Scalar& val)
{
  mv_.putScalar (val);
}
614 
// Multiply all entries in place by val (delegates to the MultiVector).
// NOTE(review): the return-type/qualifier line appears lost in extraction.
template<class Scalar, class LO, class GO, class Node>
scale (const Scalar& val)
{
  mv_.scale (val);
}
621 
// this := beta*this + alpha*X (delegates to MultiVector::update).
// NOTE(review): the qualifier line and the declaration of parameter X
// appear lost in extraction.
template<class Scalar, class LO, class GO, class Node>
update (const Scalar& alpha,
        const Scalar& beta)
{
  mv_.update (alpha, X.mv_, beta);
}
630 
631 namespace Impl {
// Y := alpha * D * X
// Device functor: one parallel iteration per mesh row k updates the
// k-th block of every column of Y with alpha * D_k * X_k.
template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
struct BlockWiseMultiply {
  typedef typename ViewD::size_type Size;

private:
  typedef typename ViewD::device_type Device;
  typedef typename ViewD::non_const_value_type ImplScalar;
  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;

  // Unmanaged analogs of the input View types (unmanaged views are
  // not reference counted).
  template <typename View>
  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
                                     typename View::device_type, Unmanaged>;
  typedef UnmanagedView<ViewY> UnMViewY;
  typedef UnmanagedView<ViewD> UnMViewD;
  typedef UnmanagedView<ViewX> UnMViewX;

  const Size block_size_;
  Scalar alpha_;
  UnMViewY Y_;
  UnMViewD D_;
  UnMViewX X_;

public:
  BlockWiseMultiply (const Size block_size, const Scalar& alpha,
                     const ViewY& Y, const ViewD& D, const ViewX& X)
    : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
  {}

  // Process mesh row (block index) k.
  KOKKOS_INLINE_FUNCTION
  void operator() (const Size k) const {
    const auto zero = Kokkos::Details::ArithTraits<Scalar>::zero();
    auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
    const auto num_vecs = X_.extent(1);
    for (Size i = 0; i < num_vecs; ++i) {
      Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
      auto X_curBlk = Kokkos::subview(X_, kslice, i);
      auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
      // Y_curBlk := alpha * D_curBlk * X_curBlk.
      // Recall that GEMV does an update (+=) of the last argument, so
      // Y_curBlk is zeroed first.
      Tpetra::FILL(Y_curBlk, zero);
      Tpetra::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
    }
  }
};
677 
678 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
679 inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
680 createBlockWiseMultiply (const int block_size, const Scalar& alpha,
681  const ViewY& Y, const ViewD& D, const ViewX& X) {
682  return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
683 }
684 
// Device functor for the block Jacobi update
//   Y := beta * Y + alpha * D * Z,
// applied one dense block (mesh row) per parallel iteration.  Y and Z
// are rank-1 column views; D is rank 3 (block index, row, column).
template <typename ViewY,
          typename Scalar,
          typename ViewD,
          typename ViewZ,
          typename LO = typename ViewY::size_type>
class BlockJacobiUpdate {
private:
  typedef typename ViewD::device_type Device;
  typedef typename ViewD::non_const_value_type ImplScalar;
  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;

  // Unmanaged analogs of the input View types (unmanaged views are
  // not reference counted).
  template <typename ViewType>
  using UnmanagedView = Kokkos::View<typename ViewType::data_type,
                                     typename ViewType::array_layout,
                                     typename ViewType::device_type,
                                     Unmanaged>;
  typedef UnmanagedView<ViewY> UnMViewY;
  typedef UnmanagedView<ViewD> UnMViewD;
  typedef UnmanagedView<ViewZ> UnMViewZ;

  const LO blockSize_; // taken from D.extent(1)
  UnMViewY Y_;
  const Scalar alpha_;
  UnMViewD D_;
  UnMViewZ Z_;
  const Scalar beta_;

public:
  BlockJacobiUpdate (const ViewY& Y,
                     const Scalar& alpha,
                     const ViewD& D,
                     const ViewZ& Z,
                     const Scalar& beta) :
    blockSize_ (D.extent (1)),
    // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
    Y_ (Y),
    alpha_ (alpha),
    D_ (D),
    Z_ (Z),
    beta_ (beta)
  {
    static_assert (static_cast<int> (ViewY::rank) == 1,
                   "Y must have rank 1.");
    static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
    static_assert (static_cast<int> (ViewZ::rank) == 1,
                   "Z must have rank 1.");
    // static_assert (static_cast<int> (ViewZ::rank) ==
    //                static_cast<int> (ViewY::rank),
    //                "Z must have the same rank as Y.");
  }

  // Update block (mesh row) k of Y in place.
  KOKKOS_INLINE_FUNCTION void
  operator() (const LO& k) const
  {
    using Kokkos::ALL;
    using Kokkos::subview;
    typedef Kokkos::pair<LO, LO> range_type;
    typedef Kokkos::Details::ArithTraits<Scalar> KAT;

    // We only have to implement the alpha != 0 case.

    auto D_curBlk = subview (D_, k, ALL (), ALL ());
    const range_type kslice (k*blockSize_, (k+1)*blockSize_);

    // Z.update (STS::one (), X, -STS::one ()); // assume this is done

    auto Z_curBlk = subview (Z_, kslice);
    auto Y_curBlk = subview (Y_, kslice);
    // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
    if (beta_ == KAT::zero ()) {
      // beta == 0: overwrite Y, never reading its (possibly
      // uninitialized) entries.
      Tpetra::FILL (Y_curBlk, KAT::zero ());
    }
    else if (beta_ != KAT::one ()) {
      Tpetra::SCAL (beta_, Y_curBlk);
    }
    Tpetra::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
  }
};
763 
764 template<class ViewY,
765  class Scalar,
766  class ViewD,
767  class ViewZ,
768  class LO = typename ViewD::size_type>
769 void
770 blockJacobiUpdate (const ViewY& Y,
771  const Scalar& alpha,
772  const ViewD& D,
773  const ViewZ& Z,
774  const Scalar& beta)
775 {
776  static_assert (Kokkos::Impl::is_view<ViewY>::value, "Y must be a Kokkos::View.");
777  static_assert (Kokkos::Impl::is_view<ViewD>::value, "D must be a Kokkos::View.");
778  static_assert (Kokkos::Impl::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
779  static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
780  "Y and Z must have the same rank.");
781  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
782 
783  const auto lclNumMeshRows = D.extent (0);
784 
785 #ifdef HAVE_TPETRA_DEBUG
786  // D.extent(0) is the (local) number of mesh rows.
787  // D.extent(1) is the block size. Thus, their product should be
788  // the local number of point rows, that is, the number of rows in Y.
789  const auto blkSize = D.extent (1);
790  const auto lclNumPtRows = lclNumMeshRows * blkSize;
791  TEUCHOS_TEST_FOR_EXCEPTION
792  (Y.extent (0) != lclNumPtRows, std::invalid_argument,
793  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
794  "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
795  << " = " << lclNumPtRows << ".");
796  TEUCHOS_TEST_FOR_EXCEPTION
797  (Y.extent (0) != Z.extent (0), std::invalid_argument,
798  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
799  "Z.extent(0) = " << Z.extent (0) << ".");
800  TEUCHOS_TEST_FOR_EXCEPTION
801  (Y.extent (1) != Z.extent (1), std::invalid_argument,
802  "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
803  "Z.extent(1) = " << Z.extent (1) << ".");
804 #endif // HAVE_TPETRA_DEBUG
805 
806  BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
807  typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
808  // lclNumMeshRows must fit in LO, else the Map would not be correct.
809  range_type range (0, static_cast<LO> (lclNumMeshRows));
810  Kokkos::parallel_for (range, functor);
811 }
812 
813 } // namespace Impl
814 
// *this := alpha * D * X, blockwise: for each locally owned mesh row
// k, Y_k := alpha * D_k * X_k over every column.
// NOTE(review): the class-qualifier line and the declaration of
// parameter X appear to have been lost in extraction.
template<class Scalar, class LO, class GO, class Node>
blockWiseMultiply (const Scalar& alpha,
                   const Kokkos::View<const impl_scalar_type***,
                     device_type, Kokkos::MemoryUnmanaged>& D,
{
  using Kokkos::ALL;
  typedef typename device_type::execution_space execution_space;
  const LO lclNumMeshRows = meshMap_.getNodeNumElements ();

  if (alpha == STS::zero ()) {
    // alpha == 0 zeros the result regardless of D or X.
    this->putScalar (STS::zero ());
  }
  else { // alpha != 0
    const LO blockSize = this->getBlockSize ();
    const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
    auto X_lcl = X.mv_.getLocalViewDevice (Access::ReadOnly);
    auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
    auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);

    // Use an explicit RangePolicy with the desired execution space.
    // Otherwise, if you just use a number, it runs on the default
    // execution space. For example, if the default execution space
    // is Cuda but the current execution space is Serial, using just a
    // number would incorrectly run with Cuda.
    Kokkos::RangePolicy<execution_space, LO> range (0, lclNumMeshRows);
    Kokkos::parallel_for (range, bwm);
  }
}
845 
// Block Jacobi update: first Z := X - Z (in place), then for each
// column j, Y_j := beta * Y_j + alpha * D * Z_j, one dense block per
// mesh row (delegates to Impl::blockJacobiUpdate).
// NOTE(review): the class-qualifier line and the declarations of
// parameters X and Z appear to have been lost in extraction.
template<class Scalar, class LO, class GO, class Node>
blockJacobiUpdate (const Scalar& alpha,
                   const Kokkos::View<const impl_scalar_type***,
                     device_type, Kokkos::MemoryUnmanaged>& D,
  const Scalar& beta)
{
  using Kokkos::ALL;
  using Kokkos::subview;
  typedef impl_scalar_type IST;

  const IST alphaImpl = static_cast<IST> (alpha);
  const IST betaImpl = static_cast<IST> (beta);
  const LO numVecs = mv_.getNumVectors ();

  if (alpha == STS::zero ()) { // Y := beta * Y
    this->scale (beta);
  }
  else { // alpha != 0
    Z.update (STS::one (), X, -STS::one ()); // Z := X - Z
    for (LO j = 0; j < numVecs; ++j) {
      // NOTE(review): fetching the device views anew on every loop
      // iteration looks unnecessary; they could likely be hoisted out
      // of the loop -- confirm before changing.
      auto Y_lcl = this->mv_.getLocalViewDevice (Access::ReadWrite);
      auto Z_lcl = Z.mv_.getLocalViewDevice (Access::ReadWrite);
      auto Y_lcl_j = subview (Y_lcl, ALL (), j);
      auto Z_lcl_j = subview (Z_lcl, ALL (), j);
      Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
    }
  }
}
877 
878 } // namespace Tpetra
879 
880 //
881 // Explicit instantiation macro
882 //
883 // Must be expanded from within the Tpetra namespace!
884 //
885 #define TPETRA_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
886  template class BlockMultiVector< S, LO, GO, NODE >;
887 
888 #endif // TPETRA_BLOCKMULTIVECTOR_DEF_HPP
Linear algebra kernels for small dense matrices and vectors.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
MultiVector for multiple degrees of freedom per mesh point.
virtual bool checkSizes(const Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a local index.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update: Y = beta*Y + alpha*D*(X - Z).
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
typename mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the object.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Sum into all values at the given mesh point, using a global index.
mv_type getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector's data.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using local row and column indices.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
Tpetra::MultiVector< Scalar, LO, GO, Node > mv_type
The specialization of Tpetra::MultiVector that this class uses.
typename mv_type::device_type device_type
The Kokkos Device type.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[])
Replace all values at the given mesh point, using a global index.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
mv_type mv_
The Tpetra::MultiVector used to represent the data.
typename device_type::execution_space execution_space
The Kokkos execution space.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Teuchos::ArrayView< const global_ordinal_type > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
global_ordinal_type getIndexBase() const
The index base for this Map.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
One or more distributed dense vectors.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
typename Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
size_t getNumVectors() const
Number of columns in the multivector.
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
Abstract base class for objects that can be the source of an Import or Export operation.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
size_t global_size_t
Global size_t object.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
CombineMode
Rule for combining data in an Import or Export.