Tpetra parallel linear algebra  Version of the Day
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 // mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
45 // incorrect (as() is not a device function) and usually irrelevant
46 // (it would only matter if LocalOrdinal were bigger than size_t on a
47 // particular platform, which is unlikely).
48 
49 // KDD Aug 2020: In the permute/pack/unpack functors,
50 // the Enabled template parameter is specialized in
51 // downstream packages like Stokhos using SFINAE to provide partial
52 // specializations based on the scalar type of the SrcView and DstView
53 // template parameters. See #7898.
54 // Do not remove it before checking with Stokhos and other specializing users.
55 
56 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
57 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
58 
59 #include "Kokkos_Core.hpp"
60 #include "Kokkos_ArithTraits.hpp"
61 #include "impl/Kokkos_Atomic_Generic.hpp"
62 #include <sstream>
63 #include <stdexcept>
64 
65 namespace Tpetra {
66 namespace KokkosRefactor {
67 namespace Details {
68 
74 namespace Impl {
75 
82 template<class IntegerType,
83  const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
84 struct OutOfBounds {
85  static KOKKOS_INLINE_FUNCTION bool
86  test (const IntegerType x,
87  const IntegerType exclusiveUpperBound);
88 };
89 
90 // Partial specialization for the case where IntegerType IS signed.
91 template<class IntegerType>
92 struct OutOfBounds<IntegerType, true> {
93  static KOKKOS_INLINE_FUNCTION bool
94  test (const IntegerType x,
95  const IntegerType exclusiveUpperBound)
96  {
97  return x < static_cast<IntegerType> (0) || x >= exclusiveUpperBound;
98  }
99 };
100 
101 // Partial specialization for the case where IntegerType is NOT signed.
102 template<class IntegerType>
103 struct OutOfBounds<IntegerType, false> {
104  static KOKKOS_INLINE_FUNCTION bool
105  test (const IntegerType x,
106  const IntegerType exclusiveUpperBound)
107  {
108  return x >= exclusiveUpperBound;
109  }
110 };
111 
114 template<class IntegerType>
115 KOKKOS_INLINE_FUNCTION bool
116 outOfBounds (const IntegerType x, const IntegerType exclusiveUpperBound)
117 {
118  return OutOfBounds<IntegerType>::test (x, exclusiveUpperBound);
119 }
120 
121 } // namespace Impl
122 
123  // Functors for implementing packAndPrepare and unpackAndCombine
124  // through parallel_for
125 
126  template <typename DstView, typename SrcView, typename IdxView,
127  typename Enabled = void>
128  struct PackArraySingleColumn {
129  typedef typename DstView::execution_space execution_space;
130  typedef typename execution_space::size_type size_type;
131 
132  DstView dst;
133  SrcView src;
134  IdxView idx;
135  size_t col;
136 
137  PackArraySingleColumn (const DstView& dst_,
138  const SrcView& src_,
139  const IdxView& idx_,
140  const size_t col_) :
141  dst(dst_), src(src_), idx(idx_), col(col_) {}
142 
143  KOKKOS_INLINE_FUNCTION void
144  operator() (const size_type k) const {
145  dst(k) = src(idx(k), col);
146  }
147 
148  static void
149  pack (const DstView& dst,
150  const SrcView& src,
151  const IdxView& idx,
152  const size_t col)
153  {
154  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
155  Kokkos::parallel_for
156  ("Tpetra::MultiVector pack one col",
157  range_type (0, idx.size ()),
158  PackArraySingleColumn (dst, src, idx, col));
159  }
160  };
161 
162  template <typename DstView,
163  typename SrcView,
164  typename IdxView,
165  typename SizeType = typename DstView::execution_space::size_type,
166  typename Enabled = void>
167  class PackArraySingleColumnWithBoundsCheck {
168  private:
169  static_assert (Kokkos::Impl::is_view<DstView>::value,
170  "DstView must be a Kokkos::View.");
171  static_assert (Kokkos::Impl::is_view<SrcView>::value,
172  "SrcView must be a Kokkos::View.");
173  static_assert (Kokkos::Impl::is_view<IdxView>::value,
174  "IdxView must be a Kokkos::View.");
175  static_assert (static_cast<int> (DstView::rank) == 1,
176  "DstView must be a rank-1 Kokkos::View.");
177  static_assert (static_cast<int> (SrcView::rank) == 2,
178  "SrcView must be a rank-2 Kokkos::View.");
179  static_assert (static_cast<int> (IdxView::rank) == 1,
180  "IdxView must be a rank-1 Kokkos::View.");
181  static_assert (std::is_integral<SizeType>::value,
182  "SizeType must be a built-in integer type.");
183  public:
184  typedef SizeType size_type;
185  using value_type = size_t;
186 
187  private:
188  DstView dst;
189  SrcView src;
190  IdxView idx;
191  size_type col;
192 
193  public:
194  PackArraySingleColumnWithBoundsCheck (const DstView& dst_,
195  const SrcView& src_,
196  const IdxView& idx_,
197  const size_type col_) :
198  dst (dst_), src (src_), idx (idx_), col (col_) {}
199 
200  KOKKOS_INLINE_FUNCTION void
201  operator() (const size_type k, value_type& lclErrCount) const {
202  using index_type = typename IdxView::non_const_value_type;
203 
204  const index_type lclRow = idx(k);
205  if (lclRow < static_cast<index_type> (0) ||
206  lclRow >= static_cast<index_type> (src.extent (0))) {
207  ++lclErrCount;
208  }
209  else {
210  dst(k) = src(lclRow, col);
211  }
212  }
213 
214  KOKKOS_INLINE_FUNCTION
215  void init (value_type& initialErrorCount) const {
216  initialErrorCount = 0;
217  }
218 
219  KOKKOS_INLINE_FUNCTION void
220  join (volatile value_type& dstErrorCount,
221  const volatile value_type& srcErrorCount) const
222  {
223  dstErrorCount += srcErrorCount;
224  }
225 
226  static void
227  pack (const DstView& dst,
228  const SrcView& src,
229  const IdxView& idx,
230  const size_type col)
231  {
232  typedef typename DstView::execution_space execution_space;
233  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
234  typedef typename IdxView::non_const_value_type index_type;
235 
236  size_t errorCount = 0;
237  Kokkos::parallel_reduce
238  ("Tpetra::MultiVector pack one col debug only",
239  range_type (0, idx.size ()),
240  PackArraySingleColumnWithBoundsCheck (dst, src, idx, col),
241  errorCount);
242 
243  if (errorCount != 0) {
244  // Go back and find the out-of-bounds entries in the index
245  // array. Performance doesn't matter since we are already in
246  // an error state, so we can do this sequentially, on host.
247  auto idx_h = Kokkos::create_mirror_view (idx);
248  Kokkos::deep_copy (idx_h, idx);
249 
250  std::vector<index_type> badIndices;
251  const size_type numInds = idx_h.extent (0);
252  for (size_type k = 0; k < numInds; ++k) {
253  if (idx_h(k) < static_cast<index_type> (0) ||
254  idx_h(k) >= static_cast<index_type> (src.extent (0))) {
255  badIndices.push_back (idx_h(k));
256  }
257  }
258 
259  TEUCHOS_TEST_FOR_EXCEPTION
260  (errorCount != badIndices.size (), std::logic_error,
261  "PackArraySingleColumnWithBoundsCheck: errorCount = " << errorCount
262  << " != badIndices.size() = " << badIndices.size () << ". This sho"
263  "uld never happen. Please report this to the Tpetra developers.");
264 
265  std::ostringstream os;
266  os << "MultiVector single-column pack kernel had "
267  << badIndices.size () << " out-of bounds index/ices. "
268  "Here they are: [";
269  for (size_t k = 0; k < badIndices.size (); ++k) {
270  os << badIndices[k];
271  if (k + 1 < badIndices.size ()) {
272  os << ", ";
273  }
274  }
275  os << "].";
276  throw std::runtime_error (os.str ());
277  }
278  }
279  };
280 
281 
282  template <typename DstView, typename SrcView, typename IdxView>
283  void
284  pack_array_single_column (const DstView& dst,
285  const SrcView& src,
286  const IdxView& idx,
287  const size_t col,
288  const bool debug = true)
289  {
290  static_assert (Kokkos::Impl::is_view<DstView>::value,
291  "DstView must be a Kokkos::View.");
292  static_assert (Kokkos::Impl::is_view<SrcView>::value,
293  "SrcView must be a Kokkos::View.");
294  static_assert (Kokkos::Impl::is_view<IdxView>::value,
295  "IdxView must be a Kokkos::View.");
296  static_assert (static_cast<int> (DstView::rank) == 1,
297  "DstView must be a rank-1 Kokkos::View.");
298  static_assert (static_cast<int> (SrcView::rank) == 2,
299  "SrcView must be a rank-2 Kokkos::View.");
300  static_assert (static_cast<int> (IdxView::rank) == 1,
301  "IdxView must be a rank-1 Kokkos::View.");
302 
303  if (debug) {
304  typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
305  impl_type::pack (dst, src, idx, col);
306  }
307  else {
308  typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
309  impl_type::pack (dst, src, idx, col);
310  }
311  }
312 
313  template <typename DstView, typename SrcView, typename IdxView,
314  typename Enabled = void>
315  struct PackArrayMultiColumn {
316  typedef typename DstView::execution_space execution_space;
317  typedef typename execution_space::size_type size_type;
318 
319  DstView dst;
320  SrcView src;
321  IdxView idx;
322  size_t numCols;
323 
324  PackArrayMultiColumn (const DstView& dst_,
325  const SrcView& src_,
326  const IdxView& idx_,
327  const size_t numCols_) :
328  dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
329 
330  KOKKOS_INLINE_FUNCTION void
331  operator() (const size_type k) const {
332  const typename IdxView::value_type localRow = idx(k);
333  const size_t offset = k*numCols;
334  for (size_t j = 0; j < numCols; ++j) {
335  dst(offset + j) = src(localRow, j);
336  }
337  }
338 
339  static void pack(const DstView& dst,
340  const SrcView& src,
341  const IdxView& idx,
342  size_t numCols) {
343  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
344  Kokkos::parallel_for
345  ("Tpetra::MultiVector pack multicol const stride",
346  range_type (0, idx.size ()),
347  PackArrayMultiColumn (dst, src, idx, numCols));
348  }
349  };
350 
351  template <typename DstView,
352  typename SrcView,
353  typename IdxView,
354  typename SizeType = typename DstView::execution_space::size_type,
355  typename Enabled = void>
356  class PackArrayMultiColumnWithBoundsCheck {
357  public:
358  using size_type = SizeType;
359  using value_type = size_t;
360 
361  private:
362  DstView dst;
363  SrcView src;
364  IdxView idx;
365  size_type numCols;
366 
367  public:
368  PackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
369  const SrcView& src_,
370  const IdxView& idx_,
371  const size_type numCols_) :
372  dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
373 
374  KOKKOS_INLINE_FUNCTION void
375  operator() (const size_type k, value_type& lclErrorCount) const {
376  typedef typename IdxView::non_const_value_type index_type;
377 
378  const index_type lclRow = idx(k);
379  if (lclRow < static_cast<index_type> (0) ||
380  lclRow >= static_cast<index_type> (src.extent (0))) {
381  ++lclErrorCount; // failed
382  }
383  else {
384  const size_type offset = k*numCols;
385  for (size_type j = 0; j < numCols; ++j) {
386  dst(offset + j) = src(lclRow, j);
387  }
388  }
389  }
390 
391  KOKKOS_INLINE_FUNCTION
392  void init (value_type& initialErrorCount) const {
393  initialErrorCount = 0;
394  }
395 
396  KOKKOS_INLINE_FUNCTION void
397  join (volatile value_type& dstErrorCount,
398  const volatile value_type& srcErrorCount) const
399  {
400  dstErrorCount += srcErrorCount;
401  }
402 
403  static void
404  pack (const DstView& dst,
405  const SrcView& src,
406  const IdxView& idx,
407  const size_type numCols)
408  {
409  typedef typename DstView::execution_space execution_space;
410  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
411  typedef typename IdxView::non_const_value_type index_type;
412 
413  size_t errorCount = 0;
414  Kokkos::parallel_reduce
415  ("Tpetra::MultiVector pack multicol const stride debug only",
416  range_type (0, idx.size ()),
417  PackArrayMultiColumnWithBoundsCheck (dst, src, idx, numCols),
418  errorCount);
419  if (errorCount != 0) {
420  // Go back and find the out-of-bounds entries in the index
421  // array. Performance doesn't matter since we are already in
422  // an error state, so we can do this sequentially, on host.
423  auto idx_h = Kokkos::create_mirror_view (idx);
424  Kokkos::deep_copy (idx_h, idx);
425 
426  std::vector<index_type> badIndices;
427  const size_type numInds = idx_h.extent (0);
428  for (size_type k = 0; k < numInds; ++k) {
429  if (idx_h(k) < static_cast<index_type> (0) ||
430  idx_h(k) >= static_cast<index_type> (src.extent (0))) {
431  badIndices.push_back (idx_h(k));
432  }
433  }
434 
435  TEUCHOS_TEST_FOR_EXCEPTION
436  (errorCount != badIndices.size (), std::logic_error,
437  "PackArrayMultiColumnWithBoundsCheck: errorCount = " << errorCount
438  << " != badIndices.size() = " << badIndices.size () << ". This sho"
439  "uld never happen. Please report this to the Tpetra developers.");
440 
441  std::ostringstream os;
442  os << "Tpetra::MultiVector multiple-column pack kernel had "
443  << badIndices.size () << " out-of bounds index/ices (errorCount = "
444  << errorCount << "): [";
445  for (size_t k = 0; k < badIndices.size (); ++k) {
446  os << badIndices[k];
447  if (k + 1 < badIndices.size ()) {
448  os << ", ";
449  }
450  }
451  os << "].";
452  throw std::runtime_error (os.str ());
453  }
454  }
455  };
456 
457 
458  template <typename DstView,
459  typename SrcView,
460  typename IdxView>
461  void
462  pack_array_multi_column (const DstView& dst,
463  const SrcView& src,
464  const IdxView& idx,
465  const size_t numCols,
466  const bool debug = true)
467  {
468  static_assert (Kokkos::Impl::is_view<DstView>::value,
469  "DstView must be a Kokkos::View.");
470  static_assert (Kokkos::Impl::is_view<SrcView>::value,
471  "SrcView must be a Kokkos::View.");
472  static_assert (Kokkos::Impl::is_view<IdxView>::value,
473  "IdxView must be a Kokkos::View.");
474  static_assert (static_cast<int> (DstView::rank) == 1,
475  "DstView must be a rank-1 Kokkos::View.");
476  static_assert (static_cast<int> (SrcView::rank) == 2,
477  "SrcView must be a rank-2 Kokkos::View.");
478  static_assert (static_cast<int> (IdxView::rank) == 1,
479  "IdxView must be a rank-1 Kokkos::View.");
480 
481  if (debug) {
482  typedef PackArrayMultiColumnWithBoundsCheck<DstView,
483  SrcView, IdxView> impl_type;
484  impl_type::pack (dst, src, idx, numCols);
485  }
486  else {
487  typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
488  impl_type::pack (dst, src, idx, numCols);
489  }
490  }
491 
492  template <typename DstView, typename SrcView, typename IdxView,
493  typename ColView, typename Enabled = void>
494  struct PackArrayMultiColumnVariableStride {
495  typedef typename DstView::execution_space execution_space;
496  typedef typename execution_space::size_type size_type;
497 
498  DstView dst;
499  SrcView src;
500  IdxView idx;
501  ColView col;
502  size_t numCols;
503 
504  PackArrayMultiColumnVariableStride (const DstView& dst_,
505  const SrcView& src_,
506  const IdxView& idx_,
507  const ColView& col_,
508  const size_t numCols_) :
509  dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
510 
511  KOKKOS_INLINE_FUNCTION
512  void operator() (const size_type k) const {
513  const typename IdxView::value_type localRow = idx(k);
514  const size_t offset = k*numCols;
515  for (size_t j = 0; j < numCols; ++j) {
516  dst(offset + j) = src(localRow, col(j));
517  }
518  }
519 
520  static void pack(const DstView& dst,
521  const SrcView& src,
522  const IdxView& idx,
523  const ColView& col,
524  size_t numCols) {
525  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
526  Kokkos::parallel_for
527  ("Tpetra::MultiVector pack multicol var stride",
528  range_type (0, idx.size ()),
529  PackArrayMultiColumnVariableStride (dst, src, idx, col, numCols));
530  }
531  };
532 
533  template <typename DstView,
534  typename SrcView,
535  typename IdxView,
536  typename ColView,
537  typename SizeType = typename DstView::execution_space::size_type,
538  typename Enabled = void>
539  class PackArrayMultiColumnVariableStrideWithBoundsCheck {
540  public:
541  using size_type = SizeType;
542  using value_type = size_t;
543 
544  private:
545  DstView dst;
546  SrcView src;
547  IdxView idx;
548  ColView col;
549  size_type numCols;
550 
551  public:
552  PackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
553  const SrcView& src_,
554  const IdxView& idx_,
555  const ColView& col_,
556  const size_type numCols_) :
557  dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
558 
559  KOKKOS_INLINE_FUNCTION void
560  operator() (const size_type k, value_type& lclErrorCount) const {
561  typedef typename IdxView::non_const_value_type row_index_type;
562  typedef typename ColView::non_const_value_type col_index_type;
563 
564  const row_index_type lclRow = idx(k);
565  if (lclRow < static_cast<row_index_type> (0) ||
566  lclRow >= static_cast<row_index_type> (src.extent (0))) {
567  ++lclErrorCount = 0;
568  }
569  else {
570  const size_type offset = k*numCols;
571  for (size_type j = 0; j < numCols; ++j) {
572  const col_index_type lclCol = col(j);
573  if (Impl::outOfBounds<col_index_type> (lclCol, src.extent (1))) {
574  ++lclErrorCount = 0;
575  }
576  else { // all indices are valid; do the assignment
577  dst(offset + j) = src(lclRow, lclCol);
578  }
579  }
580  }
581  }
582 
583  KOKKOS_INLINE_FUNCTION void
584  init (value_type& initialErrorCount) const {
585  initialErrorCount = 0;
586  }
587 
588  KOKKOS_INLINE_FUNCTION void
589  join (volatile value_type& dstErrorCount,
590  const volatile value_type& srcErrorCount) const
591  {
592  dstErrorCount += srcErrorCount;
593  }
594 
595  static void
596  pack (const DstView& dst,
597  const SrcView& src,
598  const IdxView& idx,
599  const ColView& col,
600  const size_type numCols)
601  {
602  using execution_space = typename DstView::execution_space;
603  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
604  using row_index_type = typename IdxView::non_const_value_type;
605  using col_index_type = typename ColView::non_const_value_type;
606 
607  size_t errorCount = 0;
608  Kokkos::parallel_reduce
609  ("Tpetra::MultiVector pack multicol var stride debug only",
610  range_type (0, idx.size ()),
611  PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
612  col, numCols),
613  errorCount);
614  if (errorCount != 0) {
615  constexpr size_t maxNumBadIndicesToPrint = 100;
616 
617  std::ostringstream os; // for error reporting
618  os << "Tpetra::MultiVector multicolumn variable stride pack kernel "
619  "found " << errorCount
620  << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
621 
622  // Go back and find any out-of-bounds entries in the array of
623  // row indices. Performance doesn't matter since we are already
624  // in an error state, so we can do this sequentially, on host.
625  auto idx_h = Kokkos::create_mirror_view (idx);
626  Kokkos::deep_copy (idx_h, idx);
627 
628  std::vector<row_index_type> badRows;
629  const size_type numRowInds = idx_h.extent (0);
630  for (size_type k = 0; k < numRowInds; ++k) {
631  if (Impl::outOfBounds<row_index_type> (idx_h(k), src.extent (0))) {
632  badRows.push_back (idx_h(k));
633  }
634  }
635 
636  if (badRows.size () != 0) {
637  os << badRows.size () << " out-of-bounds row ind"
638  << (badRows.size () != size_t (1) ? "ices" : "ex");
639  if (badRows.size () <= maxNumBadIndicesToPrint) {
640  os << ": [";
641  for (size_t k = 0; k < badRows.size (); ++k) {
642  os << badRows[k];
643  if (k + 1 < badRows.size ()) {
644  os << ", ";
645  }
646  }
647  os << "]. ";
648  }
649  else {
650  os << ". ";
651  }
652  }
653  else {
654  os << "No out-of-bounds row indices. ";
655  }
656 
657  // Go back and find any out-of-bounds entries in the array
658  // of column indices.
659  auto col_h = Kokkos::create_mirror_view (col);
660  Kokkos::deep_copy (col_h, col);
661 
662  std::vector<col_index_type> badCols;
663  const size_type numColInds = col_h.extent (0);
664  for (size_type k = 0; k < numColInds; ++k) {
665  if (Impl::outOfBounds<col_index_type> (col_h(k), src.extent (1))) {
666  badCols.push_back (col_h(k));
667  }
668  }
669 
670  if (badCols.size () != 0) {
671  os << badCols.size () << " out-of-bounds column ind"
672  << (badCols.size () != size_t (1) ? "ices" : "ex");
673  if (badCols.size () <= maxNumBadIndicesToPrint) {
674  os << ": [";
675  for (size_t k = 0; k < badCols.size (); ++k) {
676  os << badCols[k];
677  if (k + 1 < badCols.size ()) {
678  os << ", ";
679  }
680  }
681  os << "]. ";
682  }
683  else {
684  os << ". ";
685  }
686  }
687  else {
688  os << "No out-of-bounds column indices. ";
689  }
690 
691  TEUCHOS_TEST_FOR_EXCEPTION
692  (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
693  std::logic_error, "Tpetra::MultiVector variable stride pack "
694  "kernel reports errorCount=" << errorCount << ", but we failed "
695  "to find any bad rows or columns. This should never happen. "
696  "Please report this to the Tpetra developers.");
697 
698  throw std::runtime_error (os.str ());
699  } // hasErr
700  }
701  };
702 
703  template <typename DstView,
704  typename SrcView,
705  typename IdxView,
706  typename ColView>
707  void
708  pack_array_multi_column_variable_stride (const DstView& dst,
709  const SrcView& src,
710  const IdxView& idx,
711  const ColView& col,
712  const size_t numCols,
713  const bool debug = true)
714  {
715  static_assert (Kokkos::Impl::is_view<DstView>::value,
716  "DstView must be a Kokkos::View.");
717  static_assert (Kokkos::Impl::is_view<SrcView>::value,
718  "SrcView must be a Kokkos::View.");
719  static_assert (Kokkos::Impl::is_view<IdxView>::value,
720  "IdxView must be a Kokkos::View.");
721  static_assert (Kokkos::Impl::is_view<ColView>::value,
722  "ColView must be a Kokkos::View.");
723  static_assert (static_cast<int> (DstView::rank) == 1,
724  "DstView must be a rank-1 Kokkos::View.");
725  static_assert (static_cast<int> (SrcView::rank) == 2,
726  "SrcView must be a rank-2 Kokkos::View.");
727  static_assert (static_cast<int> (IdxView::rank) == 1,
728  "IdxView must be a rank-1 Kokkos::View.");
729  static_assert (static_cast<int> (ColView::rank) == 1,
730  "ColView must be a rank-1 Kokkos::View.");
731 
732  if (debug) {
733  typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
734  SrcView, IdxView, ColView> impl_type;
735  impl_type::pack (dst, src, idx, col, numCols);
736  }
737  else {
738  typedef PackArrayMultiColumnVariableStride<DstView,
739  SrcView, IdxView, ColView> impl_type;
740  impl_type::pack (dst, src, idx, col, numCols);
741  }
742  }
743 
744  // Tag types to indicate whether to use atomic updates in the
745  // various CombineMode "Op"s.
746  struct atomic_tag {};
747  struct nonatomic_tag {};
748 
749  struct AddOp {
750  template<class SC>
751  KOKKOS_INLINE_FUNCTION
752  void operator() (atomic_tag, SC& dest, const SC& src) const {
753  Kokkos::atomic_add (&dest, src);
754  }
755 
756  template<class SC>
757  KOKKOS_INLINE_FUNCTION
758  void operator() (nonatomic_tag, SC& dest, const SC& src) const {
759  dest += src;
760  }
761  };
762 
763  struct InsertOp {
764  // There's no point to using Kokkos::atomic_assign for the REPLACE
765  // or INSERT CombineModes, since this is not a well-defined
766  // reduction for MultiVector anyway. See GitHub Issue #4417
767  // (which this fixes).
768  template<class SC>
769  KOKKOS_INLINE_FUNCTION
770  void operator() (atomic_tag, SC& dest, const SC& src) const {
771  dest = src;
772  }
773 
774  template<class SC>
775  KOKKOS_INLINE_FUNCTION
776  void operator() (nonatomic_tag, SC& dest, const SC& src) const {
777  dest = src;
778  }
779  };
780 
781  // Kokkos::Impl::atomic_fetch_oper wants a class like this.
782  template<class Scalar1, class Scalar2>
783  struct AbsMaxOper {
784  KOKKOS_INLINE_FUNCTION
785  static Scalar1 apply(const Scalar1& val1, const Scalar2& val2) {
786  const auto val1_abs = Kokkos::ArithTraits<Scalar1>::abs(val1);
787  const auto val2_abs = Kokkos::ArithTraits<Scalar2>::abs(val2);
788  return val1_abs > val2_abs ? Scalar1(val1_abs) : Scalar1(val2_abs);
789  }
790  };
791 
792  struct AbsMaxOp {
793  template <typename SC>
794  KOKKOS_INLINE_FUNCTION
795  void operator() (atomic_tag, SC& dest, const SC& src) const {
796  Kokkos::Impl::atomic_fetch_oper (AbsMaxOper<SC, SC> (), &dest, src);
797  }
798 
799  template <typename SC>
800  KOKKOS_INLINE_FUNCTION
801  void operator() (nonatomic_tag, SC& dest, const SC& src) const {
802  dest = AbsMaxOper<SC, SC> ().apply (dest, src);
803  }
804  };
805 
806  template <typename ExecutionSpace,
807  typename DstView,
808  typename SrcView,
809  typename IdxView,
810  typename Op,
811  typename Enabled = void>
812  class UnpackArrayMultiColumn {
813  private:
814  static_assert (Kokkos::Impl::is_view<DstView>::value,
815  "DstView must be a Kokkos::View.");
816  static_assert (Kokkos::Impl::is_view<SrcView>::value,
817  "SrcView must be a Kokkos::View.");
818  static_assert (Kokkos::Impl::is_view<IdxView>::value,
819  "IdxView must be a Kokkos::View.");
820  static_assert (static_cast<int> (DstView::rank) == 2,
821  "DstView must be a rank-2 Kokkos::View.");
822  static_assert (static_cast<int> (SrcView::rank) == 1,
823  "SrcView must be a rank-1 Kokkos::View.");
824  static_assert (static_cast<int> (IdxView::rank) == 1,
825  "IdxView must be a rank-1 Kokkos::View.");
826 
827  public:
828  typedef typename ExecutionSpace::execution_space execution_space;
829  typedef typename execution_space::size_type size_type;
830 
831  private:
832  DstView dst;
833  SrcView src;
834  IdxView idx;
835  Op op;
836  size_t numCols;
837 
838  public:
839  UnpackArrayMultiColumn (const ExecutionSpace& /* execSpace */,
840  const DstView& dst_,
841  const SrcView& src_,
842  const IdxView& idx_,
843  const Op& op_,
844  const size_t numCols_) :
845  dst (dst_),
846  src (src_),
847  idx (idx_),
848  op (op_),
849  numCols (numCols_)
850  {}
851 
852  template<class TagType>
853  KOKKOS_INLINE_FUNCTION void
854  operator() (TagType tag, const size_type k) const
855  {
856  static_assert
857  (std::is_same<TagType, atomic_tag>::value ||
858  std::is_same<TagType, nonatomic_tag>::value,
859  "TagType must be atomic_tag or nonatomic_tag.");
860 
861  const typename IdxView::value_type localRow = idx(k);
862  const size_t offset = k*numCols;
863  for (size_t j = 0; j < numCols; ++j) {
864  op (tag, dst(localRow, j), src(offset+j));
865  }
866  }
867 
868  static void
869  unpack (const ExecutionSpace& execSpace,
870  const DstView& dst,
871  const SrcView& src,
872  const IdxView& idx,
873  const Op& op,
874  const size_t numCols,
875  const bool use_atomic_updates)
876  {
877  if (use_atomic_updates) {
878  using range_type =
879  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
880  Kokkos::parallel_for
881  ("Tpetra::MultiVector unpack const stride atomic",
882  range_type (0, idx.size ()),
883  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
884  }
885  else {
886  using range_type =
887  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
888  Kokkos::parallel_for
889  ("Tpetra::MultiVector unpack const stride nonatomic",
890  range_type (0, idx.size ()),
891  UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
892  }
893  }
894  };
895 
896  template <typename ExecutionSpace,
897  typename DstView,
898  typename SrcView,
899  typename IdxView,
900  typename Op,
901  typename SizeType = typename ExecutionSpace::execution_space::size_type,
902  typename Enabled = void>
903  class UnpackArrayMultiColumnWithBoundsCheck {
904  private:
905  static_assert (Kokkos::Impl::is_view<DstView>::value,
906  "DstView must be a Kokkos::View.");
907  static_assert (Kokkos::Impl::is_view<SrcView>::value,
908  "SrcView must be a Kokkos::View.");
909  static_assert (Kokkos::Impl::is_view<IdxView>::value,
910  "IdxView must be a Kokkos::View.");
911  static_assert (static_cast<int> (DstView::rank) == 2,
912  "DstView must be a rank-2 Kokkos::View.");
913  static_assert (static_cast<int> (SrcView::rank) == 1,
914  "SrcView must be a rank-1 Kokkos::View.");
915  static_assert (static_cast<int> (IdxView::rank) == 1,
916  "IdxView must be a rank-1 Kokkos::View.");
917  static_assert (std::is_integral<SizeType>::value,
918  "SizeType must be a built-in integer type.");
919 
920  public:
921  using execution_space = typename ExecutionSpace::execution_space;
922  using size_type = SizeType;
923  using value_type = size_t;
924 
925  private:
926  DstView dst;
927  SrcView src;
928  IdxView idx;
929  Op op;
930  size_type numCols;
931 
932  public:
933  UnpackArrayMultiColumnWithBoundsCheck (const ExecutionSpace& /* execSpace */,
934  const DstView& dst_,
935  const SrcView& src_,
936  const IdxView& idx_,
937  const Op& op_,
938  const size_type numCols_) :
939  dst (dst_),
940  src (src_),
941  idx (idx_),
942  op (op_),
943  numCols (numCols_)
944  {}
945 
946  template<class TagType>
947  KOKKOS_INLINE_FUNCTION void
948  operator() (TagType tag,
949  const size_type k,
950  size_t& lclErrCount) const
951  {
952  static_assert
953  (std::is_same<TagType, atomic_tag>::value ||
954  std::is_same<TagType, nonatomic_tag>::value,
955  "TagType must be atomic_tag or nonatomic_tag.");
956  using index_type = typename IdxView::non_const_value_type;
957 
958  const index_type lclRow = idx(k);
959  if (lclRow < static_cast<index_type> (0) ||
960  lclRow >= static_cast<index_type> (dst.extent (0))) {
961  ++lclErrCount;
962  }
963  else {
964  const size_type offset = k*numCols;
965  for (size_type j = 0; j < numCols; ++j) {
966  op (tag, dst(lclRow,j), src(offset+j));
967  }
968  }
969  }
970 
971  template<class TagType>
972  KOKKOS_INLINE_FUNCTION void
973  init (TagType, size_t& initialErrorCount) const {
974  initialErrorCount = 0;
975  }
976 
977  template<class TagType>
978  KOKKOS_INLINE_FUNCTION void
979  join (TagType,
980  volatile size_t& dstErrorCount,
981  const volatile size_t& srcErrorCount) const
982  {
983  dstErrorCount += srcErrorCount;
984  }
985 
986  static void
987  unpack (const ExecutionSpace& execSpace,
988  const DstView& dst,
989  const SrcView& src,
990  const IdxView& idx,
991  const Op& op,
992  const size_type numCols,
993  const bool use_atomic_updates)
994  {
995  using index_type = typename IdxView::non_const_value_type;
996 
997  size_t errorCount = 0;
998  if (use_atomic_updates) {
999  using range_type =
1000  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1001  Kokkos::parallel_reduce
1002  ("Tpetra::MultiVector unpack multicol const stride atomic debug only",
1003  range_type (0, idx.size ()),
1004  UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1005  idx, op, numCols),
1006  errorCount);
1007  }
1008  else {
1009  using range_type =
1010  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1011  Kokkos::parallel_reduce
1012  ("Tpetra::MultiVector unpack multicol const stride nonatomic debug only",
1013  range_type (0, idx.size ()),
1014  UnpackArrayMultiColumnWithBoundsCheck (execSpace, dst, src,
1015  idx, op, numCols),
1016  errorCount);
1017  }
1018 
1019  if (errorCount != 0) {
1020  // Go back and find the out-of-bounds entries in the index
1021  // array. Performance doesn't matter since we are already in
1022  // an error state, so we can do this sequentially, on host.
1023  auto idx_h = Kokkos::create_mirror_view (idx);
1024  Kokkos::deep_copy (idx_h, idx);
1025 
1026  std::vector<index_type> badIndices;
1027  const size_type numInds = idx_h.extent (0);
1028  for (size_type k = 0; k < numInds; ++k) {
1029  if (idx_h(k) < static_cast<index_type> (0) ||
1030  idx_h(k) >= static_cast<index_type> (dst.extent (0))) {
1031  badIndices.push_back (idx_h(k));
1032  }
1033  }
1034 
1035  if (errorCount != badIndices.size ()) {
1036  std::ostringstream os;
1037  os << "MultiVector unpack kernel: errorCount = " << errorCount
1038  << " != badIndices.size() = " << badIndices.size ()
1039  << ". This should never happen. "
1040  "Please report this to the Tpetra developers.";
1041  throw std::logic_error (os.str ());
1042  }
1043 
1044  std::ostringstream os;
1045  os << "MultiVector unpack kernel had " << badIndices.size ()
1046  << " out-of bounds index/ices. Here they are: [";
1047  for (size_t k = 0; k < badIndices.size (); ++k) {
1048  os << badIndices[k];
1049  if (k + 1 < badIndices.size ()) {
1050  os << ", ";
1051  }
1052  }
1053  os << "].";
1054  throw std::runtime_error (os.str ());
1055  }
1056  }
1057  };
1058 
1059  template <typename ExecutionSpace,
1060  typename DstView,
1061  typename SrcView,
1062  typename IdxView,
1063  typename Op>
1064  void
1065  unpack_array_multi_column (const ExecutionSpace& execSpace,
1066  const DstView& dst,
1067  const SrcView& src,
1068  const IdxView& idx,
1069  const Op& op,
1070  const size_t numCols,
1071  const bool use_atomic_updates,
1072  const bool debug)
1073  {
1074  static_assert (Kokkos::Impl::is_view<DstView>::value,
1075  "DstView must be a Kokkos::View.");
1076  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1077  "SrcView must be a Kokkos::View.");
1078  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1079  "IdxView must be a Kokkos::View.");
1080  static_assert (static_cast<int> (DstView::rank) == 2,
1081  "DstView must be a rank-2 Kokkos::View.");
1082  static_assert (static_cast<int> (SrcView::rank) == 1,
1083  "SrcView must be a rank-1 Kokkos::View.");
1084  static_assert (static_cast<int> (IdxView::rank) == 1,
1085  "IdxView must be a rank-1 Kokkos::View.");
1086 
1087  if (debug) {
1088  typedef UnpackArrayMultiColumnWithBoundsCheck<ExecutionSpace,
1089  DstView, SrcView, IdxView, Op> impl_type;
1090  impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1091  use_atomic_updates);
1092  }
1093  else {
1094  typedef UnpackArrayMultiColumn<ExecutionSpace,
1095  DstView, SrcView, IdxView, Op> impl_type;
1096  impl_type::unpack (execSpace, dst, src, idx, op, numCols,
1097  use_atomic_updates);
1098  }
1099  }
1100 
1101  template <typename ExecutionSpace,
1102  typename DstView,
1103  typename SrcView,
1104  typename IdxView,
1105  typename ColView,
1106  typename Op,
1107  typename Enabled = void>
1108  class UnpackArrayMultiColumnVariableStride {
1109  private:
1110  static_assert (Kokkos::Impl::is_view<DstView>::value,
1111  "DstView must be a Kokkos::View.");
1112  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1113  "SrcView must be a Kokkos::View.");
1114  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1115  "IdxView must be a Kokkos::View.");
1116  static_assert (Kokkos::Impl::is_view<ColView>::value,
1117  "ColView must be a Kokkos::View.");
1118  static_assert (static_cast<int> (DstView::rank) == 2,
1119  "DstView must be a rank-2 Kokkos::View.");
1120  static_assert (static_cast<int> (SrcView::rank) == 1,
1121  "SrcView must be a rank-1 Kokkos::View.");
1122  static_assert (static_cast<int> (IdxView::rank) == 1,
1123  "IdxView must be a rank-1 Kokkos::View.");
1124  static_assert (static_cast<int> (ColView::rank) == 1,
1125  "ColView must be a rank-1 Kokkos::View.");
1126 
1127  public:
1128  using execution_space = typename ExecutionSpace::execution_space;
1129  using size_type = typename execution_space::size_type;
1130 
1131  private:
1132  DstView dst;
1133  SrcView src;
1134  IdxView idx;
1135  ColView col;
1136  Op op;
1137  size_t numCols;
1138 
1139  public:
1140  UnpackArrayMultiColumnVariableStride (const ExecutionSpace& /* execSpace */,
1141  const DstView& dst_,
1142  const SrcView& src_,
1143  const IdxView& idx_,
1144  const ColView& col_,
1145  const Op& op_,
1146  const size_t numCols_) :
1147  dst (dst_),
1148  src (src_),
1149  idx (idx_),
1150  col (col_),
1151  op (op_),
1152  numCols (numCols_)
1153  {}
1154 
1155  template<class TagType>
1156  KOKKOS_INLINE_FUNCTION void
1157  operator() (TagType tag, const size_type k) const
1158  {
1159  static_assert
1160  (std::is_same<TagType, atomic_tag>::value ||
1161  std::is_same<TagType, nonatomic_tag>::value,
1162  "TagType must be atomic_tag or nonatomic_tag.");
1163 
1164  const typename IdxView::value_type localRow = idx(k);
1165  const size_t offset = k*numCols;
1166  for (size_t j = 0; j < numCols; ++j) {
1167  op (tag, dst(localRow, col(j)), src(offset+j));
1168  }
1169  }
1170 
1171  static void
1172  unpack (const ExecutionSpace& execSpace,
1173  const DstView& dst,
1174  const SrcView& src,
1175  const IdxView& idx,
1176  const ColView& col,
1177  const Op& op,
1178  const size_t numCols,
1179  const bool use_atomic_updates)
1180  {
1181  if (use_atomic_updates) {
1182  using range_type =
1183  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1184  Kokkos::parallel_for
1185  ("Tpetra::MultiVector unpack var stride atomic",
1186  range_type (0, idx.size ()),
1187  UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1188  idx, col, op, numCols));
1189  }
1190  else {
1191  using range_type =
1192  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1193  Kokkos::parallel_for
1194  ("Tpetra::MultiVector unpack var stride nonatomic",
1195  range_type (0, idx.size ()),
1196  UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
1197  idx, col, op, numCols));
1198  }
1199  }
1200  };
1201 
1202  template <typename ExecutionSpace,
1203  typename DstView,
1204  typename SrcView,
1205  typename IdxView,
1206  typename ColView,
1207  typename Op,
1208  typename SizeType = typename ExecutionSpace::execution_space::size_type,
1209  typename Enabled = void>
1210  class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1211  private:
1212  static_assert (Kokkos::Impl::is_view<DstView>::value,
1213  "DstView must be a Kokkos::View.");
1214  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1215  "SrcView must be a Kokkos::View.");
1216  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1217  "IdxView must be a Kokkos::View.");
1218  static_assert (Kokkos::Impl::is_view<ColView>::value,
1219  "ColView must be a Kokkos::View.");
1220  static_assert (static_cast<int> (DstView::rank) == 2,
1221  "DstView must be a rank-2 Kokkos::View.");
1222  static_assert (static_cast<int> (SrcView::rank) == 1,
1223  "SrcView must be a rank-1 Kokkos::View.");
1224  static_assert (static_cast<int> (IdxView::rank) == 1,
1225  "IdxView must be a rank-1 Kokkos::View.");
1226  static_assert (static_cast<int> (ColView::rank) == 1,
1227  "ColView must be a rank-1 Kokkos::View.");
1228  static_assert (std::is_integral<SizeType>::value,
1229  "SizeType must be a built-in integer type.");
1230 
1231  public:
1232  using execution_space = typename ExecutionSpace::execution_space;
1233  using size_type = SizeType;
1234  using value_type = size_t;
1235 
1236  private:
1237  DstView dst;
1238  SrcView src;
1239  IdxView idx;
1240  ColView col;
1241  Op op;
1242  size_type numCols;
1243 
1244  public:
1245  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1246  (const ExecutionSpace& /* execSpace */,
1247  const DstView& dst_,
1248  const SrcView& src_,
1249  const IdxView& idx_,
1250  const ColView& col_,
1251  const Op& op_,
1252  const size_t numCols_) :
1253  dst (dst_),
1254  src (src_),
1255  idx (idx_),
1256  col (col_),
1257  op (op_),
1258  numCols (numCols_)
1259  {}
1260 
1261  template<class TagType>
1262  KOKKOS_INLINE_FUNCTION void
1263  operator() (TagType tag,
1264  const size_type k,
1265  value_type& lclErrorCount) const
1266  {
1267  static_assert
1268  (std::is_same<TagType, atomic_tag>::value ||
1269  std::is_same<TagType, nonatomic_tag>::value,
1270  "TagType must be atomic_tag or nonatomic_tag.");
1271  using row_index_type = typename IdxView::non_const_value_type;
1272  using col_index_type = typename ColView::non_const_value_type;
1273 
1274  const row_index_type lclRow = idx(k);
1275  if (lclRow < static_cast<row_index_type> (0) ||
1276  lclRow >= static_cast<row_index_type> (dst.extent (0))) {
1277  ++lclErrorCount;
1278  }
1279  else {
1280  const size_type offset = k * numCols;
1281  for (size_type j = 0; j < numCols; ++j) {
1282  const col_index_type lclCol = col(j);
1283  if (Impl::outOfBounds<col_index_type> (lclCol, dst.extent (1))) {
1284  ++lclErrorCount;
1285  }
1286  else { // all indices are valid; apply the op
1287  op (tag, dst(lclRow, col(j)), src(offset+j));
1288  }
1289  }
1290  }
1291  }
1292 
1293  KOKKOS_INLINE_FUNCTION void
1294  init (value_type& initialErrorCount) const {
1295  initialErrorCount = 0;
1296  }
1297 
1298  KOKKOS_INLINE_FUNCTION void
1299  join (volatile value_type& dstErrorCount,
1300  const volatile value_type& srcErrorCount) const
1301  {
1302  dstErrorCount += srcErrorCount;
1303  }
1304 
1305  static void
1306  unpack (const ExecutionSpace& execSpace,
1307  const DstView& dst,
1308  const SrcView& src,
1309  const IdxView& idx,
1310  const ColView& col,
1311  const Op& op,
1312  const size_type numCols,
1313  const bool use_atomic_updates)
1314  {
1315  using row_index_type = typename IdxView::non_const_value_type;
1316  using col_index_type = typename ColView::non_const_value_type;
1317 
1318  size_t errorCount = 0;
1319  if (use_atomic_updates) {
1320  using range_type =
1321  Kokkos::RangePolicy<atomic_tag, execution_space, size_type>;
1322  Kokkos::parallel_reduce
1323  ("Tpetra::MultiVector unpack var stride atomic debug only",
1324  range_type (0, idx.size ()),
1325  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1326  (execSpace, dst, src, idx, col, op, numCols),
1327  errorCount);
1328  }
1329  else {
1330  using range_type =
1331  Kokkos::RangePolicy<nonatomic_tag, execution_space, size_type>;
1332  Kokkos::parallel_reduce
1333  ("Tpetra::MultiVector unpack var stride nonatomic debug only",
1334  range_type (0, idx.size ()),
1335  UnpackArrayMultiColumnVariableStrideWithBoundsCheck
1336  (execSpace, dst, src, idx, col, op, numCols),
1337  errorCount);
1338  }
1339 
1340  if (errorCount != 0) {
1341  constexpr size_t maxNumBadIndicesToPrint = 100;
1342 
1343  std::ostringstream os; // for error reporting
1344  os << "Tpetra::MultiVector multicolumn variable stride unpack kernel "
1345  "found " << errorCount
1346  << " error" << (errorCount != size_t (1) ? "s" : "") << ". ";
1347 
1348  // Go back and find any out-of-bounds entries in the array of
1349  // row indices. Performance doesn't matter since we are
1350  // already in an error state, so we can do this sequentially,
1351  // on host.
1352  auto idx_h = Kokkos::create_mirror_view (idx);
1353  Kokkos::deep_copy (idx_h, idx);
1354 
1355  std::vector<row_index_type> badRows;
1356  const size_type numRowInds = idx_h.extent (0);
1357  for (size_type k = 0; k < numRowInds; ++k) {
1358  if (idx_h(k) < static_cast<row_index_type> (0) ||
1359  idx_h(k) >= static_cast<row_index_type> (dst.extent (0))) {
1360  badRows.push_back (idx_h(k));
1361  }
1362  }
1363 
1364  if (badRows.size () != 0) {
1365  os << badRows.size () << " out-of-bounds row ind"
1366  << (badRows.size () != size_t (1) ? "ices" : "ex");
1367  if (badRows.size () <= maxNumBadIndicesToPrint) {
1368  os << ": [";
1369  for (size_t k = 0; k < badRows.size (); ++k) {
1370  os << badRows[k];
1371  if (k + 1 < badRows.size ()) {
1372  os << ", ";
1373  }
1374  }
1375  os << "]. ";
1376  }
1377  else {
1378  os << ". ";
1379  }
1380  }
1381  else {
1382  os << "No out-of-bounds row indices. ";
1383  }
1384 
1385  // Go back and find any out-of-bounds entries in the array
1386  // of column indices.
1387  auto col_h = Kokkos::create_mirror_view (col);
1388  Kokkos::deep_copy (col_h, col);
1389 
1390  std::vector<col_index_type> badCols;
1391  const size_type numColInds = col_h.extent (0);
1392  for (size_type k = 0; k < numColInds; ++k) {
1393  if (Impl::outOfBounds<col_index_type> (col_h(k), dst.extent (1))) {
1394  badCols.push_back (col_h(k));
1395  }
1396  }
1397 
1398  if (badCols.size () != 0) {
1399  os << badCols.size () << " out-of-bounds column ind"
1400  << (badCols.size () != size_t (1) ? "ices" : "ex");
1401  if (badCols.size () <= maxNumBadIndicesToPrint) {
1402  for (size_t k = 0; k < badCols.size (); ++k) {
1403  os << ": [";
1404  os << badCols[k];
1405  if (k + 1 < badCols.size ()) {
1406  os << ", ";
1407  }
1408  }
1409  os << "]. ";
1410  }
1411  else {
1412  os << ". ";
1413  }
1414  }
1415  else {
1416  os << "No out-of-bounds column indices. ";
1417  }
1418 
1419  TEUCHOS_TEST_FOR_EXCEPTION
1420  (errorCount != 0 && badRows.size () == 0 && badCols.size () == 0,
1421  std::logic_error, "Tpetra::MultiVector variable stride unpack "
1422  "kernel reports errorCount=" << errorCount << ", but we failed "
1423  "to find any bad rows or columns. This should never happen. "
1424  "Please report this to the Tpetra developers.");
1425 
1426  throw std::runtime_error (os.str ());
1427  } // hasErr
1428  }
1429  };
1430 
1431  template <typename ExecutionSpace,
1432  typename DstView,
1433  typename SrcView,
1434  typename IdxView,
1435  typename ColView,
1436  typename Op>
1437  void
1438  unpack_array_multi_column_variable_stride (const ExecutionSpace& execSpace,
1439  const DstView& dst,
1440  const SrcView& src,
1441  const IdxView& idx,
1442  const ColView& col,
1443  const Op& op,
1444  const size_t numCols,
1445  const bool use_atomic_updates,
1446  const bool debug)
1447  {
1448  static_assert (Kokkos::Impl::is_view<DstView>::value,
1449  "DstView must be a Kokkos::View.");
1450  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1451  "SrcView must be a Kokkos::View.");
1452  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1453  "IdxView must be a Kokkos::View.");
1454  static_assert (Kokkos::Impl::is_view<ColView>::value,
1455  "ColView must be a Kokkos::View.");
1456  static_assert (static_cast<int> (DstView::rank) == 2,
1457  "DstView must be a rank-2 Kokkos::View.");
1458  static_assert (static_cast<int> (SrcView::rank) == 1,
1459  "SrcView must be a rank-1 Kokkos::View.");
1460  static_assert (static_cast<int> (IdxView::rank) == 1,
1461  "IdxView must be a rank-1 Kokkos::View.");
1462  static_assert (static_cast<int> (ColView::rank) == 1,
1463  "ColView must be a rank-1 Kokkos::View.");
1464 
1465  if (debug) {
1466  using impl_type =
1467  UnpackArrayMultiColumnVariableStrideWithBoundsCheck<ExecutionSpace,
1468  DstView, SrcView, IdxView, ColView, Op>;
1469  impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1470  use_atomic_updates);
1471  }
1472  else {
1473  using impl_type = UnpackArrayMultiColumnVariableStride<ExecutionSpace,
1474  DstView, SrcView, IdxView, ColView, Op>;
1475  impl_type::unpack (execSpace, dst, src, idx, col, op, numCols,
1476  use_atomic_updates);
1477  }
1478  }
1479 
1480  template <typename DstView, typename SrcView,
1481  typename DstIdxView, typename SrcIdxView, typename Op,
1482  typename Enabled = void>
1483  struct PermuteArrayMultiColumn {
1484  typedef typename DstView::execution_space execution_space;
1485  typedef typename execution_space::size_type size_type;
1486 
1487  DstView dst;
1488  SrcView src;
1489  DstIdxView dst_idx;
1490  SrcIdxView src_idx;
1491  size_t numCols;
1492  Op op;
1493 
1494  PermuteArrayMultiColumn (const DstView& dst_,
1495  const SrcView& src_,
1496  const DstIdxView& dst_idx_,
1497  const SrcIdxView& src_idx_,
1498  const size_t numCols_,
1499  const Op& op_) :
1500  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1501  numCols(numCols_), op(op_) {}
1502 
1503  KOKKOS_INLINE_FUNCTION void
1504  operator() (const size_type k) const {
1505  const typename DstIdxView::value_type toRow = dst_idx(k);
1506  const typename SrcIdxView::value_type fromRow = src_idx(k);
1507  nonatomic_tag tag; // permute does not need atomics
1508  for (size_t j = 0; j < numCols; ++j) {
1509  op(tag, dst(toRow, j), src(fromRow, j));
1510  }
1511  }
1512 
1513  static void
1514  permute (const DstView& dst,
1515  const SrcView& src,
1516  const DstIdxView& dst_idx,
1517  const SrcIdxView& src_idx,
1518  const size_t numCols,
1519  const Op& op)
1520  {
1521  using range_type =
1522  Kokkos::RangePolicy<execution_space, size_type>;
1523  // permute does not need atomics for Op
1524  const size_type n = std::min (dst_idx.size (), src_idx.size ());
1525  Kokkos::parallel_for
1526  ("Tpetra::MultiVector permute multicol const stride",
1527  range_type (0, n),
1528  PermuteArrayMultiColumn (dst, src, dst_idx, src_idx, numCols, op));
1529  }
1530  };
1531 
1532  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1533  // SrcView::Rank == 2
1534  template <typename DstView, typename SrcView,
1535  typename DstIdxView, typename SrcIdxView, typename Op>
1536  void permute_array_multi_column(const DstView& dst,
1537  const SrcView& src,
1538  const DstIdxView& dst_idx,
1539  const SrcIdxView& src_idx,
1540  size_t numCols,
1541  const Op& op) {
1542  PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView,Op>::permute(
1543  dst, src, dst_idx, src_idx, numCols, op);
1544  }
1545 
1546  template <typename DstView, typename SrcView,
1547  typename DstIdxView, typename SrcIdxView,
1548  typename DstColView, typename SrcColView, typename Op,
1549  typename Enabled = void>
1550  struct PermuteArrayMultiColumnVariableStride {
1551  typedef typename DstView::execution_space execution_space;
1552  typedef typename execution_space::size_type size_type;
1553 
1554  DstView dst;
1555  SrcView src;
1556  DstIdxView dst_idx;
1557  SrcIdxView src_idx;
1558  DstColView dst_col;
1559  SrcColView src_col;
1560  size_t numCols;
1561  Op op;
1562 
1563  PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1564  const SrcView& src_,
1565  const DstIdxView& dst_idx_,
1566  const SrcIdxView& src_idx_,
1567  const DstColView& dst_col_,
1568  const SrcColView& src_col_,
1569  const size_t numCols_,
1570  const Op& op_) :
1571  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1572  dst_col(dst_col_), src_col(src_col_),
1573  numCols(numCols_), op(op_) {}
1574 
1575  KOKKOS_INLINE_FUNCTION void
1576  operator() (const size_type k) const {
1577  const typename DstIdxView::value_type toRow = dst_idx(k);
1578  const typename SrcIdxView::value_type fromRow = src_idx(k);
1579  const nonatomic_tag tag; // permute does not need atomics
1580  for (size_t j = 0; j < numCols; ++j) {
1581  op(tag, dst(toRow, dst_col(j)), src(fromRow, src_col(j)));
1582  }
1583  }
1584 
1585  static void
1586  permute (const DstView& dst,
1587  const SrcView& src,
1588  const DstIdxView& dst_idx,
1589  const SrcIdxView& src_idx,
1590  const DstColView& dst_col,
1591  const SrcColView& src_col,
1592  const size_t numCols,
1593  const Op& op)
1594  {
1595  using range_type = Kokkos::RangePolicy<execution_space, size_type>;
1596  const size_type n = std::min (dst_idx.size (), src_idx.size ());
1597  Kokkos::parallel_for
1598  ("Tpetra::MultiVector permute multicol var stride",
1599  range_type (0, n),
1600  PermuteArrayMultiColumnVariableStride (dst, src, dst_idx, src_idx,
1601  dst_col, src_col, numCols, op));
1602  }
1603  };
1604 
1605  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1606  // SrcView::Rank == 2
1607  template <typename DstView, typename SrcView,
1608  typename DstIdxView, typename SrcIdxView,
1609  typename DstColView, typename SrcColView, typename Op>
1610  void permute_array_multi_column_variable_stride(const DstView& dst,
1611  const SrcView& src,
1612  const DstIdxView& dst_idx,
1613  const SrcIdxView& src_idx,
1614  const DstColView& dst_col,
1615  const SrcColView& src_col,
1616  size_t numCols,
1617  const Op& op) {
1618  PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1619  DstIdxView,SrcIdxView,DstColView,SrcColView,Op>::permute(
1620  dst, src, dst_idx, src_idx, dst_col, src_col, numCols, op);
1621  }
1622 
1623 } // Details namespace
1624 } // KokkosRefactor namespace
1625 } // Tpetra namespace
1626 
1627 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
Implementation details of Tpetra.
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upp...