Tpetra parallel linear algebra  Version of the Day
Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp
1 /*
2 // @HEADER
3 // ***********************************************************************
4 //
5 // Tpetra: Templated Linear Algebra Services Package
6 // Copyright (2008) Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ************************************************************************
41 // @HEADER
42 */
43 
44 // mfh 13/14 Sep 2013 The "should use as<size_t>" comments are both
45 // incorrect (as() is not a device function) and usually irrelevant
46 // (it would only matter if LocalOrdinal were bigger than size_t on a
47 // particular platform, which is unlikely).
48 
49 #ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
50 #define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
51 
#include "Kokkos_Core.hpp"
#include "Kokkos_ArithTraits.hpp"
#include <limits>
#include <sstream>
#include <stdexcept>
#include <type_traits>
#include <vector>
56 
57 namespace Tpetra {
58 namespace KokkosRefactor {
59 namespace Details {
60 
namespace Impl {

/// \brief Test whether an integer lies outside [0, exclusiveUpperBound).
///
/// The second template parameter selects a partial specialization
/// depending on whether \c IntegerType is signed, so that the
/// "less than zero" comparison is only compiled for signed types.
/// Only the two partial specializations define test().
///
/// \tparam IntegerType Built-in integer type of the value to test.
/// \tparam isSigned Whether \c IntegerType is signed; defaults to
///   <tt>std::numeric_limits<IntegerType>::is_signed</tt>.
template<class IntegerType,
         const bool isSigned = std::numeric_limits<IntegerType>::is_signed>
struct OutOfBounds {
  /// \brief Return true if and only if x is not in
  ///   [0, exclusiveUpperBound).
  static KOKKOS_INLINE_FUNCTION bool
  test (const IntegerType x,
        const IntegerType exclusiveUpperBound);
};

/// Signed case: both the lower and the upper bound must be checked.
template<class IntegerType>
struct OutOfBounds<IntegerType, true> {
  static KOKKOS_INLINE_FUNCTION bool
  test (const IntegerType x,
        const IntegerType exclusiveUpperBound)
  {
    const IntegerType zero = static_cast<IntegerType> (0);
    if (x < zero) {
      return true; // negative values are always out of bounds
    }
    return x >= exclusiveUpperBound;
  }
};

/// Unsigned case: the value cannot be negative, so only the upper
/// bound is checked.
template<class IntegerType>
struct OutOfBounds<IntegerType, false> {
  static KOKKOS_INLINE_FUNCTION bool
  test (const IntegerType x,
        const IntegerType exclusiveUpperBound)
  {
    return ! (x < exclusiveUpperBound);
  }
};

/// \brief Convenience wrapper around OutOfBounds::test.
///
/// \param x Value to test.
/// \param exclusiveUpperBound Exclusive upper bound of the valid range.
/// \return true if x is not in [0, exclusiveUpperBound), else false.
template<class IntegerType>
KOKKOS_INLINE_FUNCTION bool
outOfBounds (const IntegerType x, const IntegerType exclusiveUpperBound)
{
  typedef OutOfBounds<IntegerType> impl_type;
  return impl_type::test (x, exclusiveUpperBound);
}

} // namespace Impl
114 
115  // Functors for implementing packAndPrepare and unpackAndCombine
116  // through parallel_for
117 
118  template <typename DstView, typename SrcView, typename IdxView>
119  struct PackArraySingleColumn {
120  typedef typename DstView::execution_space execution_space;
121  typedef typename execution_space::size_type size_type;
122 
123  DstView dst;
124  SrcView src;
125  IdxView idx;
126  size_t col;
127 
128  PackArraySingleColumn(const DstView& dst_,
129  const SrcView& src_,
130  const IdxView& idx_,
131  size_t col_) :
132  dst(dst_), src(src_), idx(idx_), col(col_) {}
133 
134  KOKKOS_INLINE_FUNCTION
135  void operator()( const size_type k ) const {
136  dst(k) = src(idx(k), col);
137  }
138 
139  static void
140  pack (const DstView& dst,
141  const SrcView& src,
142  const IdxView& idx,
143  size_t col)
144  {
145  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
146  Kokkos::parallel_for (range_type (0, idx.size ()),
147  PackArraySingleColumn (dst,src,idx,col));
148  }
149  };
150 
151  template <typename DstView,
152  typename SrcView,
153  typename IdxView,
154  typename SizeType = typename DstView::execution_space::size_type>
155  class PackArraySingleColumnWithBoundsCheck {
156  private:
157  static_assert (Kokkos::Impl::is_view<DstView>::value,
158  "DstView must be a Kokkos::View.");
159  static_assert (Kokkos::Impl::is_view<SrcView>::value,
160  "SrcView must be a Kokkos::View.");
161  static_assert (Kokkos::Impl::is_view<IdxView>::value,
162  "IdxView must be a Kokkos::View.");
163  static_assert (static_cast<int> (DstView::rank) == 1,
164  "DstView must be a rank-1 Kokkos::View.");
165  static_assert (static_cast<int> (SrcView::rank) == 2,
166  "SrcView must be a rank-2 Kokkos::View.");
167  static_assert (static_cast<int> (IdxView::rank) == 1,
168  "IdxView must be a rank-1 Kokkos::View.");
169  static_assert (std::is_integral<SizeType>::value,
170  "SizeType must be a built-in integer type.");
171  public:
172  typedef SizeType size_type;
174  typedef int value_type;
175 
176  private:
177  DstView dst;
178  SrcView src;
179  IdxView idx;
180  size_type col;
181 
182  public:
183  PackArraySingleColumnWithBoundsCheck (const DstView& dst_,
184  const SrcView& src_,
185  const IdxView& idx_,
186  const size_type col_) :
187  dst (dst_), src (src_), idx (idx_), col (col_) {}
188 
189  KOKKOS_INLINE_FUNCTION void
190  operator() (const size_type& k, value_type& result) const {
191  typedef typename IdxView::non_const_value_type index_type;
192 
193  const index_type lclRow = idx(k);
194  if (lclRow < static_cast<index_type> (0) ||
195  lclRow >= static_cast<index_type> (src.dimension_0 ())) {
196  result = 0; // failed!
197  }
198  else {
199  dst(k) = src(lclRow, col);
200  }
201  }
202 
203  KOKKOS_INLINE_FUNCTION
204  void init (value_type& initialResult) const {
205  initialResult = 1; // success
206  }
207 
208  KOKKOS_INLINE_FUNCTION void
209  join (volatile value_type& dstResult,
210  const volatile value_type& srcResult) const
211  {
212  dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
213  }
214 
215  static void
216  pack (const DstView& dst,
217  const SrcView& src,
218  const IdxView& idx,
219  const size_type col)
220  {
221  typedef typename DstView::execution_space execution_space;
222  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
223  typedef typename IdxView::non_const_value_type index_type;
224 
225  int result = 1;
226  Kokkos::parallel_reduce (range_type (0, idx.size ()),
227  PackArraySingleColumnWithBoundsCheck (dst, src,
228  idx, col),
229  result);
230  if (result != 1) {
231  // Go back and find the out-of-bounds entries in the index
232  // array. Performance doesn't matter since we are already in
233  // an error state, so we can do this sequentially, on host.
234  auto idx_h = Kokkos::create_mirror_view (idx);
235  Kokkos::deep_copy (idx_h, idx);
236 
237  std::vector<index_type> badIndices;
238  const size_type numInds = idx_h.dimension_0 ();
239  for (size_type k = 0; k < numInds; ++k) {
240  if (idx_h(k) < static_cast<index_type> (0) ||
241  idx_h(k) >= static_cast<index_type> (src.dimension_0 ())) {
242  badIndices.push_back (idx_h(k));
243  }
244  }
245 
246  std::ostringstream os;
247  os << "MultiVector single-column pack kernel had "
248  << badIndices.size () << " out-of bounds index/ices. "
249  "Here they are: [";
250  for (size_t k = 0; k < badIndices.size (); ++k) {
251  os << badIndices[k];
252  if (k + 1 < badIndices.size ()) {
253  os << ", ";
254  }
255  }
256  os << "].";
257  throw std::runtime_error (os.str ());
258  }
259  }
260  };
261 
262 
263  template <typename DstView, typename SrcView, typename IdxView>
264  void
265  pack_array_single_column (const DstView& dst,
266  const SrcView& src,
267  const IdxView& idx,
268  const size_t col,
269  const bool debug = true)
270  {
271  static_assert (Kokkos::Impl::is_view<DstView>::value,
272  "DstView must be a Kokkos::View.");
273  static_assert (Kokkos::Impl::is_view<SrcView>::value,
274  "SrcView must be a Kokkos::View.");
275  static_assert (Kokkos::Impl::is_view<IdxView>::value,
276  "IdxView must be a Kokkos::View.");
277  static_assert (static_cast<int> (DstView::rank) == 1,
278  "DstView must be a rank-1 Kokkos::View.");
279  static_assert (static_cast<int> (SrcView::rank) == 2,
280  "SrcView must be a rank-2 Kokkos::View.");
281  static_assert (static_cast<int> (IdxView::rank) == 1,
282  "IdxView must be a rank-1 Kokkos::View.");
283 
284  if (debug) {
285  typedef PackArraySingleColumnWithBoundsCheck<DstView,SrcView,IdxView> impl_type;
286  impl_type::pack (dst, src, idx, col);
287  }
288  else {
289  typedef PackArraySingleColumn<DstView,SrcView,IdxView> impl_type;
290  impl_type::pack (dst, src, idx, col);
291  }
292  }
293 
294  template <typename DstView, typename SrcView, typename IdxView>
295  struct PackArrayMultiColumn {
296  typedef typename DstView::execution_space execution_space;
297  typedef typename execution_space::size_type size_type;
298 
299  DstView dst;
300  SrcView src;
301  IdxView idx;
302  size_t numCols;
303 
304  PackArrayMultiColumn(const DstView& dst_,
305  const SrcView& src_,
306  const IdxView& idx_,
307  size_t numCols_) :
308  dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
309 
310  KOKKOS_INLINE_FUNCTION
311  void operator()( const size_type k ) const {
312  const typename IdxView::value_type localRow = idx(k);
313  const size_t offset = k*numCols;
314  for (size_t j = 0; j < numCols; ++j)
315  dst(offset + j) = src(localRow, j);
316  }
317 
318  static void pack(const DstView& dst,
319  const SrcView& src,
320  const IdxView& idx,
321  size_t numCols) {
322  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
323  Kokkos::parallel_for (range_type (0, idx.size ()),
324  PackArrayMultiColumn (dst,src,idx,numCols));
325  }
326  };
327 
328  template <typename DstView,
329  typename SrcView,
330  typename IdxView,
331  typename SizeType = typename DstView::execution_space::size_type>
332  class PackArrayMultiColumnWithBoundsCheck {
333  public:
334  typedef SizeType size_type;
336  typedef int value_type;
337 
338  private:
339  DstView dst;
340  SrcView src;
341  IdxView idx;
342  size_type numCols;
343 
344  public:
345  PackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
346  const SrcView& src_,
347  const IdxView& idx_,
348  const size_type numCols_) :
349  dst (dst_), src (src_), idx (idx_), numCols (numCols_) {}
350 
351  KOKKOS_INLINE_FUNCTION void
352  operator() (const size_type& k, value_type& result) const {
353  typedef typename IdxView::non_const_value_type index_type;
354 
355  const index_type lclRow = idx(k);
356  if (lclRow < static_cast<index_type> (0) ||
357  lclRow >= static_cast<index_type> (src.dimension_0 ())) {
358  result = 0; // failed!
359  }
360  else {
361  const size_type offset = k*numCols;
362  for (size_type j = 0; j < numCols; ++j) {
363  dst(offset + j) = src(lclRow, j);
364  }
365  }
366  }
367 
368  KOKKOS_INLINE_FUNCTION
369  void init (value_type& initialResult) const {
370  initialResult = 1; // success
371  }
372 
373  KOKKOS_INLINE_FUNCTION void
374  join (volatile value_type& dstResult,
375  const volatile value_type& srcResult) const
376  {
377  dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
378  }
379 
380  static void
381  pack (const DstView& dst,
382  const SrcView& src,
383  const IdxView& idx,
384  const size_type numCols)
385  {
386  typedef typename DstView::execution_space execution_space;
387  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
388  typedef typename IdxView::non_const_value_type index_type;
389 
390  int result = 1;
391  Kokkos::parallel_reduce (range_type (0, idx.size ()),
392  PackArrayMultiColumnWithBoundsCheck (dst, src,
393  idx, numCols),
394  result);
395  if (result != 1) {
396  // Go back and find the out-of-bounds entries in the index
397  // array. Performance doesn't matter since we are already in
398  // an error state, so we can do this sequentially, on host.
399  auto idx_h = Kokkos::create_mirror_view (idx);
400  Kokkos::deep_copy (idx_h, idx);
401 
402  std::vector<index_type> badIndices;
403  const size_type numInds = idx_h.dimension_0 ();
404  for (size_type k = 0; k < numInds; ++k) {
405  if (idx_h(k) < static_cast<index_type> (0) ||
406  idx_h(k) >= static_cast<index_type> (src.dimension_0 ())) {
407  badIndices.push_back (idx_h(k));
408  }
409  }
410 
411  std::ostringstream os;
412  os << "MultiVector multiple-column pack kernel had "
413  << badIndices.size () << " out-of bounds index/ices. "
414  "Here they are: [";
415  for (size_t k = 0; k < badIndices.size (); ++k) {
416  os << badIndices[k];
417  if (k + 1 < badIndices.size ()) {
418  os << ", ";
419  }
420  }
421  os << "].";
422  throw std::runtime_error (os.str ());
423  }
424  }
425  };
426 
427 
428  template <typename DstView,
429  typename SrcView,
430  typename IdxView>
431  void
432  pack_array_multi_column (const DstView& dst,
433  const SrcView& src,
434  const IdxView& idx,
435  const size_t numCols,
436  const bool debug = true)
437  {
438  static_assert (Kokkos::Impl::is_view<DstView>::value,
439  "DstView must be a Kokkos::View.");
440  static_assert (Kokkos::Impl::is_view<SrcView>::value,
441  "SrcView must be a Kokkos::View.");
442  static_assert (Kokkos::Impl::is_view<IdxView>::value,
443  "IdxView must be a Kokkos::View.");
444  static_assert (static_cast<int> (DstView::rank) == 1,
445  "DstView must be a rank-1 Kokkos::View.");
446  static_assert (static_cast<int> (SrcView::rank) == 2,
447  "SrcView must be a rank-2 Kokkos::View.");
448  static_assert (static_cast<int> (IdxView::rank) == 1,
449  "IdxView must be a rank-1 Kokkos::View.");
450 
451  if (debug) {
452  typedef PackArrayMultiColumnWithBoundsCheck<DstView,
453  SrcView, IdxView> impl_type;
454  impl_type::pack (dst, src, idx, numCols);
455  }
456  else {
457  typedef PackArrayMultiColumn<DstView, SrcView, IdxView> impl_type;
458  impl_type::pack (dst, src, idx, numCols);
459  }
460  }
461 
462  template <typename DstView, typename SrcView, typename IdxView,
463  typename ColView>
464  struct PackArrayMultiColumnVariableStride {
465  typedef typename DstView::execution_space execution_space;
466  typedef typename execution_space::size_type size_type;
467 
468  DstView dst;
469  SrcView src;
470  IdxView idx;
471  ColView col;
472  size_t numCols;
473 
474  PackArrayMultiColumnVariableStride(const DstView& dst_,
475  const SrcView& src_,
476  const IdxView& idx_,
477  const ColView& col_,
478  size_t numCols_) :
479  dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
480 
481  KOKKOS_INLINE_FUNCTION
482  void operator()( const size_type k ) const {
483  const typename IdxView::value_type localRow = idx(k);
484  const size_t offset = k*numCols;
485  for (size_t j = 0; j < numCols; ++j)
486  dst(offset + j) = src(localRow, col(j));
487  }
488 
489  static void pack(const DstView& dst,
490  const SrcView& src,
491  const IdxView& idx,
492  const ColView& col,
493  size_t numCols) {
494  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
495  Kokkos::parallel_for (range_type (0, idx.size ()),
496  PackArrayMultiColumnVariableStride(
497  dst,src,idx,col,numCols) );
498  }
499  };
500 
501  template <typename DstView,
502  typename SrcView,
503  typename IdxView,
504  typename ColView,
505  typename SizeType = typename DstView::execution_space::size_type>
506  class PackArrayMultiColumnVariableStrideWithBoundsCheck {
507  public:
508  typedef SizeType size_type;
510  typedef Kokkos::pair<int, int> value_type;
511 
512  private:
513  DstView dst;
514  SrcView src;
515  IdxView idx;
516  ColView col;
517  size_type numCols;
518 
519  public:
520  PackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
521  const SrcView& src_,
522  const IdxView& idx_,
523  const ColView& col_,
524  const size_type numCols_) :
525  dst (dst_), src (src_), idx (idx_), col (col_), numCols (numCols_) {}
526 
527  KOKKOS_INLINE_FUNCTION void
528  operator() (const size_type& k, value_type& result) const {
529  typedef typename IdxView::non_const_value_type row_index_type;
530  typedef typename ColView::non_const_value_type col_index_type;
531 
532  const row_index_type lclRow = idx(k);
533  if (lclRow < static_cast<row_index_type> (0) ||
534  lclRow >= static_cast<row_index_type> (src.dimension_0 ())) {
535  result.first = 0; // failed!
536  }
537  else {
538  const size_type offset = k*numCols;
539  for (size_type j = 0; j < numCols; ++j) {
540  const col_index_type lclCol = col(j);
541  if (Impl::outOfBounds<col_index_type> (lclCol, src.dimension_1 ())) {
542  result.second = 0; // failed!
543  }
544  else { // all indices are valid; do the assignment
545  dst(offset + j) = src(lclRow, lclCol);
546  }
547  }
548  }
549  }
550 
551  KOKKOS_INLINE_FUNCTION void
552  init (value_type& initialResult) const {
553  initialResult.first = 1; // success
554  initialResult.second = 1; // success
555  }
556 
557  KOKKOS_INLINE_FUNCTION void
558  join (volatile value_type& dstResult,
559  const volatile value_type& srcResult) const
560  {
561  dstResult.first = (dstResult.first == 0 || srcResult.first == 0) ? 0 : 1;
562  dstResult.second = (dstResult.second == 0 || srcResult.second == 0) ? 0 : 1;
563  }
564 
565  static void
566  pack (const DstView& dst,
567  const SrcView& src,
568  const IdxView& idx,
569  const ColView& col,
570  const size_type numCols)
571  {
572  using Kokkos::parallel_reduce;
573  typedef typename DstView::execution_space execution_space;
574  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
575  typedef typename IdxView::non_const_value_type row_index_type;
576  typedef typename ColView::non_const_value_type col_index_type;
577 
578  Kokkos::pair<int, int> result (1, 1);
579  parallel_reduce (range_type (0, idx.size ()),
580  PackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src,
581  idx, col,
582  numCols),
583  result);
584  const bool hasBadRows = (result.first != 1);
585  const bool hasBadCols = (result.second != 1);
586  const bool hasErr = hasBadRows || hasBadCols;
587  if (hasErr) {
588  std::ostringstream os; // for error reporting
589 
590  if (hasBadRows) {
591  // Go back and find the out-of-bounds entries in the array of
592  // row indices. Performance doesn't matter since we are already
593  // in an error state, so we can do this sequentially, on host.
594  auto idx_h = Kokkos::create_mirror_view (idx);
595  Kokkos::deep_copy (idx_h, idx);
596 
597  std::vector<row_index_type> badRows;
598  const size_type numInds = idx_h.dimension_0 ();
599  for (size_type k = 0; k < numInds; ++k) {
600  if (Impl::outOfBounds<row_index_type> (idx_h(k), src.dimension_0 ())) {
601  badRows.push_back (idx_h(k));
602  }
603  }
604  os << "MultiVector multiple-column pack kernel had "
605  << badRows.size () << " out-of bounds row index/ices: [";
606  for (size_t k = 0; k < badRows.size (); ++k) {
607  os << badRows[k];
608  if (k + 1 < badRows.size ()) {
609  os << ", ";
610  }
611  }
612  os << "].";
613  } // hasBadRows
614 
615  if (hasBadCols) {
616  // Go back and find the out-of-bounds entries in the array
617  // of column indices. Performance doesn't matter since we
618  // are already in an error state, so we can do this
619  // sequentially, on host.
620  auto col_h = Kokkos::create_mirror_view (col);
621  Kokkos::deep_copy (col_h, col);
622 
623  std::vector<col_index_type> badCols;
624  const size_type numInds = col_h.dimension_0 ();
625  for (size_type k = 0; k < numInds; ++k) {
626  if (Impl::outOfBounds<col_index_type> (col_h(k), src.dimension_1 ())) {
627  badCols.push_back (col_h(k));
628  }
629  }
630 
631  if (hasBadRows) {
632  os << " ";
633  }
634  os << "MultiVector multiple-column pack kernel had "
635  << badCols.size () << " out-of bounds column index/ices: [";
636  for (size_t k = 0; k < badCols.size (); ++k) {
637  os << badCols[k];
638  if (k + 1 < badCols.size ()) {
639  os << ", ";
640  }
641  }
642  os << "].";
643  } // hasBadCols
644 
645  throw std::runtime_error (os.str ());
646  } // hasErr
647  }
648  };
649 
650  template <typename DstView,
651  typename SrcView,
652  typename IdxView,
653  typename ColView>
654  void
655  pack_array_multi_column_variable_stride (const DstView& dst,
656  const SrcView& src,
657  const IdxView& idx,
658  const ColView& col,
659  const size_t numCols,
660  const bool debug = true)
661  {
662  static_assert (Kokkos::Impl::is_view<DstView>::value,
663  "DstView must be a Kokkos::View.");
664  static_assert (Kokkos::Impl::is_view<SrcView>::value,
665  "SrcView must be a Kokkos::View.");
666  static_assert (Kokkos::Impl::is_view<IdxView>::value,
667  "IdxView must be a Kokkos::View.");
668  static_assert (Kokkos::Impl::is_view<ColView>::value,
669  "ColView must be a Kokkos::View.");
670  static_assert (static_cast<int> (DstView::rank) == 1,
671  "DstView must be a rank-1 Kokkos::View.");
672  static_assert (static_cast<int> (SrcView::rank) == 2,
673  "SrcView must be a rank-2 Kokkos::View.");
674  static_assert (static_cast<int> (IdxView::rank) == 1,
675  "IdxView must be a rank-1 Kokkos::View.");
676  static_assert (static_cast<int> (ColView::rank) == 1,
677  "ColView must be a rank-1 Kokkos::View.");
678 
679  if (debug) {
680  typedef PackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
681  SrcView, IdxView, ColView> impl_type;
682  impl_type::pack (dst, src, idx, col, numCols);
683  }
684  else {
685  typedef PackArrayMultiColumnVariableStride<DstView,
686  SrcView, IdxView, ColView> impl_type;
687  impl_type::pack (dst, src, idx, col, numCols);
688  }
689  }
690 
691  // FIXME (mfh 16 Dec 2016) It looks like these are totally generic
692  // and don't get specialized in Stokhos. This suggests we can
693  // template them on the execution space, and specialize them for
694  // Kokkos::Serial so they don't do atomic update operations. It
695  // might be better to make the unpack kernels (the pack kernels
696  // don't use atomic updates) functions that we prebuild, in the
697  // manner of KokkosKernels, or at least handle via ETI.
698 
699  template<class ExecutionSpace>
700  struct InsertOp {
701  template <typename Scalar>
702  KOKKOS_INLINE_FUNCTION
703  void operator() (Scalar& dest, const Scalar& src) const {
704  Kokkos::atomic_assign(&dest, src);
705  }
706  };
707 
708 #ifdef KOKKOS_HAVE_SERIAL
709  template<>
710  struct InsertOp< ::Kokkos::Serial > {
711  template <typename Scalar>
712  KOKKOS_INLINE_FUNCTION
713  void operator() (Scalar& dest, const Scalar& src) const {
714  dest = src; // no need for an atomic operation here
715  }
716  };
717 #endif // KOKKOS_HAVE_SERIAL
718 
719  template<class ExecutionSpace>
720  struct AddOp {
721  template <typename Scalar>
722  KOKKOS_INLINE_FUNCTION
723  void operator() (Scalar& dest, const Scalar& src) const {
724  Kokkos::atomic_add(&dest, src);
725  }
726  };
727 
728 #ifdef KOKKOS_HAVE_SERIAL
729  template<>
730  struct AddOp< ::Kokkos::Serial > {
731  template <typename Scalar>
732  KOKKOS_INLINE_FUNCTION
733  void operator() (Scalar& dest, const Scalar& src) const {
734  dest += src; // no need for an atomic operation here
735  }
736  };
737 #endif // KOKKOS_HAVE_SERIAL
738 
739  template<class ExecutionSpace>
740  struct AbsMaxOp {
741  // ETP: Is this really what we want? This seems very odd if
742  // Scalar != SCT::mag_type (e.g., Scalar == std::complex<T>)
743  //
744  // mfh: I didn't write this code, but note that we don't use T =
745  // Scalar here, we use T = ArithTraits<Scalar>::mag_type. That
746  // makes this code reasonable.
747  template <typename T>
748  KOKKOS_INLINE_FUNCTION
749  T max(const T& a, const T& b) const { return a > b ? a : b; }
750 
751  template <typename Scalar>
752  KOKKOS_INLINE_FUNCTION
753  void operator() (Scalar& dest, const Scalar& src) const {
754  typedef Kokkos::Details::ArithTraits<Scalar> SCT;
755  Kokkos::atomic_assign(&dest, Scalar(max(SCT::abs(dest),SCT::abs(src))));
756  }
757  };
758 
759 #ifdef KOKKOS_HAVE_SERIAL
760  template<>
761  struct AbsMaxOp< ::Kokkos::Serial > {
762  // ETP: Is this really what we want? This seems very odd if
763  // Scalar != SCT::mag_type (e.g., Scalar == std::complex<T>)
764  //
765  // mfh: I didn't write this code, but note that we don't use T =
766  // Scalar here, we use T = ArithTraits<Scalar>::mag_type. That
767  // makes this code reasonable.
768  template <typename T>
769  KOKKOS_INLINE_FUNCTION
770  T max(const T& a, const T& b) const { return a > b ? a : b; }
771 
772  template <typename Scalar>
773  KOKKOS_INLINE_FUNCTION
774  void operator() (Scalar& dest, const Scalar& src) const {
775  typedef Kokkos::Details::ArithTraits<Scalar> SCT;
776  // no need for an atomic operation here
777  dest = static_cast<Scalar> (max (SCT::abs (dest), SCT::abs (src)));
778  }
779  };
780 #endif // KOKKOS_HAVE_SERIAL
781 
782  template <typename DstView, typename SrcView, typename IdxView, typename Op>
783  struct UnpackArrayMultiColumn {
784  typedef typename DstView::execution_space execution_space;
785  typedef typename execution_space::size_type size_type;
786 
787  DstView dst;
788  SrcView src;
789  IdxView idx;
790  Op op;
791  size_t numCols;
792 
793  UnpackArrayMultiColumn(const DstView& dst_,
794  const SrcView& src_,
795  const IdxView& idx_,
796  const Op& op_,
797  size_t numCols_) :
798  dst(dst_), src(src_), idx(idx_), op(op_), numCols(numCols_) {}
799 
800  KOKKOS_INLINE_FUNCTION
801  void operator()( const size_type k ) const {
802  const typename IdxView::value_type localRow = idx(k);
803  const size_t offset = k*numCols;
804  for (size_t j = 0; j < numCols; ++j)
805  op( dst(localRow,j), src(offset+j) );
806  }
807 
808  static void unpack(const DstView& dst,
809  const SrcView& src,
810  const IdxView& idx,
811  const Op& op,
812  size_t numCols) {
813  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
814  Kokkos::parallel_for (range_type (0, idx.size ()),
815  UnpackArrayMultiColumn (dst,src,idx,op,numCols));
816  }
817  };
818 
819  template <typename DstView,
820  typename SrcView,
821  typename IdxView,
822  typename Op,
823  typename SizeType = typename DstView::execution_space::size_type>
824  class UnpackArrayMultiColumnWithBoundsCheck {
825  static_assert (Kokkos::Impl::is_view<DstView>::value,
826  "DstView must be a Kokkos::View.");
827  static_assert (Kokkos::Impl::is_view<SrcView>::value,
828  "SrcView must be a Kokkos::View.");
829  static_assert (Kokkos::Impl::is_view<IdxView>::value,
830  "IdxView must be a Kokkos::View.");
831  static_assert (static_cast<int> (DstView::rank) == 2,
832  "DstView must be a rank-2 Kokkos::View.");
833  static_assert (static_cast<int> (SrcView::rank) == 1,
834  "SrcView must be a rank-1 Kokkos::View.");
835  static_assert (static_cast<int> (IdxView::rank) == 1,
836  "IdxView must be a rank-1 Kokkos::View.");
837  static_assert (std::is_integral<SizeType>::value,
838  "SizeType must be a built-in integer type.");
839 
840  public:
841  typedef SizeType size_type;
843  typedef int value_type;
844 
845  private:
846  DstView dst;
847  SrcView src;
848  IdxView idx;
849  Op op;
850  size_type numCols;
851 
852  public:
853  UnpackArrayMultiColumnWithBoundsCheck (const DstView& dst_,
854  const SrcView& src_,
855  const IdxView& idx_,
856  const Op& op_,
857  const size_type numCols_) :
858  dst (dst_), src (src_), idx (idx_), op (op_), numCols (numCols_)
859  {}
860 
861  KOKKOS_INLINE_FUNCTION
862  void operator() (const size_type& k, value_type& result) const {
863  typedef typename IdxView::non_const_value_type index_type;
864 
865  const index_type lclRow = idx(k);
866  if (lclRow < static_cast<index_type> (0) ||
867  lclRow >= static_cast<index_type> (dst.dimension_0 ())) {
868  result = 0; // failed!
869  }
870  else {
871  const size_type offset = k*numCols;
872  for (size_type j = 0; j < numCols; ++j)
873  op (dst(lclRow,j), src(offset+j));
874  }
875  }
876 
877  KOKKOS_INLINE_FUNCTION
878  void init (value_type& initialResult) const {
879  initialResult = 1; // success
880  }
881 
882  KOKKOS_INLINE_FUNCTION void
883  join (volatile value_type& dstResult,
884  const volatile value_type& srcResult) const
885  {
886  dstResult = (dstResult == 0 || srcResult == 0) ? 0 : 1;
887  }
888 
889  static void
890  unpack (const DstView& dst,
891  const SrcView& src,
892  const IdxView& idx,
893  const Op& op,
894  const size_type numCols)
895  {
896  typedef typename DstView::execution_space execution_space;
897  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
898  typedef typename IdxView::non_const_value_type index_type;
899 
900  int result = 1;
901  Kokkos::parallel_reduce (range_type (0, idx.size ()),
902  UnpackArrayMultiColumnWithBoundsCheck (dst,src,idx,op,numCols),
903  result);
904  if (result != 1) {
905  // Go back and find the out-of-bounds entries in the index
906  // array. Performance doesn't matter since we are already in
907  // an error state, so we can do this sequentially, on host.
908  auto idx_h = Kokkos::create_mirror_view (idx);
909  Kokkos::deep_copy (idx_h, idx);
910 
911  std::vector<index_type> badIndices;
912  const size_type numInds = idx_h.dimension_0 ();
913  for (size_type k = 0; k < numInds; ++k) {
914  if (idx_h(k) < static_cast<index_type> (0) ||
915  idx_h(k) >= static_cast<index_type> (dst.dimension_0 ())) {
916  badIndices.push_back (idx_h(k));
917  }
918  }
919 
920  std::ostringstream os;
921  os << "MultiVector unpack kernel had " << badIndices.size ()
922  << " out-of bounds index/ices. Here they are: [";
923  for (size_t k = 0; k < badIndices.size (); ++k) {
924  os << badIndices[k];
925  if (k + 1 < badIndices.size ()) {
926  os << ", ";
927  }
928  }
929  os << "].";
930  throw std::runtime_error (os.str ());
931  }
932  }
933  };
934 
935  template <typename DstView, typename SrcView, typename IdxView, typename Op>
936  void
937  unpack_array_multi_column (const DstView& dst,
938  const SrcView& src,
939  const IdxView& idx,
940  const Op& op,
941  const size_t numCols,
942  const bool debug = true)
943  {
944  static_assert (Kokkos::Impl::is_view<DstView>::value,
945  "DstView must be a Kokkos::View.");
946  static_assert (Kokkos::Impl::is_view<SrcView>::value,
947  "SrcView must be a Kokkos::View.");
948  static_assert (Kokkos::Impl::is_view<IdxView>::value,
949  "IdxView must be a Kokkos::View.");
950  static_assert (static_cast<int> (DstView::rank) == 2,
951  "DstView must be a rank-2 Kokkos::View.");
952  static_assert (static_cast<int> (SrcView::rank) == 1,
953  "SrcView must be a rank-1 Kokkos::View.");
954  static_assert (static_cast<int> (IdxView::rank) == 1,
955  "IdxView must be a rank-1 Kokkos::View.");
956 
957  if (debug) {
958  typedef UnpackArrayMultiColumnWithBoundsCheck<DstView,
959  SrcView, IdxView, Op> impl_type;
960  impl_type::unpack (dst, src, idx, op, numCols);
961  }
962  else {
963  typedef UnpackArrayMultiColumn<DstView,
964  SrcView, IdxView, Op> impl_type;
965  impl_type::unpack (dst, src, idx, op, numCols);
966  }
967  }
968 
969  template <typename DstView, typename SrcView, typename IdxView,
970  typename ColView, typename Op>
971  struct UnpackArrayMultiColumnVariableStride {
972  typedef typename DstView::execution_space execution_space;
973  typedef typename execution_space::size_type size_type;
974 
975  DstView dst;
976  SrcView src;
977  IdxView idx;
978  ColView col;
979  Op op;
980  size_t numCols;
981 
982  UnpackArrayMultiColumnVariableStride(const DstView& dst_,
983  const SrcView& src_,
984  const IdxView& idx_,
985  const ColView& col_,
986  const Op& op_,
987  size_t numCols_) :
988  dst(dst_), src(src_), idx(idx_), col(col_), op(op_), numCols(numCols_) {}
989 
990  KOKKOS_INLINE_FUNCTION
991  void operator()( const size_type k ) const {
992  const typename IdxView::value_type localRow = idx(k);
993  const size_t offset = k*numCols;
994  for (size_t j = 0; j < numCols; ++j)
995  op( dst(localRow,col(j)), src(offset+j) );
996  }
997 
998  static void unpack(const DstView& dst,
999  const SrcView& src,
1000  const IdxView& idx,
1001  const ColView& col,
1002  const Op& op,
1003  size_t numCols) {
1004  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
1005  Kokkos::parallel_for (range_type (0, idx.size ()),
1006  UnpackArrayMultiColumnVariableStride(
1007  dst,src,idx,col,op,numCols) );
1008  }
1009  };
1010 
1011  template <typename DstView,
1012  typename SrcView,
1013  typename IdxView,
1014  typename ColView,
1015  typename Op,
1016  typename SizeType = typename DstView::execution_space::size_type>
1017  class UnpackArrayMultiColumnVariableStrideWithBoundsCheck {
1018  public:
1019  typedef SizeType size_type;
1021  typedef Kokkos::pair<int, int> value_type;
1022 
1023  private:
1024  DstView dst;
1025  SrcView src;
1026  IdxView idx;
1027  ColView col;
1028  Op op;
1029  size_type numCols;
1030 
1031  public:
1032  UnpackArrayMultiColumnVariableStrideWithBoundsCheck (const DstView& dst_,
1033  const SrcView& src_,
1034  const IdxView& idx_,
1035  const ColView& col_,
1036  const Op& op_,
1037  const size_t numCols_) :
1038  dst (dst_), src (src_), idx (idx_), col (col_), op (op_),
1039  numCols (numCols_)
1040  {}
1041 
1042  KOKKOS_INLINE_FUNCTION void
1043  operator() (const size_type& k, value_type& result) const {
1044  typedef typename IdxView::non_const_value_type row_index_type;
1045  typedef typename ColView::non_const_value_type col_index_type;
1046 
1047  const row_index_type lclRow = idx(k);
1048  if (lclRow < static_cast<row_index_type> (0) ||
1049  lclRow >= static_cast<row_index_type> (dst.dimension_0 ())) {
1050  result.first = 0; // failed!
1051  }
1052  else {
1053  const size_type offset = k*numCols;
1054  for (size_type j = 0; j < numCols; ++j) {
1055  const col_index_type lclCol = col(j);
1056 
1057  if (Impl::outOfBounds<col_index_type> (lclCol, dst.dimension_1 ())) {
1058  result.second = 0; // failed!
1059  }
1060  else { // all indices are valid; apply the op
1061  op (dst(lclRow, col(j)), src(offset+j));
1062  }
1063  }
1064  }
1065  }
1066 
1067  KOKKOS_INLINE_FUNCTION void
1068  init (value_type& initialResult) const {
1069  initialResult.first = 1; // success
1070  initialResult.second = 1; // success
1071  }
1072 
1073  KOKKOS_INLINE_FUNCTION void
1074  join (volatile value_type& dstResult,
1075  const volatile value_type& srcResult) const
1076  {
1077  dstResult.first = (dstResult.first == 0 || srcResult.first == 0) ? 0 : 1;
1078  dstResult.second = (dstResult.second == 0 || srcResult.second == 0) ? 0 : 1;
1079  }
1080 
1081  static void
1082  unpack (const DstView& dst,
1083  const SrcView& src,
1084  const IdxView& idx,
1085  const ColView& col,
1086  const Op& op,
1087  const size_type numCols)
1088  {
1089  typedef typename DstView::execution_space execution_space;
1090  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
1091  typedef typename IdxView::non_const_value_type row_index_type;
1092  typedef typename ColView::non_const_value_type col_index_type;
1093 
1094  Kokkos::pair<int, int> result (1, 1);
1095  Kokkos::parallel_reduce (range_type (0, idx.size ()),
1096  UnpackArrayMultiColumnVariableStrideWithBoundsCheck (dst, src, idx,
1097  col, op, numCols),
1098  result);
1099 
1100  const bool hasBadRows = (result.first != 1);
1101  const bool hasBadCols = (result.second != 1);
1102  const bool hasErr = hasBadRows || hasBadCols;
1103  if (hasErr) {
1104  std::ostringstream os; // for error reporting
1105 
1106  if (hasBadRows) {
1107  // Go back and find the out-of-bounds entries in the array
1108  // of row indices. Performance doesn't matter since we are
1109  // already in an error state, so we can do this
1110  // sequentially, on host.
1111  auto idx_h = Kokkos::create_mirror_view (idx);
1112  Kokkos::deep_copy (idx_h, idx);
1113 
1114  std::vector<row_index_type> badRows;
1115  const size_type numInds = idx_h.dimension_0 ();
1116  for (size_type k = 0; k < numInds; ++k) {
1117  if (idx_h(k) < static_cast<row_index_type> (0) ||
1118  idx_h(k) >= static_cast<row_index_type> (dst.dimension_0 ())) {
1119  badRows.push_back (idx_h(k));
1120  }
1121  }
1122  os << "MultiVector multiple-column unpack kernel had "
1123  << badRows.size () << " out-of bounds row index/ices: [";
1124  for (size_t k = 0; k < badRows.size (); ++k) {
1125  os << badRows[k];
1126  if (k + 1 < badRows.size ()) {
1127  os << ", ";
1128  }
1129  }
1130  os << "].";
1131  } // hasBadRows
1132 
1133  if (hasBadCols) {
1134  // Go back and find the out-of-bounds entries in the array
1135  // of column indices. Performance doesn't matter since we
1136  // are already in an error state, so we can do this
1137  // sequentially, on host.
1138  auto col_h = Kokkos::create_mirror_view (col);
1139  Kokkos::deep_copy (col_h, col);
1140 
1141  std::vector<col_index_type> badCols;
1142  const size_type numInds = col_h.dimension_0 ();
1143  for (size_type k = 0; k < numInds; ++k) {
1144  if (Impl::outOfBounds<col_index_type> (col_h(k), dst.dimension_1 ())) {
1145  badCols.push_back (col_h(k));
1146  }
1147  }
1148 
1149  if (hasBadRows) {
1150  os << " ";
1151  }
1152  os << "MultiVector multiple-column unpack kernel had "
1153  << badCols.size () << " out-of bounds column index/ices: [";
1154  for (size_t k = 0; k < badCols.size (); ++k) {
1155  os << badCols[k];
1156  if (k + 1 < badCols.size ()) {
1157  os << ", ";
1158  }
1159  }
1160  os << "].";
1161  } // hasBadCols
1162 
1163  throw std::runtime_error (os.str ());
1164  } // hasErr
1165  }
1166  };
1167 
1168  template <typename DstView,
1169  typename SrcView,
1170  typename IdxView,
1171  typename ColView,
1172  typename Op>
1173  void
1174  unpack_array_multi_column_variable_stride (const DstView& dst,
1175  const SrcView& src,
1176  const IdxView& idx,
1177  const ColView& col,
1178  const Op& op,
1179  const size_t numCols,
1180  const bool debug = true)
1181  {
1182  static_assert (Kokkos::Impl::is_view<DstView>::value,
1183  "DstView must be a Kokkos::View.");
1184  static_assert (Kokkos::Impl::is_view<SrcView>::value,
1185  "SrcView must be a Kokkos::View.");
1186  static_assert (Kokkos::Impl::is_view<IdxView>::value,
1187  "IdxView must be a Kokkos::View.");
1188  static_assert (Kokkos::Impl::is_view<ColView>::value,
1189  "ColView must be a Kokkos::View.");
1190  static_assert (static_cast<int> (DstView::rank) == 2,
1191  "DstView must be a rank-2 Kokkos::View.");
1192  static_assert (static_cast<int> (SrcView::rank) == 1,
1193  "SrcView must be a rank-1 Kokkos::View.");
1194  static_assert (static_cast<int> (IdxView::rank) == 1,
1195  "IdxView must be a rank-1 Kokkos::View.");
1196  static_assert (static_cast<int> (ColView::rank) == 1,
1197  "ColView must be a rank-1 Kokkos::View.");
1198 
1199  if (debug) {
1200  typedef UnpackArrayMultiColumnVariableStrideWithBoundsCheck<DstView,
1201  SrcView, IdxView, ColView, Op> impl_type;
1202  impl_type::unpack (dst, src, idx, col, op, numCols);
1203  }
1204  else {
1205  typedef UnpackArrayMultiColumnVariableStride<DstView,
1206  SrcView, IdxView, ColView, Op> impl_type;
1207  impl_type::unpack (dst, src, idx, col, op, numCols);
1208  }
1209  }
1210 
1211  template <typename DstView, typename SrcView,
1212  typename DstIdxView, typename SrcIdxView>
1213  struct PermuteArrayMultiColumn {
1214  typedef typename DstView::execution_space execution_space;
1215  typedef typename execution_space::size_type size_type;
1216 
1217  DstView dst;
1218  SrcView src;
1219  DstIdxView dst_idx;
1220  SrcIdxView src_idx;
1221  size_t numCols;
1222 
1223  PermuteArrayMultiColumn(const DstView& dst_,
1224  const SrcView& src_,
1225  const DstIdxView& dst_idx_,
1226  const SrcIdxView& src_idx_,
1227  size_t numCols_) :
1228  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1229  numCols(numCols_) {}
1230 
1231  KOKKOS_INLINE_FUNCTION
1232  void operator()( const size_type k ) const {
1233  const typename DstIdxView::value_type toRow = dst_idx(k);
1234  const typename SrcIdxView::value_type fromRow = src_idx(k);
1235  for (size_t j = 0; j < numCols; ++j)
1236  dst(toRow, j) = src(fromRow, j);
1237  }
1238 
1239  static void permute(const DstView& dst,
1240  const SrcView& src,
1241  const DstIdxView& dst_idx,
1242  const SrcIdxView& src_idx,
1243  size_t numCols) {
1244  const size_type n = std::min( dst_idx.size(), src_idx.size() );
1245  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
1246  Kokkos::parallel_for (range_type (0, n),
1247  PermuteArrayMultiColumn (dst,src,dst_idx,src_idx,numCols));
1248  }
1249  };
1250 
1251  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1252  // SrcView::Rank == 2
1253  template <typename DstView, typename SrcView,
1254  typename DstIdxView, typename SrcIdxView>
1255  void permute_array_multi_column(const DstView& dst,
1256  const SrcView& src,
1257  const DstIdxView& dst_idx,
1258  const SrcIdxView& src_idx,
1259  size_t numCols) {
1260  PermuteArrayMultiColumn<DstView,SrcView,DstIdxView,SrcIdxView>::permute(
1261  dst, src, dst_idx, src_idx, numCols);
1262  }
1263 
1264  template <typename DstView, typename SrcView,
1265  typename DstIdxView, typename SrcIdxView,
1266  typename DstColView, typename SrcColView>
1267  struct PermuteArrayMultiColumnVariableStride {
1268  typedef typename DstView::execution_space execution_space;
1269  typedef typename execution_space::size_type size_type;
1270 
1271  DstView dst;
1272  SrcView src;
1273  DstIdxView dst_idx;
1274  SrcIdxView src_idx;
1275  DstColView dst_col;
1276  SrcColView src_col;
1277  size_t numCols;
1278 
1279  PermuteArrayMultiColumnVariableStride(const DstView& dst_,
1280  const SrcView& src_,
1281  const DstIdxView& dst_idx_,
1282  const SrcIdxView& src_idx_,
1283  const DstColView& dst_col_,
1284  const SrcColView& src_col_,
1285  size_t numCols_) :
1286  dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
1287  dst_col(dst_col_), src_col(src_col_),
1288  numCols(numCols_) {}
1289 
1290  KOKKOS_INLINE_FUNCTION
1291  void operator()( const size_type k ) const {
1292  const typename DstIdxView::value_type toRow = dst_idx(k);
1293  const typename SrcIdxView::value_type fromRow = src_idx(k);
1294  for (size_t j = 0; j < numCols; ++j)
1295  dst(toRow, dst_col(j)) = src(fromRow, src_col(j));
1296  }
1297 
1298  static void permute(const DstView& dst,
1299  const SrcView& src,
1300  const DstIdxView& dst_idx,
1301  const SrcIdxView& src_idx,
1302  const DstColView& dst_col,
1303  const SrcColView& src_col,
1304  size_t numCols) {
1305  const size_type n = std::min( dst_idx.size(), src_idx.size() );
1306  typedef Kokkos::RangePolicy<execution_space, size_type> range_type;
1307  Kokkos::parallel_for (range_type (0, n),
1308  PermuteArrayMultiColumnVariableStride (dst, src,
1309  dst_idx,
1310  src_idx,
1311  dst_col,
1312  src_col,
1313  numCols));
1314  }
1315  };
1316 
1317  // To do: Add enable_if<> restrictions on DstView::Rank == 1,
1318  // SrcView::Rank == 2
1319  template <typename DstView, typename SrcView,
1320  typename DstIdxView, typename SrcIdxView,
1321  typename DstColView, typename SrcColView>
1322  void permute_array_multi_column_variable_stride(const DstView& dst,
1323  const SrcView& src,
1324  const DstIdxView& dst_idx,
1325  const SrcIdxView& src_idx,
1326  const DstColView& dst_col,
1327  const SrcColView& src_col,
1328  size_t numCols) {
1329  PermuteArrayMultiColumnVariableStride<DstView,SrcView,
1330  DstIdxView,SrcIdxView,DstColView,SrcColView>::permute(
1331  dst, src, dst_idx, src_idx, dst_col, src_col, numCols);
1332  }
1333 
1334 } // Details namespace
1335 } // KokkosRefactor namespace
1336 } // Tpetra namespace
1337 
1338 #endif // TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_HPP
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION bool outOfBounds(const IntegerType x, const IntegerType exclusiveUpperBound)
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upper bound?
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
Implementation details of Tpetra.
Is x out of bounds? That is, is x less than zero, or greater than or equal to the given exclusive upper bound?