40 #ifndef TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP
41 #define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP
43 #include "TpetraCore_config.h"
44 #include "Teuchos_Array.hpp"
45 #include "Teuchos_ArrayView.hpp"
46 #include "Teuchos_OrdinalTraits.hpp"
54 #include "Kokkos_Core.hpp"
86 namespace UnpackAndCombineCrsMatrixImpl {
97 template<
class ST,
class LO,
class GO>
102 const char imports[],
105 const size_t num_ent,
106 const size_t bytes_per_value)
112 bool unpack_pids = pids_out.size() > 0;
114 const size_t num_ent_beg = offset;
117 const size_t gids_beg = num_ent_beg + num_ent_len;
118 const size_t gids_len =
121 const size_t pids_beg = gids_beg + gids_len;
122 const size_t pids_len = unpack_pids ?
126 const size_t vals_beg = gids_beg + gids_len + pids_len;
127 const size_t vals_len = num_ent * bytes_per_value;
129 const char*
const num_ent_in = imports + num_ent_beg;
130 const char*
const gids_in = imports + gids_beg;
131 const char*
const pids_in = unpack_pids ? imports + pids_beg :
nullptr;
132 const char*
const vals_in = imports + vals_beg;
134 size_t num_bytes_out = 0;
137 if (
static_cast<size_t> (num_ent_out) != num_ent) {
142 Kokkos::pair<int, size_t> p;
147 num_bytes_out += p.second;
154 num_bytes_out += p.second;
161 num_bytes_out += p.second;
164 const size_t expected_num_bytes = num_ent_len + gids_len + pids_len + vals_len;
165 if (num_bytes_out != expected_num_bytes) {
181 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
183 typedef LocalMatrix local_matrix_type;
186 typedef typename local_matrix_type::value_type ST;
190 typedef typename DT::execution_space XS;
192 typedef Kokkos::View<const size_t*, BufferDeviceType>
193 num_packets_per_lid_type;
194 typedef Kokkos::View<const size_t*, DT> offsets_type;
195 typedef Kokkos::View<const char*, BufferDeviceType> input_buffer_type;
196 typedef Kokkos::View<const LO*, BufferDeviceType> import_lids_type;
198 typedef Kokkos::View<int, DT> error_type;
199 using member_type =
typename Kokkos::TeamPolicy<XS>::member_type;
201 static_assert (std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
202 "LocalMap::local_ordinal_type and "
203 "LocalMatrix::ordinal_type must be the same.");
205 local_matrix_type local_matrix;
207 input_buffer_type imports;
208 num_packets_per_lid_type num_packets_per_lid;
209 import_lids_type import_lids;
210 Kokkos::View<const LO*[2], DT> batch_info;
211 offsets_type offsets;
214 size_t bytes_per_value;
216 error_type error_code;
219 const local_matrix_type& local_matrix_in,
221 const input_buffer_type& imports_in,
222 const num_packets_per_lid_type& num_packets_per_lid_in,
223 const import_lids_type& import_lids_in,
224 const Kokkos::View<
const LO*[2], DT>& batch_info_in,
225 const offsets_type& offsets_in,
227 const size_t batch_size_in,
228 const size_t bytes_per_value_in,
229 const bool atomic_in) :
230 local_matrix (local_matrix_in),
231 local_col_map (local_col_map_in),
232 imports (imports_in),
233 num_packets_per_lid (num_packets_per_lid_in),
234 import_lids (import_lids_in),
235 batch_info (batch_info_in),
236 offsets (offsets_in),
237 combine_mode (combine_mode_in),
238 batch_size (batch_size_in),
239 bytes_per_value (bytes_per_value_in),
244 KOKKOS_INLINE_FUNCTION
245 void operator()(member_type team_member)
const
248 using Kokkos::subview;
249 using Kokkos::MemoryUnmanaged;
251 const LO batch = team_member.league_rank();
252 const LO lid_no = batch_info(batch, 0);
253 const LO batch_no = batch_info(batch, 1);
255 const size_t num_bytes = num_packets_per_lid(lid_no);
262 const LO import_lid = import_lids(lid_no);
263 const size_t buf_size = imports.size();
264 const size_t offset = offsets(lid_no);
268 const char*
const in_buf = imports.data() + offset;
270 const size_t num_entries_in_row =
static_cast<size_t>(num_ent_LO);
273 size_t expected_num_bytes = 0;
280 if (expected_num_bytes > num_bytes)
283 #ifndef KOKKOS_ENABLE_SYCL
285 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
286 "At row %d, the expected number of bytes (%d) != number of unpacked bytes (%d)\n",
287 (
int) lid_no, (
int) expected_num_bytes, (
int) num_bytes
290 Kokkos::atomic_compare_exchange_strong(error_code.data(), 0, 21);
294 if (offset > buf_size || offset + num_bytes > buf_size)
297 #ifndef KOKKOS_ENABLE_SYCL
299 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
300 "At row %d, the offset (%d) > buffer size (%d)\n",
301 (
int) lid_no, (
int) offset, (
int) buf_size
304 Kokkos::atomic_compare_exchange_strong(error_code.data(), 0, 22);
309 size_t num_entries_in_batch = 0;
310 if (num_entries_in_row <= batch_size)
311 num_entries_in_batch = num_entries_in_row;
312 else if (num_entries_in_row >= (batch_no + 1) * batch_size)
313 num_entries_in_batch = batch_size;
315 num_entries_in_batch = num_entries_in_row - batch_no * batch_size;
318 const size_t num_ent_start = offset;
319 const size_t num_ent_end = num_ent_start + bytes_per_lid;
322 const size_t gids_start = num_ent_end;
323 const size_t gids_end = gids_start + num_entries_in_row * bytes_per_gid;
325 const size_t vals_start = gids_end;
327 const size_t shift = batch_no * batch_size;
328 const char*
const num_ent_in = imports.data() + num_ent_start;
329 const char*
const gids_in = imports.data() + gids_start + shift * bytes_per_gid;
330 const char*
const vals_in = imports.data() + vals_start + shift * bytes_per_value;
334 if (
static_cast<size_t>(num_ent_out) != num_entries_in_row)
337 #ifndef KOKKOS_ENABLE_SYCL
339 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
340 "At row %d, number of entries (%d) != number of entries unpacked (%d)\n",
341 (
int) lid_no, (
int) num_entries_in_row, (
int) num_ent_out
344 Kokkos::atomic_compare_exchange_strong(error_code.data(), 0, 23);
347 constexpr
bool matrix_has_sorted_rows =
true;
348 Kokkos::parallel_for(
349 Kokkos::TeamThreadRange(team_member, num_entries_in_batch),
350 KOKKOS_LAMBDA(
const LO& j)
355 distance = j * bytes_per_gid;
365 distance = j * bytes_per_value;
368 if (combine_mode ==
ADD) {
372 const bool use_atomic_updates = atomic;
373 (void)local_matrix.sumIntoValues(
378 matrix_has_sorted_rows,
381 }
else if (combine_mode ==
REPLACE) {
385 const bool use_atomic_updates =
false;
386 (void)local_matrix.replaceValues(
391 matrix_has_sorted_rows,
397 #ifndef KOKKOS_ENABLE_SYCL
399 "*** Error: UnpackCrsMatrixAndCombineFunctor: "
400 "At row %d, an unknown error occurred during unpack\n", (
int) lid_no
403 Kokkos::atomic_compare_exchange_strong(error_code.data(), 0, 31);
408 team_member.team_barrier();
414 auto error_code_h = Kokkos::create_mirror_view_and_copy(
415 Kokkos::HostSpace(), error_code
417 return error_code_h();
422 struct MaxNumEntTag {};
423 struct TotNumEntTag {};
433 template<
class LO,
class DT,
class BDT>
436 typedef Kokkos::View<const size_t*, BDT> num_packets_per_lid_type;
437 typedef Kokkos::View<const size_t*, DT> offsets_type;
438 typedef Kokkos::View<const char*, BDT> input_buffer_type;
441 typedef size_t value_type;
444 num_packets_per_lid_type num_packets_per_lid;
445 offsets_type offsets;
446 input_buffer_type imports;
450 const offsets_type& offsets_in,
451 const input_buffer_type& imports_in) :
452 num_packets_per_lid (num_packets_per_lid_in),
453 offsets (offsets_in),
457 KOKKOS_INLINE_FUNCTION
void
458 operator() (
const MaxNumEntTag,
const LO i, value_type& update)
const {
460 const size_t num_bytes = num_packets_per_lid(i);
463 const char*
const in_buf = imports.data () + offsets(i);
465 const size_t num_ent =
static_cast<size_t> (num_ent_LO);
467 update = (update < num_ent) ? num_ent : update;
471 KOKKOS_INLINE_FUNCTION
void
472 join (
const MaxNumEntTag,
473 volatile value_type& dst,
474 const volatile value_type& src)
const
476 if (dst < src) dst = src;
479 KOKKOS_INLINE_FUNCTION
void
480 operator() (
const TotNumEntTag,
const LO i, value_type& tot_num_ent)
const {
482 const size_t num_bytes = num_packets_per_lid(i);
485 const char*
const in_buf = imports.data () + offsets(i);
487 tot_num_ent +=
static_cast<size_t> (num_ent_LO);
499 template<
class LO,
class DT,
class BDT>
502 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
503 const Kokkos::View<const size_t*, DT>& offsets,
504 const Kokkos::View<const char*, BDT>& imports)
506 typedef typename DT::execution_space XS;
507 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>,
508 MaxNumEntTag> range_policy;
512 const LO numRowsToUnpack =
513 static_cast<LO
> (num_packets_per_lid.extent (0));
514 size_t max_num_ent = 0;
515 Kokkos::parallel_reduce (
"Max num entries in CRS",
516 range_policy (0, numRowsToUnpack),
517 functor, max_num_ent);
528 template<
class LO,
class DT,
class BDT>
531 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
532 const Kokkos::View<const size_t*, DT>& offsets,
533 const Kokkos::View<const char*, BDT>& imports)
535 typedef typename DT::execution_space XS;
536 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>, TotNumEntTag> range_policy;
537 size_t tot_num_ent = 0;
540 const LO numRowsToUnpack =
541 static_cast<LO
> (num_packets_per_lid.extent (0));
542 Kokkos::parallel_reduce (
"Total num entries in CRS to unpack",
543 range_policy (0, numRowsToUnpack),
544 functor, tot_num_ent);
549 KOKKOS_INLINE_FUNCTION
551 unpackRowCount(
const char imports[],
553 const size_t num_bytes)
559 const size_t p_num_bytes = PT::packValueCount(num_ent_LO);
560 if (p_num_bytes > num_bytes) {
561 return OrdinalTraits<size_t>::invalid();
563 const char*
const in_buf = imports + offset;
564 (void) PT::unpackValue(num_ent_LO, in_buf);
566 return static_cast<size_t>(num_ent_LO);
573 template<
class View1,
class View2>
577 const View1& batches_per_lid,
581 using LO =
typename View2::value_type;
583 for (
size_t i=0; i<batches_per_lid.extent(0); i++)
585 for (
size_t batch_no=0; batch_no<batches_per_lid(i); batch_no++)
587 batch_info(batch, 0) =
static_cast<LO
>(i);
588 batch_info(batch, 1) = batch_no;
592 return batch == batch_info.extent(0);
602 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
605 const LocalMatrix& local_matrix,
607 const Kokkos::View<const char*, BufferDeviceType>& imports,
608 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
612 using ST =
typename LocalMatrix::value_type;
615 using XS =
typename DT::execution_space;
616 const char prefix[] =
617 "Tpetra::Details::UnpackAndCombineCrsMatrixImpl::"
618 "unpackAndCombineIntoCrsMatrix: ";
620 const size_t num_import_lids =
static_cast<size_t>(import_lids.extent(0));
621 if (num_import_lids == 0) {
628 TEUCHOS_TEST_FOR_EXCEPTION(combine_mode ==
ABSMAX,
629 std::invalid_argument,
630 prefix <<
"ABSMAX combine mode is not yet implemented for a matrix that has a "
631 "static graph (i.e., was constructed with the CrsMatrix constructor "
632 "that takes a const CrsGraph pointer).");
634 TEUCHOS_TEST_FOR_EXCEPTION(combine_mode ==
INSERT,
635 std::invalid_argument,
636 prefix <<
"INSERT combine mode is not allowed if the matrix has a static graph "
637 "(i.e., was constructed with the CrsMatrix constructor that takes a "
638 "const CrsGraph pointer).");
641 TEUCHOS_TEST_FOR_EXCEPTION(!(combine_mode ==
ADD || combine_mode ==
REPLACE),
642 std::invalid_argument,
643 prefix <<
"Invalid combine mode; should never get "
644 "here! Please report this bug to the Tpetra developers.");
647 bool bad_num_import_lids =
648 num_import_lids !=
static_cast<size_t>(num_packets_per_lid.extent(0));
649 TEUCHOS_TEST_FOR_EXCEPTION(bad_num_import_lids,
650 std::invalid_argument,
651 prefix <<
"importLIDs.size() (" << num_import_lids <<
") != "
652 "numPacketsPerLID.size() (" << num_packets_per_lid.extent(0) <<
").");
656 Kokkos::View<size_t*, DT> offsets(
"offsets", num_import_lids+1);
660 size_t max_num_ent = compute_maximum_num_entries<LO,DT>(num_packets_per_lid, offsets, imports);
662 const size_t batch_size = std::min(default_batch_size, max_num_ent);
665 size_t num_batches = 0;
666 Kokkos::View<LO*[2], DT> batch_info(
"", num_batches);
667 Kokkos::View<size_t*, DT> batches_per_lid(
"", num_import_lids);
669 Kokkos::parallel_reduce(
670 Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t>>(0, num_import_lids),
671 KOKKOS_LAMBDA(
const size_t i,
size_t& batches)
673 const size_t num_entries_in_row = unpackRowCount<LO>(
674 imports.data(), offsets(i), num_packets_per_lid(i)
677 (num_entries_in_row <= batch_size) ?
679 num_entries_in_row / batch_size + (num_entries_in_row % batch_size != 0);
680 batches += batches_per_lid(i);
684 Kokkos::resize(batch_info, num_batches);
686 Kokkos::HostSpace host_space;
687 auto batches_per_lid_h = Kokkos::create_mirror_view(host_space, batches_per_lid);
690 auto batch_info_h = Kokkos::create_mirror_view(host_space, batch_info);
700 const bool atomic = XS::concurrency() != 1;
716 using policy = Kokkos::TeamPolicy<XS, Kokkos::IndexType<LO>>;
718 #if defined(KOKKOS_ENABLE_CUDA)
719 constexpr
bool is_cuda = std::is_same<XS, Kokkos::Cuda>::value;
721 constexpr
bool is_cuda =
false;
723 if (!is_cuda || team_size == Teuchos::OrdinalTraits<size_t>::invalid())
725 Kokkos::parallel_for(policy(
static_cast<LO
>(num_batches), Kokkos::AUTO), f);
729 Kokkos::parallel_for(policy(
static_cast<LO
>(num_batches),
static_cast<int>(team_size)), f);
732 auto error_code = f.error();
733 TEUCHOS_TEST_FOR_EXCEPTION(
736 prefix <<
"UnpackCrsMatrixAndCombineFunctor reported error code " << error_code
740 template<
class LocalMatrix,
class BufferDeviceType>
743 const LocalMatrix& local_matrix,
745 const Kokkos::View<const char*, BufferDeviceType>& imports,
746 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
747 const size_t num_same_ids)
749 using Kokkos::parallel_reduce;
750 typedef typename LocalMatrix::ordinal_type LO;
751 typedef typename LocalMatrix::device_type device_type;
752 typedef typename device_type::execution_space XS;
753 typedef typename Kokkos::View<LO*, device_type>::size_type size_type;
754 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO> > range_policy;
755 typedef BufferDeviceType BDT;
761 num_items =
static_cast<LO
>(num_same_ids);
764 parallel_reduce(range_policy(0, num_items),
765 KOKKOS_LAMBDA(
const LO lid,
size_t& update) {
766 update +=
static_cast<size_t>(local_matrix.graph.row_map[lid+1]
767 -local_matrix.graph.row_map[lid]);
773 num_items =
static_cast<LO
>(permute_from_lids.extent(0));
776 parallel_reduce(range_policy(0, num_items),
777 KOKKOS_LAMBDA(
const LO i,
size_t& update) {
778 const LO lid = permute_from_lids(i);
779 update +=
static_cast<size_t> (local_matrix.graph.row_map[lid+1]
780 - local_matrix.graph.row_map[lid]);
787 const size_type np = num_packets_per_lid.extent(0);
788 Kokkos::View<size_t*, device_type> offsets(
"offsets", np+1);
791 compute_total_num_entries<LO, device_type, BDT> (num_packets_per_lid,
799 template<
class LO,
class DT,
class BDT>
801 setupRowPointersForRemotes(
804 const Kokkos::View<const char*, BDT>& imports,
805 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
808 using Kokkos::parallel_reduce;
809 typedef typename DT::execution_space XS;
811 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type> > range_policy;
813 const size_t InvalidNum = OrdinalTraits<size_t>::invalid();
814 const size_type N = num_packets_per_lid.extent(0);
817 parallel_reduce (
"Setup row pointers for remotes",
819 KOKKOS_LAMBDA (
const size_t i,
int& k_error) {
820 typedef typename std::remove_reference< decltype( tgt_rowptr(0) ) >::type atomic_incr_type;
821 const size_t num_bytes = num_packets_per_lid(i);
822 const size_t offset = offsets(i);
823 const size_t num_ent = unpackRowCount<LO> (imports.data(), offset, num_bytes);
824 if (num_ent == InvalidNum) {
827 Kokkos::atomic_fetch_add (&tgt_rowptr (import_lids(i)), atomic_incr_type(num_ent));
835 makeCrsRowPtrFromLengths(
837 const Kokkos::View<size_t*,DT>& new_start_row)
839 using Kokkos::parallel_scan;
840 typedef typename DT::execution_space XS;
841 typedef typename Kokkos::View<size_t*,DT>::size_type size_type;
842 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type> > range_policy;
843 const size_type N = new_start_row.extent(0);
844 parallel_scan(range_policy(0, N),
845 KOKKOS_LAMBDA(
const size_t& i,
size_t& update,
const bool&
final) {
846 auto cur_val = tgt_rowptr(i);
848 tgt_rowptr(i) = update;
849 new_start_row(i) = tgt_rowptr(i);
856 template<
class LocalMatrix,
class LocalMap>
862 const Kokkos::View<size_t*, typename LocalMap::device_type>& new_start_row,
865 const LocalMatrix& local_matrix,
866 const LocalMap& local_col_map,
867 const size_t num_same_ids,
870 using Kokkos::parallel_for;
873 typedef typename DT::execution_space XS;
874 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t> > range_policy;
876 parallel_for(range_policy(0, num_same_ids),
877 KOKKOS_LAMBDA(
const size_t i) {
878 typedef typename std::remove_reference< decltype( new_start_row(0) ) >::type atomic_incr_type;
880 const LO src_lid =
static_cast<LO
>(i);
881 size_t src_row = local_matrix.graph.row_map(src_lid);
883 const LO tgt_lid =
static_cast<LO
>(i);
884 const size_t tgt_row = tgt_rowptr(tgt_lid);
886 const size_t nsr = local_matrix.graph.row_map(src_lid+1)
887 - local_matrix.graph.row_map(src_lid);
888 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
890 for (
size_t j=local_matrix.graph.row_map(src_lid);
891 j<local_matrix.graph.row_map(src_lid+1); ++j) {
892 LO src_col = local_matrix.graph.entries(j);
893 tgt_vals(tgt_row + j - src_row) = local_matrix.values(j);
894 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
895 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
901 template<
class LocalMatrix,
class LocalMap>
903 copyDataFromPermuteIDs(
907 const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
912 const LocalMatrix& local_matrix,
913 const LocalMap& local_col_map,
916 using Kokkos::parallel_for;
919 typedef typename DT::execution_space XS;
920 typedef typename PackTraits<LO>::input_array_type::size_type size_type;
921 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type> > range_policy;
923 const size_type num_permute_to_lids = permute_to_lids.extent(0);
925 parallel_for(range_policy(0, num_permute_to_lids),
926 KOKKOS_LAMBDA(
const size_t i) {
927 typedef typename std::remove_reference< decltype( new_start_row(0) ) >::type atomic_incr_type;
929 const LO src_lid = permute_from_lids(i);
930 const size_t src_row = local_matrix.graph.row_map(src_lid);
932 const LO tgt_lid = permute_to_lids(i);
933 const size_t tgt_row = tgt_rowptr(tgt_lid);
935 size_t nsr = local_matrix.graph.row_map(src_lid+1)
936 - local_matrix.graph.row_map(src_lid);
937 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
939 for (
size_t j=local_matrix.graph.row_map(src_lid);
940 j<local_matrix.graph.row_map(src_lid+1); ++j) {
941 LO src_col = local_matrix.graph.entries(j);
942 tgt_vals(tgt_row + j - src_row) = local_matrix.values(j);
943 tgt_colind(tgt_row + j - src_row) = local_col_map.getGlobalElement(src_col);
944 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
950 template<
typename LocalMatrix,
typename LocalMap,
typename BufferDeviceType>
952 unpackAndCombineIntoCrsArrays2(
956 const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
959 const Kokkos::View<const char*, BufferDeviceType>& imports,
960 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
964 const size_t bytes_per_value)
967 using Kokkos::subview;
968 using Kokkos::MemoryUnmanaged;
969 using Kokkos::parallel_reduce;
970 using Kokkos::atomic_fetch_add;
975 typedef typename LocalMatrix::value_type ST;
976 typedef typename DT::execution_space XS;
977 typedef typename Kokkos::View<LO*, DT>::size_type size_type;
978 typedef typename Kokkos::pair<size_type, size_type> slice;
979 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type> > range_policy;
981 typedef View<int*,DT, MemoryUnmanaged> pids_out_type;
982 typedef View<GO*, DT, MemoryUnmanaged> gids_out_type;
983 typedef View<ST*, DT, MemoryUnmanaged> vals_out_type;
985 const size_t InvalidNum = OrdinalTraits<size_t>::invalid();
988 const size_type num_import_lids = import_lids.size();
991 parallel_reduce (
"Unpack and combine into CRS",
992 range_policy (0, num_import_lids),
993 KOKKOS_LAMBDA (
const size_t i,
int& k_error) {
994 typedef typename std::remove_reference< decltype( new_start_row(0) ) >::type atomic_incr_type;
995 const size_t num_bytes = num_packets_per_lid(i);
996 const size_t offset = offsets(i);
997 if (num_bytes == 0) {
1001 size_t num_ent = unpackRowCount<LO>(imports.data(), offset, num_bytes);
1002 if (num_ent == InvalidNum) {
1006 const LO lcl_row = import_lids(i);
1007 const size_t start_row = atomic_fetch_add(&new_start_row(lcl_row), atomic_incr_type(num_ent));
1008 const size_t end_row = start_row + num_ent;
1010 gids_out_type gids_out = subview(tgt_colind, slice(start_row, end_row));
1011 vals_out_type vals_out = subview(tgt_vals, slice(start_row, end_row));
1012 pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row));
1014 k_error += unpackRow<ST,LO,GO>(gids_out, pids_out, vals_out,
1015 imports.data(), offset, num_bytes,
1016 num_ent, bytes_per_value);
1019 for (
size_t j = 0; j < static_cast<size_t>(num_ent); ++j) {
1020 const int pid = pids_out(j);
1021 pids_out(j) = (pid != my_pid) ? pid : -1;
1028 template<
typename LocalMatrix,
typename LocalMap,
typename BufferDeviceType>
1031 const LocalMatrix & local_matrix,
1032 const LocalMap & local_col_map,
1034 const Kokkos::View<const char*, BufferDeviceType>& imports,
1035 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
1043 const size_t num_same_ids,
1044 const size_t tgt_num_rows,
1045 const size_t tgt_num_nonzeros,
1046 const int my_tgt_pid,
1047 const size_t bytes_per_value)
1050 using Kokkos::subview;
1051 using Kokkos::parallel_for;
1052 using Kokkos::MemoryUnmanaged;
1056 typedef typename DT::execution_space XS;
1057 typedef typename Kokkos::View<LO*, DT>::size_type size_type;
1058 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t> > range_policy;
1059 typedef BufferDeviceType BDT;
1061 const char prefix[] =
"unpackAndCombineIntoCrsArrays: ";
1063 const size_t N = tgt_num_rows;
1067 const int my_pid = my_tgt_pid;
1070 parallel_for(range_policy(0, N+1),
1071 KOKKOS_LAMBDA(
const size_t i) {
1077 parallel_for(range_policy(0, num_same_ids),
1078 KOKKOS_LAMBDA(
const size_t i) {
1079 const LO tgt_lid =
static_cast<LO
>(i);
1080 const LO src_lid =
static_cast<LO
>(i);
1081 tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid+1)
1082 - local_matrix.graph.row_map(src_lid);
1087 const size_type num_permute_to_lids = permute_to_lids.extent(0);
1088 parallel_for(range_policy(0, num_permute_to_lids),
1089 KOKKOS_LAMBDA(
const size_t i) {
1090 const LO tgt_lid = permute_to_lids(i);
1091 const LO src_lid = permute_from_lids(i);
1092 tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid+1)
1093 - local_matrix.graph.row_map(src_lid);
1098 const size_type num_import_lids = import_lids.extent(0);
1099 View<size_t*, DT> offsets(
"offsets", num_import_lids+1);
1102 #ifdef HAVE_TPETRA_DEBUG
1104 auto nth_offset_h = getEntryOnHost(offsets, num_import_lids);
1105 const bool condition =
1106 nth_offset_h !=
static_cast<size_t>(imports.extent (0));
1107 TEUCHOS_TEST_FOR_EXCEPTION
1108 (condition, std::logic_error, prefix
1109 <<
"The final offset in bytes " << nth_offset_h
1110 <<
" != imports.size() = " << imports.extent(0)
1111 <<
". Please report this bug to the Tpetra developers.");
1117 setupRowPointersForRemotes<LO,DT,BDT>(tgt_rowptr,
1118 import_lids, imports, num_packets_per_lid, offsets);
1119 TEUCHOS_TEST_FOR_EXCEPTION(k_error != 0, std::logic_error, prefix
1120 <<
" Error transferring data to target row pointers. "
1121 "Please report this bug to the Tpetra developers.");
1125 View<size_t*, DT> new_start_row (
"new_start_row", N+1);
1128 makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
1131 copyDataFromSameIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
1132 tgt_rowptr, src_pids, local_matrix, local_col_map, num_same_ids, my_pid);
1134 copyDataFromPermuteIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
1135 tgt_rowptr, src_pids, permute_to_lids, permute_from_lids,
1136 local_matrix, local_col_map, my_pid);
1138 if (imports.extent(0) <= 0) {
1142 int unpack_err = unpackAndCombineIntoCrsArrays2(tgt_colind, tgt_pids,
1143 tgt_vals, new_start_row, offsets, import_lids, imports, num_packets_per_lid,
1144 local_matrix, local_col_map, my_pid, bytes_per_value);
1145 TEUCHOS_TEST_FOR_EXCEPTION(
1146 unpack_err != 0, std::logic_error, prefix <<
"unpack loop failed. This "
1147 "should never happen. Please report this bug to the Tpetra developers.");
1193 template<
typename ST,
typename LO,
typename GO,
typename Node>
1197 const Teuchos::ArrayView<const char>& imports,
1198 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
1199 const Teuchos::ArrayView<const LO>& importLIDs,
1204 typedef typename Node::device_type device_type;
1206 static_assert (std::is_same<device_type, typename local_matrix_device_type::device_type>::value,
1207 "Node::device_type and LocalMatrix::device_type must be the same.");
1210 device_type outputDevice;
1215 auto num_packets_per_lid_d =
1217 numPacketsPerLID.size(),
true,
"num_packets_per_lid");
1219 auto import_lids_d =
1221 importLIDs.size(),
true,
"import_lids");
1225 imports.size(),
true,
"imports");
1228 auto local_col_map = sourceMatrix.
getColMap()->getLocalMap();
1239 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix(
1240 local_matrix, local_col_map, imports_d, num_packets_per_lid_d,
1241 import_lids_d, combineMode);
1245 template<
typename ST,
typename LO,
typename GO,
typename NT>
1247 unpackCrsMatrixAndCombineNew(
1249 Kokkos::DualView<
char*,
1251 Kokkos::DualView<
size_t*,
1253 const Kokkos::DualView<
const LO*,
1261 using device_type =
typename crs_matrix_type::device_type;
1262 using local_matrix_device_type =
typename crs_matrix_type::local_matrix_device_type;
1263 using buffer_device_type =
typename dist_object_type::buffer_device_type;
1266 (std::is_same<device_type, typename local_matrix_device_type::device_type>::value,
1267 "crs_matrix_type::device_type and local_matrix_device_type::device_type "
1268 "must be the same.");
1270 if (numPacketsPerLID.need_sync_device()) {
1271 numPacketsPerLID.sync_device ();
1273 auto num_packets_per_lid_d = numPacketsPerLID.view_device ();
1275 TEUCHOS_ASSERT( ! importLIDs.need_sync_device () );
1276 auto import_lids_d = importLIDs.view_device ();
1278 if (imports.need_sync_device()) {
1279 imports.sync_device ();
1281 auto imports_d = imports.view_device ();
1284 auto local_col_map = sourceMatrix.
getColMap ()->getLocalMap ();
1285 typedef decltype (local_col_map) local_map_type;
1287 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix<
1288 local_matrix_device_type,
1291 > (local_matrix, local_col_map, imports_d, num_packets_per_lid_d,
1292 import_lids_d, combineMode);
1350 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1354 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
1355 const Teuchos::ArrayView<const char> &imports,
1356 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
1360 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
1361 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
1363 using Kokkos::MemoryUnmanaged;
1365 typedef typename Node::device_type DT;
1367 const char prefix[] =
"unpackAndCombineWithOwningPIDsCount: ";
1369 TEUCHOS_TEST_FOR_EXCEPTION
1370 (permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
1371 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size () <<
" != "
1372 "permuteFromLIDs.size() = " << permuteFromLIDs.size() <<
".");
1376 TEUCHOS_TEST_FOR_EXCEPTION
1377 (! locallyIndexed, std::invalid_argument, prefix <<
"The input "
1378 "CrsMatrix 'sourceMatrix' must be locally indexed.");
1379 TEUCHOS_TEST_FOR_EXCEPTION
1380 (importLIDs.size () != numPacketsPerLID.size (), std::invalid_argument,
1381 prefix <<
"importLIDs.size() = " << importLIDs.size () <<
" != "
1382 "numPacketsPerLID.size() = " << numPacketsPerLID.size () <<
".");
1385 auto permute_from_lids_d =
1387 permuteFromLIDs.getRawPtr (),
1388 permuteFromLIDs.size (),
true,
1389 "permute_from_lids");
1392 imports.getRawPtr (),
1393 imports.size (),
true,
1395 auto num_packets_per_lid_d =
1397 numPacketsPerLID.getRawPtr (),
1398 numPacketsPerLID.size (),
true,
1399 "num_packets_per_lid");
1401 return UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
1402 local_matrix, permute_from_lids_d, imports_d,
1403 num_packets_per_lid_d, numSameIDs);
1420 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1424 const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
1425 const Teuchos::ArrayView<const char>& imports,
1426 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
1429 const size_t numSameIDs,
1430 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
1431 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
1432 size_t TargetNumRows,
1433 size_t TargetNumNonzeros,
1434 const int MyTargetPID,
1435 const Teuchos::ArrayView<size_t>& CRS_rowptr,
1436 const Teuchos::ArrayView<GlobalOrdinal>& CRS_colind,
1438 const Teuchos::ArrayView<const int>& SourcePids,
1439 Teuchos::Array<int>& TargetPids)
1446 using Teuchos::ArrayView;
1447 using Teuchos::outArg;
1448 using Teuchos::REDUCE_MAX;
1449 using Teuchos::reduceAll;
1451 typedef LocalOrdinal LO;
1453 typedef typename Node::device_type DT;
1456 typedef typename matrix_type::impl_scalar_type ST;
1457 typedef typename ArrayView<const LO>::size_type size_type;
1459 const char prefix[] =
"Tpetra::Details::unpackAndCombineIntoCrsArrays: ";
1461 TEUCHOS_TEST_FOR_EXCEPTION(
1462 TargetNumRows + 1 !=
static_cast<size_t> (CRS_rowptr.size ()),
1463 std::invalid_argument, prefix <<
"CRS_rowptr.size() = " <<
1464 CRS_rowptr.size () <<
"!= TargetNumRows+1 = " << TargetNumRows+1 <<
".");
1466 TEUCHOS_TEST_FOR_EXCEPTION(
1467 permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
1468 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size ()
1469 <<
"!= permuteFromLIDs.size() = " << permuteFromLIDs.size () <<
".");
1470 const size_type numImportLIDs = importLIDs.size ();
1472 TEUCHOS_TEST_FOR_EXCEPTION(
1473 numImportLIDs != numPacketsPerLID.size (), std::invalid_argument,
1474 prefix <<
"importLIDs.size() = " << numImportLIDs <<
" != "
1475 "numPacketsPerLID.size() = " << numPacketsPerLID.size() <<
".");
1478 if (
static_cast<size_t> (TargetPids.size ()) != TargetNumNonzeros) {
1479 TargetPids.resize (TargetNumNonzeros);
1481 TargetPids.assign (TargetNumNonzeros, -1);
1485 auto local_col_map = sourceMatrix.
getColMap()->getLocalMap();
1489 auto import_lids_d =
1491 importLIDs.size(),
true,
"import_lids");
1495 imports.size(),
true,
"imports");
1497 auto num_packets_per_lid_d =
1499 numPacketsPerLID.size(),
true,
"num_packets_per_lid");
1501 auto permute_from_lids_d =
1503 permuteFromLIDs.size(),
true,
"permute_from_lids");
1505 auto permute_to_lids_d =
1507 permuteToLIDs.size(),
true,
"permute_to_lids");
1511 CRS_rowptr.size(),
true,
"crs_rowptr");
1515 CRS_colind.size(),
true,
"crs_colidx");
1517 #ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1518 static_assert (! std::is_same<
1519 typename std::remove_const<
1520 typename std::decay<
1524 std::complex<double> >::value,
1525 "CRS_vals::value_type is std::complex<double>; this should never happen"
1526 ", since std::complex does not work in Kokkos::View objects.");
1531 CRS_vals.size(),
true,
"crs_vals");
1533 #ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1534 static_assert (! std::is_same<
1535 typename decltype (crs_vals_d)::non_const_value_type,
1536 std::complex<double> >::value,
1537 "crs_vals_d::non_const_value_type is std::complex<double>; this should "
1538 "never happen, since std::complex does not work in Kokkos::View objects.");
1543 SourcePids.size(),
true,
"src_pids");
1547 TargetPids.size(),
true,
"tgt_pids");
1549 size_t bytes_per_value = 0;
1563 size_t bytes_per_value_l = 0;
1564 if (local_matrix.values.extent(0) > 0) {
1565 const ST& val = local_matrix.values(0);
1568 const ST& val = crs_vals_d(0);
1571 Teuchos::reduceAll<int, size_t>(*(sourceMatrix.
getComm()),
1572 Teuchos::REDUCE_MAX,
1574 outArg(bytes_per_value));
1577 #ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE
1578 static_assert (! std::is_same<
1579 typename decltype (crs_vals_d)::non_const_value_type,
1580 std::complex<double> >::value,
1581 "crs_vals_d::non_const_value_type is std::complex<double>; this should "
1582 "never happen, since std::complex does not work in Kokkos::View objects.");
1585 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsArrays(
1586 local_matrix, local_col_map, import_lids_d, imports_d,
1587 num_packets_per_lid_d, permute_to_lids_d, permute_from_lids_d,
1588 crs_rowptr_d, crs_colind_d, crs_vals_d, src_pids_d, tgt_pids_d,
1589 numSameIDs, TargetNumRows, TargetNumNonzeros, MyTargetPID,
1593 typename decltype(crs_rowptr_d)::HostMirror crs_rowptr_h(
1594 CRS_rowptr.getRawPtr(), CRS_rowptr.size());
1597 typename decltype(crs_colind_d)::HostMirror crs_colind_h(
1598 CRS_colind.getRawPtr(), CRS_colind.size());
1601 typename decltype(crs_vals_d)::HostMirror crs_vals_h(
1602 CRS_vals.getRawPtr(), CRS_vals.size());
1605 typename decltype(tgt_pids_d)::HostMirror tgt_pids_h(
1606 TargetPids.getRawPtr(), TargetPids.size());
1614 #define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT( ST, LO, GO, NT ) \
1616 Details::unpackCrsMatrixAndCombine<ST, LO, GO, NT> ( \
1617 const CrsMatrix<ST, LO, GO, NT>&, \
1618 const Teuchos::ArrayView<const char>&, \
1619 const Teuchos::ArrayView<const size_t>&, \
1620 const Teuchos::ArrayView<const LO>&, \
1624 Details::unpackCrsMatrixAndCombineNew<ST, LO, GO, NT> ( \
1625 const CrsMatrix<ST, LO, GO, NT>&, \
1626 Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1627 Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>, \
1628 const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1630 const CombineMode); \
1632 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT> ( \
1633 const CrsMatrix<ST, LO, GO, NT> &, \
1634 const Teuchos::ArrayView<const LO>&, \
1635 const Teuchos::ArrayView<const char>&, \
1636 const Teuchos::ArrayView<const size_t>&, \
1638 const CombineMode, \
1640 const Teuchos::ArrayView<const LO>&, \
1641 const Teuchos::ArrayView<const LO>&, \
1645 const Teuchos::ArrayView<size_t>&, \
1646 const Teuchos::ArrayView<GO>&, \
1647 const Teuchos::ArrayView<CrsMatrix<ST, LO, GO, NT>::impl_scalar_type>&, \
1648 const Teuchos::ArrayView<const int>&, \
1649 Teuchos::Array<int>&); \
1651 Details::unpackAndCombineWithOwningPIDsCount<ST, LO, GO, NT> ( \
1652 const CrsMatrix<ST, LO, GO, NT> &, \
1653 const Teuchos::ArrayView<const LO> &, \
1654 const Teuchos::ArrayView<const char> &, \
1655 const Teuchos::ArrayView<const size_t>&, \
1659 const Teuchos::ArrayView<const LO>&, \
1660 const Teuchos::ArrayView<const LO>&);
Declaration of the Tpetra::CrsMatrix class.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
size_t compute_total_num_entries(const Kokkos::View< const size_t *, BDT > &num_packets_per_lid, const Kokkos::View< const size_t *, DT > &offsets, const Kokkos::View< const char *, BDT > &imports)
Total number of entries in any row of the packed matrix.
void unpackAndCombineIntoCrsMatrix(const LocalMatrix &local_matrix, const LocalMap &local_map, const Kokkos::View< const char *, BufferDeviceType > &imports, const Kokkos::View< const size_t *, BufferDeviceType > &num_packets_per_lid, const typename PackTraits< typename LocalMap::local_ordinal_type >::input_array_type import_lids, const Tpetra::CombineMode combine_mode)
Perform the unpack operation for the matrix.
size_t compute_maximum_num_entries(const Kokkos::View< const size_t *, BDT > &num_packets_per_lid, const Kokkos::View< const size_t *, DT > &offsets, const Kokkos::View< const char *, BDT > &imports)
Maximum number of entries in any row of the packed matrix.
bool compute_batch_info(const View1 &batches_per_lid, View2 &batch_info)
Compute the index and batch number associated with each batch.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
static size_t hierarchicalUnpackBatchSize()
Size of batch for hierarchical unpacking.
static size_t hierarchicalUnpackTeamSize()
Size of team for hierarchical unpacking.
"Local" part of Map suitable for Kokkos kernels.
KOKKOS_INLINE_FUNCTION LocalOrdinal getLocalElement(const GlobalOrdinal globalIndex) const
Get the local index corresponding to the given global index. (device only)
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
DeviceType device_type
The device type.
Kokkos::parallel_reduce functor to determine the number of entries (to unpack) in a KokkosSparse::Crs...
Base class for distributed Tpetra objects that support data redistribution.
Implementation details of Tpetra.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
void unpackCrsMatrixAndCombine(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &importLIDs, size_t constantNumPackets, CombineMode combineMode)
Unpack the imported column indices and values, and combine into matrix.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ INSERT
Insert new values that don't currently exist.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > unpackArray(value_type outBuf[], const char inBuf[], const size_t numEnt)
Unpack numEnt value_type entries from the given input buffer of bytes, to the given output buffer of ...
static KOKKOS_INLINE_FUNCTION size_t unpackValue(T &outVal, const char inBuf[])
Unpack the given value from the given output buffer.
Kokkos::View< value_type *, Kokkos::AnonymousSpace > output_array_type
The type of an output array of value_type.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
Unpacks and combines a single row of the CrsMatrix.
int error() const
Host function for getting the error.