40#ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
41#define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
43#include "TpetraCore_config.h"
44#include "Teuchos_Array.hpp"
45#include "Teuchos_ArrayView.hpp"
89namespace PackCrsMatrixImpl {
// Functor fragment whose class-name line is elided from this listing.
// From the visible lines it holds a row-offsets View, a list of local row
// indices to pack, optional per-entry PIDs, per-item byte sizes, and a
// device-side error flag; operator() writes a running value into
// outputOffsets_.  NOTE(review): many interior lines are missing here, so
// the comments below describe only what the visible lines show.
97template<
class OutputOffsetsViewType,
99 class InputOffsetsViewType,
100 class InputLocalRowIndicesViewType,
101 class InputLocalRowPidsViewType,
103#ifdef HAVE_TPETRA_DEBUG
// Element types of the View template parameters, with const stripped.
111 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
112 typedef typename CountsViewType::non_const_value_type count_type;
113 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
114 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
115 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
// The device type is taken from the output offsets View.
117 typedef typename OutputOffsetsViewType::device_type device_type;
// Compile-time contract: output and counts Views share an execution space,
// every View parameter really is a Kokkos::View, and the index / offset /
// count element types are built-in integers.
118 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
119 typename device_type::execution_space>::value,
120 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
121 static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
122 "OutputOffsetsViewType must be a Kokkos::View.");
// value_type == non_const_value_type holds only for a nonconst View.
123 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
124 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
125 static_assert (std::is_integral<output_offset_type>::value,
126 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
127 static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
128 "CountsViewType must be a Kokkos::View.");
// NOTE(review): this compares CountsViewType::value_type against
// output_offset_type, not count_type, although the message is about
// constness of CountsViewType.  It therefore also forces the two element
// types to be identical -- confirm whether count_type was intended.
129 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
130 "CountsViewType must be a nonconst Kokkos::View.");
131 static_assert (std::is_integral<count_type>::value,
132 "The type of each entry of CountsViewType must be a built-in integer type.");
133 static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
134 "InputOffsetsViewType must be a Kokkos::View.");
135 static_assert (std::is_integral<input_offset_type>::value,
136 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
137 static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
138 "InputLocalRowIndicesViewType must be a Kokkos::View.");
139 static_assert (std::is_integral<local_row_index_type>::value,
140 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
// Run-time dimension checks: the number of rows to pack is the length of
// lclRowInds_, and counts_ must have exactly that many entries.  A second
// check against outputOffsets_.extent(0) follows; its left-hand side is on
// an elided line.
163 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
165 if (
numRowsToPack !=
static_cast<size_t> (counts_.extent (0))) {
166 std::ostringstream
os;
168 <<
" != counts.extent(0) = " << counts_.extent (0)
173 static_cast<size_t> (outputOffsets_.extent (0))) {
174 std::ostringstream
os;
176 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
// operator() fragment.  The (update, final) parameter pair is presumably
// the Kokkos::parallel_scan body signature -- the caller reports a
// "parallel_scan error code"; confirm against the elided lines.
185 output_offset_type& update,
186 const bool final)
const
// Range checks on the scan index before it is used to index the Views.
189 if (
curInd <
static_cast<local_row_index_type
> (0)) {
197 if (
curInd >=
static_cast<local_row_index_type
> (outputOffsets_.extent (0))) {
// Store the running value at this index.
202 outputOffsets_(
curInd) = update;
// Only indices inside counts_ contribute a row; outputOffsets_ may be
// longer than counts_.
205 if (
curInd <
static_cast<local_row_index_type
> (counts_.extent (0))) {
// lclRow + 1 must be a valid index into rowOffsets_, and lclRow must be
// non-negative.
207 if (
static_cast<size_t> (
lclRow + 1) >=
static_cast<size_t> (rowOffsets_.extent (0)) ||
208 static_cast<local_row_index_type
> (
lclRow) <
static_cast<local_row_index_type
> (0)) {
// Number of stored entries in the row: difference of consecutive offsets.
216 const count_type
count =
217 static_cast<count_type
> (rowOffsets_(
lclRow+1) - rowOffsets_(
lclRow));
// Two-way conditional for the row's byte count: either 0, or the size of
// the entry count plus, per entry, the column-index size, the PID size
// (only when lclRowPids_ is non-empty) and a further term on an elided
// line.  The selecting condition is also elided.
224 static_cast<count_type
> (0) :
225 sizeOfLclCount_ +
count * (sizeOfGblColInd_ +
226 (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
// Copies the error_ View into a host mirror so the flag can be read on host.
242 auto error_h = Kokkos::create_mirror_view (error_);
243 Kokkos::deep_copy (
error_h, error_);
// Data members: const views of the inputs, per-item byte sizes, and the
// rank-0 error flag allocated on device_type.
250 typename InputOffsetsViewType::const_type rowOffsets_;
251 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
252 typename InputLocalRowPidsViewType::const_type lclRowPids_;
253 count_type sizeOfLclCount_;
254 count_type sizeOfGblColInd_;
255 count_type sizeOfPid_;
256 count_type sizeOfValue_;
257 Kokkos::View<int, device_type> error_;
// computeNumPacketsAndOffsets (name taken from the `prefix` string below;
// the signature line itself is elided).  Returns a value of the counts
// View's element type.  Visible behavior: validates the input dimensions,
// checks an error code attributed to a parallel_scan, optionally streams
// diagnostics, and returns one entry of outputOffsets read on the host.
274typename CountsViewType::non_const_value_type
// Per-item byte sizes, all expressed in the counts View's element type.
280 const typename CountsViewType::non_const_value_type
sizeOfLclCount,
282 const typename CountsViewType::non_const_value_type
sizeOfPid,
283 const typename CountsViewType::non_const_value_type
sizeOfValue)
// functor_type is instantiated with const versions of the input Views.
287 typename InputLocalRowIndicesViewType::const_type,
288 typename InputLocalRowPidsViewType::const_type>
functor_type;
289 typedef typename CountsViewType::non_const_value_type count_type;
290 typedef typename OutputOffsetsViewType::size_type size_type;
291 typedef typename OutputOffsetsViewType::execution_space execution_space;
292 typedef typename functor_type::local_row_index_type LO;
// The range policy iterates with the local row index type.
293 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
294 const char prefix[] =
"computeNumPacketsAndOffsets: ";
296 count_type
count = 0;
// rowOffsets with at most one entry describes a matrix with no rows, so
// asking to pack any row is an invalid argument.
304 (
rowOffsets.extent (0) <=
static_cast<size_type
> (1),
305 std::invalid_argument,
prefix <<
"There is at least one row to pack, "
306 "but the matrix has no rows. lclRowInds.extent(0) = " <<
// The output is compared against numRowsToPack + 1 entries; the left-hand
// side of the comparison is on an elided line.
311 static_cast<size_type
> (
numRowsToPack + 1), std::invalid_argument,
312 prefix <<
"Output dimension does not match number of rows to pack. "
314 <<
" != lclRowInds.extent(0) + 1 = "
// A nonzero error code becomes a std::runtime_error.
329 (
errCode != 0, std::runtime_error,
prefix <<
"parallel_scan error code "
// Diagnostic text is accumulated in a heap-allocated ostringstream; the
// condition guarding this path is elided.
339 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
345 <<
total <<
"." << std::endl;
349 os <<
"outputOffsets: [";
356 os <<
"]" << std::endl;
364 os <<
"]" << std::endl;
// The return value is an entry of outputOffsets fetched via getEntryOnHost;
// the index argument is on an elided line.
378 using Tpetra::Details::getEntryOnHost;
379 return static_cast<count_type
> (getEntryOnHost (
outputOffsets,
// packCrsMatrixRow fragment (the name line is elided; identified from the
// call site in PackCrsMatrixFunctor, which passes <ST, LMT, BDT>).
// Returns a Kokkos::pair<int, size_t>; the caller treats .first as an error
// code and .second as the number of bytes packed for the row.
399template<
class ST,
class ColumnMap,
class BufferDeviceType>
401Kokkos::pair<int, size_t>
// exports is the byte buffer that receives the packed row.
403 const Kokkos::View<char*, BufferDeviceType>& exports,
409 const size_t num_bytes_per_value,
410 const bool pack_pids)
412 using Kokkos::subview;
// Local and global ordinal types come from the column Map.
413 using LO =
typename ColumnMap::local_ordinal_type;
414 using GO =
typename ColumnMap::global_ordinal_type;
432 static_cast<size_t> (0);
// Error codes from the individual pack steps are summed (the definition of
// `p` is elided), so any nonzero step makes the total nonzero.
464 error_code +=
p.first;
468 if (error_code != 0) {
// Reduction functor that packs one exported row per index i into
// `exports`.  The reduction value is a (error code, index) pair; do_pack
// reports it as "error code ... for the first bad row ...".
// Error codes visible in operator(): 1 = export_lid past the last local
// row, 2 = the row's byte range falls outside the export buffer,
// 3 = bytes packed differ from num_packets_per_lid(i); any other nonzero
// value is forwarded from packCrsMatrixRow.
480template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
481struct PackCrsMatrixFunctor {
484 typedef typename local_matrix_device_type::value_type ST;
487 typedef typename local_matrix_device_type::device_type DT;
// Per-row byte counts and offsets are read-only; exports is written.
489 typedef Kokkos::View<const size_t*, BufferDeviceType>
490 num_packets_per_lid_view_type;
491 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
492 typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
496 typedef typename num_packets_per_lid_view_type::non_const_value_type
498 typedef typename offsets_view_type::non_const_value_type
// Reduction result: (error code, index of the offending export entry).
500 typedef Kokkos::pair<int, LO> value_type;
502 static_assert (std::is_same<LO, typename local_matrix_device_type::ordinal_type>::value,
503 "local_map_type::local_ordinal_type and "
504 "local_matrix_device_type::ordinal_type must be the same.");
506 local_matrix_device_type local_matrix;
507 local_map_type local_col_map;
508 exports_view_type exports;
509 num_packets_per_lid_view_type num_packets_per_lid;
510 export_lids_view_type export_lids;
511 source_pids_view_type source_pids;
512 offsets_view_type offsets;
513 size_t num_bytes_per_value;
// Constructor-side sanity check on the row map length (surrounding lines
// are elided).
537 static_cast<LO
> (local_matrix.graph.row_map.extent (0));
540 std::logic_error,
"local_matrix.graph.row_map.extent(0) = "
546 using ::Tpetra::Details::OrdinalTraits;
// join: adopt src only when src carries an error and dst does not yet.
550 KOKKOS_INLINE_FUNCTION
void
551 join (
volatile value_type& dst,
const volatile value_type& src)
const
555 if (src.first != 0 && dst.first == 0) {
// operator(): pack export entry i and record an error code on failure.
560 KOKKOS_INLINE_FUNCTION
561 void operator() (
const LO i, value_type& dst)
const
563 const size_t offset = offsets[i];
564 const LO export_lid = export_lids[i];
565 const size_t buf_size = exports.size();
566 const size_t num_bytes = num_packets_per_lid(i);
// NOTE(review): row_map[export_lid+1] is read here, before export_lid is
// range-checked against numRows() below -- for an out-of-range export_lid
// this looks like an out-of-bounds read; confirm against the full source.
567 const size_t num_ent =
568 static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
569 - local_matrix.graph.row_map[export_lid]);
// NOTE(review): each error branch below assigns dst only when
// dst.first != 0, i.e. only when an error is ALREADY recorded.  join()
// uses the opposite test (dst.first == 0) to keep an earlier error.  If
// the reduction value starts with first == 0 (the init is not visible
// here), these guards never fire and no error is ever reported -- the
// comparison looks inverted; confirm and consider `== 0`.
579 if (export_lid >= local_matrix.numRows ()) {
580 if (dst.first != 0) {
581 dst = Kokkos::make_pair (1, i);
// The row's bytes must lie entirely inside the export buffer.
585 else if ((offset > buf_size || offset + num_bytes > buf_size)) {
586 if (dst.first != 0) {
587 dst = Kokkos::make_pair (2, i);
// Take subviews of the row's values and column indices, then pack them.
597 const auto row_beg = local_matrix.graph.row_map[export_lid];
598 const auto row_end = local_matrix.graph.row_map[export_lid + 1];
599 auto vals_in = subview (local_matrix.values,
600 Kokkos::make_pair (row_beg, row_end));
601 auto lids_in = subview (local_matrix.graph.entries,
602 Kokkos::make_pair (row_beg, row_end));
603 typedef local_map_type LMT;
604 typedef BufferDeviceType BDT;
605 auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
606 source_pids, vals_in, offset,
607 num_ent, num_bytes_per_value,
// p.first is the row's error code, p.second the bytes actually written.
609 int error_code_this_row = p.first;
610 size_t num_bytes_packed_this_row = p.second;
611 if (error_code_this_row != 0) {
612 if (dst.first != 0) {
613 dst = Kokkos::make_pair (error_code_this_row, i);
// Bytes written must match the count computed for this row.
616 else if (num_bytes_packed_this_row != num_bytes) {
617 if (dst.first != 0) {
618 dst = Kokkos::make_pair (3, i);
// do_pack (name taken from the `prefix` string; the signature line is
// elided).  Validates the view dimensions, builds a pack functor, reduces
// over the exported rows, and throws std::runtime_error carrying the
// functor's (error code, bad row) result.
631template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
635 const Kokkos::View<char*, BufferDeviceType>& exports,
639 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
640 const size_t num_bytes_per_value,
641 const bool pack_pids)
644 using DT =
typename LocalMatrix::device_type;
645 using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
646 const char prefix[] =
"Tpetra::Details::do_pack: ";
// The offsets length is checked only when there is at least one row to
// export: offsets must have one more entry than export_lids.
648 if (export_lids.extent (0) != 0) {
650 (
static_cast<size_t> (offsets.extent (0)) !=
651 static_cast<size_t> (export_lids.extent (0) + 1),
652 std::invalid_argument,
prefix <<
"offsets.extent(0) = "
653 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
654 << export_lids.extent (0) <<
") + 1.");
// One byte count per exported row.
656 (export_lids.extent (0) != num_packets_per_lid.extent (0),
657 std::invalid_argument,
prefix <<
"export_lids.extent(0) = " <<
658 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
659 << num_packets_per_lid.extent (0) <<
".");
// Packing PIDs into a non-empty buffer requires source PIDs to be supplied.
664 (pack_pids && exports.extent (0) != 0 &&
665 source_pids.extent (0) == 0, std::invalid_argument,
prefix <<
666 "pack_pids is true, and exports.extent(0) = " <<
667 exports.extent (0) <<
" != 0, meaning that we need to pack at "
668 "least one matrix entry, but source_pids.extent(0) = 0.");
// Functor construction arguments (the constructor call line is elided).
674 num_packets_per_lid, export_lids,
675 source_pids, offsets, num_bytes_per_value,
// One iteration per entry of num_packets_per_lid.
678 typename pack_functor_type::value_type
result;
679 range_type
range (0, num_packets_per_lid.extent (0));
// Unconditional throw (condition literal `true`); presumably enclosed in a
// check on result.first that sits on an elided line -- confirm.
686 (
true, std::runtime_error,
prefix <<
"PackCrsMatrixFunctor "
687 "reported error code " <<
result.first <<
" for the first "
688 "bad row " <<
result.second <<
".");
// PackCrsMatrixImpl::packCrsMatrix (name taken from the `prefix` string;
// the signature line is elided).  Visible steps: fetch the local matrix and
// local column Map, validate num_packets_per_lid, determine
// num_bytes_per_value, compute per-row counts and offsets, grow `exports`
// if it is too small, then hand everything to the packing routine.
721template<
typename ST,
typename LO,
typename GO,
typename NT,
typename BufferDeviceType>
// exports is a DualView and may be reallocated here; num_packets_per_lid is
// an output (non-const element type).
724 Kokkos::DualView<char*, BufferDeviceType>& exports,
725 const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
726 const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
727 const Kokkos::View<const int*, typename NT::device_type>&
export_pids,
729 const bool pack_pids)
732 "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
737 typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
738 const char prefix[] =
"Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
// Compile-time switch; the "***" diagnostics below are presumably guarded
// by it (the guards are on elided lines).
739 constexpr bool debug =
false;
741 auto local_matrix =
sourceMatrix.getLocalMatrixDevice ();
742 auto local_col_map =
sourceMatrix.getColMap ()->getLocalMap ();
750 static_cast<size_t> (export_lids.extent (0));
753 static_cast<size_t> (num_packets_per_lid.extent (0)),
754 std::invalid_argument,
prefix <<
"num_export_lids.extent(0) = "
756 << num_packets_per_lid.extent (0) <<
".");
// A null data pointer on num_packets_per_lid is rejected.
// NOTE(review): uses NULL here while other parts of this file compare
// against nullptr -- harmless, but inconsistent.
759 (num_packets_per_lid.data () ==
NULL, std::invalid_argument,
761 "num_packets_per_lid.data() = "
762 << num_packets_per_lid.data () <<
" == NULL.");
// Bytes per matrix value: sampled from the first local value when one
// exists, then combined across processes with reduceAll (the reduction
// operator is on an elided line).
769 size_t num_bytes_per_value = 0;
785 if (local_matrix.values.extent(0) > 0) {
786 const ST&
val = local_matrix.values(0);
789 using Teuchos::reduceAll;
793 Teuchos::outArg (num_bytes_per_value));
// exports is replaced by an empty buffer (the guarding condition is elided).
797 exports = exports_view_type (
"exports", 0);
807 computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
808 local_matrix.graph.row_map, export_lids,
// Reallocate exports only when it is too small; a larger existing buffer is
// kept as-is.
814 if (
count >
static_cast<size_t> (exports.extent (0))) {
815 exports = exports_view_type (
"exports",
count);
817 std::ostringstream
os;
818 os <<
"*** exports resized to " <<
count << std::endl;
819 std::cerr <<
os.str ();
823 std::ostringstream
os;
824 os <<
"*** count: " <<
count <<
", exports.extent(0): "
825 << exports.extent (0) << std::endl;
826 std::cerr <<
os.str ();
// Same precondition as in do_pack, but phrased in terms of export_pids.
833 (pack_pids && exports.extent (0) != 0 &&
835 "pack_pids is true, and exports.extent(0) = " <<
836 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
837 "one matrix entry, but export_pids.extent(0) = 0.");
839 typedef typename std::decay<
decltype (local_matrix)>::type
840 local_matrix_device_type;
841 typedef typename std::decay<
decltype (local_col_map)>::type
// Mark the device side of the DualView as modified before packing into it.
844 exports.modify_device ();
847 (local_matrix, local_col_map,
exports_d, num_packets_per_lid,
848 export_lids,
export_pids, offsets, num_bytes_per_value,
// Teuchos::Array front end for PackCrsMatrixImpl::packCrsMatrix (signature
// line elided).  Packs without PIDs (pack_pids is false) into a temporary
// DualView, then wraps the caller's Teuchos::Array storage in a host View
// -- presumably to copy the packed bytes back; the copy itself is on an
// elided line.
855template<
typename ST,
typename LO,
typename GO,
typename NT>
858 Teuchos::Array<char>& exports,
860 const Teuchos::ArrayView<const LO>&
exportLIDs,
864 using device_type =
typename local_matrix_device_type::device_type;
// Host execution space associated with device_type's host mirror, paired
// with HostSpace memory.
866 using host_exec_space =
typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
867 using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
877 "num_packets_per_lid");
891 Kokkos::DualView<char*, buffer_device_type>
exports_dv;
892 constexpr bool pack_pids =
false;
893 PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
// The caller's array is adjusted only when its size differs from the packed
// buffer's (the body of this branch is elided).
909 if (
static_cast<size_t> (exports.size ()) !=
910 static_cast<size_t> (
exports_dv.extent (0))) {
// View constructed directly on the Teuchos::Array's raw pointer.
913 Kokkos::View<char*, host_dev_type>
exports_h (exports.getRawPtr (),
// packCrsMatrixNew fragment (name taken from the profiling label below).
// Packs without PIDs: it passes a zero-length exportPIDs view and
// pack_pids == false to PackCrsMatrixImpl::packCrsMatrix.
918template<
typename ST,
typename LO,
typename GO,
typename NT>
// Empty placeholder; no PIDs are packed on this path.
931 Kokkos::View<int*, device_type>
exportPIDs_d (
"exportPIDs", 0);
932 constexpr bool pack_pids =
false;
945 "Tpetra::Details::packCrsMatrixNew",
948 PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
// packCrsMatrixWithOwningPIDs fragment (name taken from the message string
// below).  Packs rows together with their source PIDs (pack_pids is true).
// Exceptions from the packing call and from Kokkos::deep_copy are caught
// and logged to std::cerr with a "Proc <rank>: ..." prefix; what happens
// after logging (rethrow or swallow) is on elided lines.
953template<
typename ST,
typename LO,
typename GO,
typename NT>
958 const Teuchos::ArrayView<const LO>&
exportLIDs,
959 const Teuchos::ArrayView<const int>&
sourcePIDs,
964 typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
965 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace>
host_dev_type;
967 typename local_matrix_device_type::device_type
outputDevice;
// Log prefix, default-constructed (null) and filled in further down.
// NOTE(review): every handler below dereferences *prefix; the condition
// under which prefix is allocated is not visible here -- confirm it is
// never null on those paths.
970 std::unique_ptr<std::string>
prefix;
// Rank of the calling process, with separate branches for a null Map and a
// null communicator (their return values are elided).
972 const int myRank = [&] () {
974 if (
map.get () ==
nullptr) {
977 auto comm =
map->getComm ();
978 if (comm.get () ==
nullptr) {
981 return comm->getRank ();
983 std::ostringstream
os;
984 os <<
"Proc " <<
myRank <<
": packCrsMatrixWithOwningPIDs: ";
985 prefix = std::unique_ptr<std::string> (
new std::string (
os.str ()));
987 std::ostringstream
os2;
989 std::cerr <<
os2.str ();
1000 "num_packets_per_lid");
// This entry point always packs PIDs.
1016 constexpr bool pack_pids =
true;
1018 PackCrsMatrixImpl::packCrsMatrix
// Handlers for the packing call: std::exception first, then everything else.
1022 catch (std::exception&
e) {
1024 std::ostringstream
os;
1025 os << *
prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw: "
1026 <<
e.what () << std::endl;
1027 std::cerr <<
os.str ();
1033 std::ostringstream
os;
1034 os << *
prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw an exception "
1035 "not a subclass of std::exception" << std::endl;
1036 std::cerr <<
os.str ();
// Same two-tier handling for the subsequent Kokkos::deep_copy.
1049 catch (std::exception&
e) {
1051 std::ostringstream
os;
1052 os << *
prefix <<
"Kokkos::deep_copy threw: " <<
e.what () << std::endl;
1053 std::cerr <<
os.str ();
1059 std::ostringstream
os;
1060 os << *
prefix <<
"Kokkos::deep_copy threw an exception not a subclass "
1061 "of std::exception" << std::endl;
1062 std::cerr <<
os.str ();
// Completion message.
1069 std::ostringstream
os;
1070 os << *
prefix <<
"done" << std::endl;
1071 std::cerr <<
os.str ();
// Macro listing the signatures of Details::packCrsMatrix,
// Details::packCrsMatrixNew and Details::packCrsMatrixWithOwningPIDs for
// one (ST, LO, GO, NT) combination -- presumably explicit template
// instantiations; the `template void` lines are elided from this listing.
// No comments are placed inside the macro body because every line ends
// with a line continuation.
1078#define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1080 Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1081 Teuchos::Array<char>&, \
1082 const Teuchos::ArrayView<size_t>&, \
1083 const Teuchos::ArrayView<const LO>&, \
1086 Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1087 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1088 const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1089 const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1092 Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1093 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1094 const Teuchos::ArrayView<size_t>&, \
1095 const Teuchos::ArrayView<const LO>&, \
1096 const Teuchos::ArrayView<const int>&, \
Declaration of the Tpetra::CrsMatrix class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack data.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
KOKKOS_FUNCTION Kokkos::pair< int, size_t > packCrsMatrixRow(const ColumnMap &col_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< typename ColumnMap::local_ordinal_type >::input_array_type &lids_in, const typename PackTraits< int >::input_array_type &pids_in, const typename PackTraits< ST >::input_array_type &vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, const bool pack_pids)
Packs a single row of the CrsMatrix.
Struct that holds views of the contents of a CrsMatrix.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.
static bool verbose()
Whether Tpetra is in verbose mode.
"Local" part of Map suitable for Kokkos kernels.
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Implementation details of Tpetra.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject interface.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of bytes.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.