42 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_HPP 43 #define TPETRA_DETAILS_PACKCRSMATRIX_HPP 45 #include "TpetraCore_config.h" 46 #include "Teuchos_Array.hpp" 47 #include "Teuchos_ArrayView.hpp" 51 #include "Kokkos_Core.hpp" 66 #ifndef DOXYGEN_SHOULD_SKIP_THIS 69 #endif // DOXYGEN_SHOULD_SKIP_THIS 76 template<
class OutputOffsetsViewType,
78 class InputOffsetsViewType,
79 class InputLocalRowIndicesViewType,
81 #ifdef HAVE_TPETRA_DEBUG 85 #endif // HAVE_TPETRA_DEBUG 87 class NumPacketsAndOffsetsFunctor {
89 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
90 typedef typename CountsViewType::non_const_value_type count_type;
91 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
92 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
94 typedef typename OutputOffsetsViewType::device_type device_type;
95 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
96 typename device_type::execution_space>::value,
97 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
98 static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
99 "OutputOffsetsViewType must be a Kokkos::View.");
100 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
101 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
102 static_assert (std::is_integral<output_offset_type>::value,
103 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
104 static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
105 "CountsViewType must be a Kokkos::View.");
106 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
107 "CountsViewType must be a nonconst Kokkos::View.");
108 static_assert (std::is_integral<count_type>::value,
109 "The type of each entry of CountsViewType must be a built-in integer type.");
110 static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
111 "InputOffsetsViewType must be a Kokkos::View.");
112 static_assert (std::is_integral<input_offset_type>::value,
113 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
114 static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
115 "InputLocalRowIndicesViewType must be a Kokkos::View.");
116 static_assert (std::is_integral<local_row_index_type>::value,
117 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
119 NumPacketsAndOffsetsFunctor (
const OutputOffsetsViewType& outputOffsets,
120 const CountsViewType& counts,
121 const InputOffsetsViewType& rowOffsets,
122 const InputLocalRowIndicesViewType& lclRowInds,
123 const count_type sizeOfLclCount,
124 const count_type sizeOfValue,
125 const count_type sizeOfGblColInd) :
126 outputOffsets_ (outputOffsets),
128 rowOffsets_ (rowOffsets),
129 lclRowInds_ (lclRowInds),
130 sizeOfLclCount_ (sizeOfLclCount),
131 sizeOfValue_ (sizeOfValue),
132 sizeOfGblColInd_ (sizeOfGblColInd),
136 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.dimension_0 ());
138 if (numRowsToPack != static_cast<size_t> (counts_.dimension_0 ())) {
139 std::ostringstream os;
140 os <<
"lclRowInds.dimension_0() = " << numRowsToPack
141 <<
" != counts.dimension_0() = " << counts_.dimension_0 ()
143 throw std::invalid_argument (os.str ());
145 if (static_cast<size_t> (numRowsToPack + 1) !=
static_cast<size_t> (outputOffsets_.dimension_0 ())) {
146 std::ostringstream os;
147 os <<
"lclRowInds.dimension_0() + 1 = " << (numRowsToPack + 1)
148 <<
" != outputOffsets.dimension_0() = " << outputOffsets_.dimension_0 ()
150 throw std::invalid_argument (os.str ());
155 KOKKOS_INLINE_FUNCTION
void 156 operator() (
const local_row_index_type& curInd,
157 output_offset_type& update,
158 const bool final)
const 161 if (curInd < static_cast<local_row_index_type> (0)) {
169 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.dimension_0 ())) {
174 outputOffsets_(curInd) = update;
177 if (curInd < static_cast<local_row_index_type> (counts_.dimension_0 ())) {
178 const auto lclRow = lclRowInds_(curInd);
179 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.dimension_0 ()) ||
180 static_cast<local_row_index_type> (lclRow) <
static_cast<local_row_index_type
> (0)) {
188 const count_type count =
189 static_cast<count_type
> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
194 const count_type numBytes = (count == 0) ?
195 static_cast<count_type> (0) :
196 sizeOfLclCount_ + count * (sizeOfGblColInd_ + sizeOfValue_);
199 counts_(curInd) = numBytes;
210 int getError ()
const {
211 auto error_h = Kokkos::create_mirror_view (error_);
217 OutputOffsetsViewType outputOffsets_;
218 CountsViewType counts_;
219 typename InputOffsetsViewType::const_type rowOffsets_;
220 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
221 count_type sizeOfLclCount_;
222 count_type sizeOfValue_;
223 count_type sizeOfGblColInd_;
224 Kokkos::View<int, device_type> error_;
227 template<
class OutputOffsetsViewType,
228 class CountsViewType,
229 class InputOffsetsViewType,
230 class InputLocalRowIndicesViewType>
231 std::pair<typename CountsViewType::non_const_value_type, bool>
232 computeNumPacketsAndOffsets (std::unique_ptr<std::ostringstream>& errStr,
233 const OutputOffsetsViewType& outputOffsets,
234 const CountsViewType& counts,
235 const InputOffsetsViewType& rowOffsets,
236 const InputLocalRowIndicesViewType& lclRowInds,
237 const typename CountsViewType::non_const_value_type sizeOfLclCount,
238 const typename CountsViewType::non_const_value_type sizeOfValue,
239 const typename CountsViewType::non_const_value_type sizeOfGblColInd)
241 typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
242 CountsViewType,
typename InputOffsetsViewType::const_type,
243 typename InputLocalRowIndicesViewType::const_type> functor_type;
244 typedef typename CountsViewType::non_const_value_type count_type;
245 typedef typename OutputOffsetsViewType::size_type size_type;
246 typedef typename OutputOffsetsViewType::execution_space execution_space;
247 typedef typename functor_type::local_row_index_type LO;
248 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
249 const char prefix[] =
"computeNumPacketsAndOffsets: ";
251 const count_type numRowsToPack = lclRowInds.dimension_0 ();
252 if (numRowsToPack == 0) {
253 return {
static_cast<count_type
> (0),
true};
256 if (rowOffsets.dimension_0 () <=
static_cast<size_type
> (1)) {
257 if (errStr.get () == NULL) {
258 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
260 std::ostringstream& os = *errStr;
262 <<
"There is at least one row to pack, but the matrix has no rows. " 263 "lclRowInds.dimension_0() = " << numRowsToPack <<
", but " 264 "rowOffsets.dimension_0() = " << rowOffsets.dimension_0 () <<
" <= 1." 266 return {
static_cast<count_type
> (0),
false};
268 if (outputOffsets.dimension_0 () !=
static_cast<size_type
> (numRowsToPack + 1)) {
269 if (errStr.get () == NULL) {
270 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
272 std::ostringstream& os = *errStr;
274 <<
"Output dimension does not match number of rows to pack. " 275 <<
"outputOffsets.dimension_0() = " << outputOffsets.dimension_0 ()
276 <<
" != lclRowInds.dimension_0() + 1 = " 277 <<
static_cast<size_type
> (numRowsToPack + 1) <<
"." << std::endl;
278 return {
static_cast<count_type
> (0),
false};
281 functor_type f (outputOffsets, counts, rowOffsets,
282 lclRowInds, sizeOfLclCount,
283 sizeOfValue, sizeOfGblColInd);
284 #ifdef HAVE_TPETRA_DEBUG 285 TEUCHOS_TEST_FOR_EXCEPT(static_cast<size_t> (outputOffsets.dimension_0 ()) !=
286 static_cast<size_t> (numRowsToPack + 1));
287 TEUCHOS_TEST_FOR_EXCEPT(static_cast<size_t> (counts.dimension_0 ()) !=
288 static_cast<size_t> (numRowsToPack));
289 TEUCHOS_TEST_FOR_EXCEPT(static_cast<size_t> (outputOffsets.dimension_0 ()) !=
290 static_cast<size_t> (numRowsToPack + 1));
291 #endif // HAVE_TPETRA_DEBUG 293 Kokkos::parallel_scan (range_type (0, numRowsToPack+1), f);
296 const int errCode = f.getError ();
298 if (errStr.get () == NULL) {
299 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
301 std::ostringstream& os = *errStr;
303 <<
"NumPacketsAndOffsetsFunctor reported error code " << errCode
304 <<
" != 0." << std::endl;
310 for (LO k = 0; k < numRowsToPack; ++k) {
313 if (outputOffsets(numRowsToPack) != total) {
314 if (errStr.get () == NULL) {
315 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
317 std::ostringstream& os = *errStr;
319 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") " 320 << outputOffsets(numRowsToPack) <<
" != sum of counts = " 321 << total <<
"." << std::endl;
322 if (numRowsToPack != 0) {
324 if (numRowsToPack < static_cast<LO> (10)) {
325 os <<
"outputOffsets: [";
326 for (LO i = 0; i <= numRowsToPack; ++i) {
327 os << outputOffsets(i);
328 if (static_cast<LO> (i + 1) <= numRowsToPack) {
332 os <<
"]" << std::endl;
334 for (LO i = 0; i < numRowsToPack; ++i) {
336 if (static_cast<LO> (i + 1) < numRowsToPack) {
340 os <<
"]" << std::endl;
343 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = " 344 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
347 return {outputOffsets(numRowsToPack),
false};
349 #endif // HAVE_TPETRA_DEBUG 353 auto outputOffsets_last = Kokkos::subview (outputOffsets, numRowsToPack);
354 auto outputOffsets_last_h = Kokkos::create_mirror_view (outputOffsets_last);
355 return {
static_cast<count_type
> (outputOffsets_last_h ()),
true};
368 size_t bad_num_bytes;
369 bool out_of_bounds_error;
373 bad_index (Tpetra::Details::OrdinalTraits<LO>::invalid ()),
376 out_of_bounds_error (
false),
377 packing_error (
false)
384 return bad_index == Tpetra::Details::OrdinalTraits<LO>::invalid();
387 std::string summary()
const 389 std::ostringstream os;
390 os <<
"First bad index: " << bad_index
391 <<
", first bad offset: " << bad_offset
392 <<
", first bad number of bytes: " << bad_num_bytes
393 <<
", out of bounds error?: " << (out_of_bounds_error ?
"true" :
"false");
398 template<
class NumPacketsPerLidViewType,
399 class OffsetsViewType,
400 class ExportsViewType,
401 class ExportLidsViewType,
402 class LocalMatrixType,
404 struct PackCrsMatrixFunctor {
405 typedef NumPacketsPerLidViewType num_packets_per_lid_view_type;
406 typedef OffsetsViewType offsets_view_type;
407 typedef ExportsViewType exports_view_type;
408 typedef ExportLidsViewType export_lids_view_type;
410 typedef typename num_packets_per_lid_view_type::non_const_value_type count_type;
411 typedef typename offsets_view_type::non_const_value_type offset_type;
412 typedef typename LocalMatrixType::value_type IST;
413 typedef typename LocalMatrixType::ordinal_type LO;
414 typedef typename LocalMapType::global_ordinal_type GO;
417 static_assert (std::is_same<LO, typename LocalMatrixType::ordinal_type>::value,
418 "LocalMapType::local_ordinal_type and " 419 "LocalMatrixType::ordinal_type must be the same.");
421 num_packets_per_lid_view_type num_packets_per_lid_;
422 offsets_view_type offsets_;
423 exports_view_type exports_;
424 export_lids_view_type export_lids_;
425 LocalMatrixType local_matrix_;
426 LocalMapType local_col_map_;
428 PackCrsMatrixFunctor (
const num_packets_per_lid_view_type& num_packets_per_lid,
429 const offsets_view_type& offsets,
430 const exports_view_type& exports,
431 const export_lids_view_type& export_lids,
432 const LocalMatrixType& local_matrix,
433 const LocalMapType& local_col_map) :
434 num_packets_per_lid_ (num_packets_per_lid),
437 export_lids_ (export_lids),
438 local_matrix_ (local_matrix),
439 local_col_map_ (local_col_map)
442 KOKKOS_INLINE_FUNCTION
void init(value_type& dst)
const 444 dst.bad_index = Tpetra::Details::OrdinalTraits<LO>::invalid();
446 dst.bad_num_bytes = 0;
447 dst.out_of_bounds_error =
false;
448 dst.packing_error =
false;
451 KOKKOS_INLINE_FUNCTION
void 452 join (
volatile value_type& dst,
const volatile value_type& src)
const 459 LO invalid = Tpetra::Details::OrdinalTraits<LO>::invalid();
460 if (src.bad_index != invalid) {
465 if (dst.bad_index == invalid || src.bad_index < dst.bad_index) {
466 dst.bad_index = src.bad_index;
467 dst.bad_offset = src.bad_offset;
468 dst.bad_num_bytes = src.bad_num_bytes;
469 dst.out_of_bounds_error = src.out_of_bounds_error;
470 dst.packing_error = src.packing_error;
475 KOKKOS_INLINE_FUNCTION
476 void operator() (
const LO i, value_type& dst)
const 478 const LO export_lid = export_lids_[i];
479 const size_t buf_size = exports_.size();
480 const size_t num_ent =
481 static_cast<size_t> (local_matrix_.graph.row_map[export_lid+1]
482 - local_matrix_.graph.row_map[export_lid]);
489 char*
const num_ent_beg = exports_.ptr_on_device() + offsets_(i);
490 char*
const num_ent_end = num_ent_beg +
sizeof (LO);
491 char*
const val_beg = num_ent_end;
492 char*
const val_end = val_beg + num_ent *
sizeof (IST);
493 char*
const ind_beg = val_end;
494 const size_t num_bytes = num_packets_per_lid_(i);
496 if ((offsets_(i) > buf_size || offsets_(i) + num_bytes > buf_size)) {
497 dst.out_of_bounds_error =
true;
501 num_ent_beg, val_beg, ind_beg, num_ent, export_lid);
503 if (dst.out_of_bounds_error || dst.packing_error) {
505 dst.bad_offset = offsets_(i);
506 dst.bad_num_bytes = num_bytes;
524 template<
class LocalMatrixType,
class LocalMapType>
527 const LocalMapType& lclColMap,
528 char*
const numEntOut,
532 const typename LocalMatrixType::ordinal_type lclRow)
534 using Kokkos::subview;
535 typedef LocalMatrixType local_matrix_type;
536 typedef LocalMapType local_map_type;
537 typedef typename local_matrix_type::value_type IST;
538 typedef typename local_matrix_type::ordinal_type LO;
539 typedef typename local_matrix_type::size_type offset_type;
540 typedef typename local_map_type::global_ordinal_type GO;
541 typedef Kokkos::pair<offset_type, offset_type> pair_type;
543 const LO numEntLO =
static_cast<LO
> (numEnt);
546 memcpy (numEntOut, &numEntLO,
sizeof (LO));
552 #ifdef HAVE_TPETRA_DEBUG 553 if (lclRow >= lclMatrix.numRows () ||
554 (
static_cast<size_t> (lclRow + 1) >=
555 static_cast<size_t> (lclMatrix.graph.row_map.dimension_0 ()))) {
556 #else // NOT HAVE_TPETRA_DEBUG 557 if (lclRow >= lclMatrix.numRows ()) {
558 #endif // HAVE_TPETRA_DEBUG 563 const GO flagInd = Tpetra::Details::OrdinalTraits<GO>::invalid ();
564 for (
size_t k = 0; k < numEnt; ++k) {
567 memcpy (indOut + k *
sizeof (GO), &flagInd,
sizeof (GO));
571 const IST zero = Kokkos::ArithTraits<IST>::zero ();
572 for (
size_t k = 0; k < numEnt; ++k) {
575 memcpy (valOut + k *
sizeof (IST), &zero,
sizeof (IST));
583 const offset_type rowBeg = lclMatrix.graph.row_map[lclRow];
584 const offset_type rowEnd = lclMatrix.graph.row_map[lclRow + 1];
586 auto indIn = subview (lclMatrix.graph.entries, pair_type (rowBeg, rowEnd));
587 auto valIn = subview (lclMatrix.values, pair_type (rowBeg, rowEnd));
591 for (
size_t k = 0; k < numEnt; ++k) {
592 const GO gblIndIn = lclColMap.getGlobalElement (indIn[k]);
595 memcpy (indOut + k *
sizeof (GO), &gblIndIn,
sizeof (GO));
599 memcpy (valOut, valIn.ptr_on_device (), numEnt *
sizeof (IST));
630 template<
class LocalMatrixType,
class LocalMapType>
633 const LocalMapType& lclColMap,
634 std::unique_ptr<std::string>& errStr,
635 Teuchos::Array<char>& exports,
636 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
637 size_t& constantNumPackets,
638 const Teuchos::ArrayView<const typename LocalMatrixType::ordinal_type>& exportLIDs,
643 using Kokkos::HostSpace;
644 using Kokkos::MemoryUnmanaged;
646 typedef typename LocalMapType::local_ordinal_type LO;
647 typedef typename LocalMapType::global_ordinal_type GO;
648 typedef typename LocalMatrixType::value_type IST;
649 typedef typename LocalMatrixType::device_type device_type;
650 typedef typename device_type::execution_space execution_space;
651 typedef typename Kokkos::RangePolicy<execution_space, LO> range_type;
652 const char prefix[] =
"Tpetra::Details::packCrsMatrix: ";
654 static_assert (std::is_same<LO, typename LocalMatrixType::ordinal_type>::value,
655 "LocalMapType::local_ordinal_type and " 656 "LocalMatrixType::ordinal_type must be the same.");
660 constantNumPackets = 0;
662 const size_t numExportLIDs =
static_cast<size_t> (exportLIDs.size ());
663 if (numExportLIDs != static_cast<size_t> (numPacketsPerLID.size ())) {
664 std::ostringstream os;
665 os << prefix <<
"exportLIDs.size() = " << numExportLIDs
666 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.size ()
668 if (errStr.get () == NULL) {
669 errStr = std::unique_ptr<std::string> (
new std::string (os.str ()));
672 *errStr = *errStr + os.str ();
676 if (numExportLIDs != 0 && numPacketsPerLID.getRawPtr () == NULL) {
677 std::ostringstream os;
678 os << prefix <<
"numExportLIDs = " << numExportLIDs <<
" != 0, but " 679 <<
"numPacketsPerLID.getRawPtr() = " << numPacketsPerLID.getRawPtr ()
680 <<
" != NULL." << std::endl;
681 if (errStr.get () == NULL) {
682 errStr = std::unique_ptr<std::string> (
new std::string (os.str ()));
685 *errStr = *errStr + os.str ();
690 if (numExportLIDs == 0) {
695 typename LocalMatrixType::device_type outputDevice;
699 auto numPktPerLid_d =
701 numPacketsPerLID.getRawPtr (),
702 numPacketsPerLID.size (),
709 exportLIDs.getRawPtr (),
714 Kokkos::View<size_t*, device_type> packOffsets_d (
"packOffsets",
718 std::unique_ptr<std::ostringstream> errStrm;
719 std::pair<size_t, bool> countResult =
720 computeNumPacketsAndOffsets (errStrm,
723 lclMatrix.graph.row_map,
729 if (! countResult.second) {
730 if (errStr.get () == NULL) {
731 errStr = std::unique_ptr<std::string> (
new std::string (errStrm->str ()));
734 *errStr = *errStr + errStrm->str ();
741 typename decltype (numPktPerLid_d)::HostMirror numPktPerLid_h (numPacketsPerLID.getRawPtr (),
742 numPacketsPerLID.size ());
746 if (countResult.first > static_cast<size_t> (exports.size ())) {
747 exports.resize (countResult.first);
753 exports.getRawPtr (),
758 typedef PackCrsMatrixFunctor<
759 decltype (numPktPerLid_d),
760 decltype (packOffsets_d),
761 decltype (packBuf_d),
762 decltype (packLids_d),
764 LocalMapType> pack_functor_type;
765 pack_functor_type packer (numPktPerLid_d, packOffsets_d, packBuf_d,
766 packLids_d, lclMatrix, lclColMap);
767 typename pack_functor_type::value_type result;
768 Kokkos::parallel_reduce (range_type (0, numExportLIDs), packer, result);
770 if (! result.success ()) {
771 std::ostringstream os;
772 os <<
"Proc " << myRank <<
": packCrsMatrix failed. " 775 if (errStr.get () != NULL) {
776 errStr = std::unique_ptr<std::string> (
new std::string ());
783 typename decltype (packBuf_d)::HostMirror packBuf_h (exports.getRawPtr (),
792 #endif // TPETRA_DETAILS_PACKCRSMATRIX_HPP Namespace Tpetra contains the class and methods constituting the Tpetra library.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
bool packCrsMatrix(const LocalMatrixType &lclMatrix, const LocalMapType &lclColMap, std::unique_ptr< std::string > &errStr, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets, const Teuchos::ArrayView< const typename LocalMatrixType::ordinal_type > &exportLIDs, const int myRank, Distributor &)
Pack specified entries of the given local sparse matrix for communication.
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
Reduction result for PackCrsMatrixFunctor below.
Implementation details of Tpetra.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
Sets up and executes a communication plan for a Tpetra DistObject.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
KOKKOS_FUNCTION bool packCrsMatrixRow(const LocalMatrixType &lclMatrix, const LocalMapType &lclColMap, char *const numEntOut, char *const valOut, char *const indOut, const size_t numEnt, const typename LocalMatrixType::ordinal_type lclRow)
Packs a single row of the CrsMatrix.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...