40 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
41 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
43 #include "TpetraCore_config.h"
44 #include "Teuchos_Array.hpp"
45 #include "Teuchos_ArrayView.hpp"
89 namespace PackCrsMatrixImpl {
// Scan functor that fills an offsets view and a per-row byte-count view.
// Its constructor arguments match the `functor_type f (...)` call made in
// computeNumPacketsAndOffsets, which runs it with Kokkos::parallel_scan.
// NOTE(review): this listing has gaps (the embedded line numbers skip), so the
// class header, part of the template parameter list, and most braces are not
// visible here.
97 template<
class OutputOffsetsViewType,
99 class InputOffsetsViewType,
100 class InputLocalRowIndicesViewType,
101 class InputLocalRowPidsViewType,
103 #ifdef HAVE_TPETRA_DEBUG
// Element types of each view, with const stripped.
111 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
112 typedef typename CountsViewType::non_const_value_type count_type;
113 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
114 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
115 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
// The device type is taken from the output offsets view.
117 typedef typename OutputOffsetsViewType::device_type device_type;
// Compile-time requirements on the view template arguments.
118 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
119 typename device_type::execution_space>::value,
120 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
121 static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
122 "OutputOffsetsViewType must be a Kokkos::View.");
123 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
124 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
125 static_assert (std::is_integral<output_offset_type>::value,
126 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
127 static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
128 "CountsViewType must be a Kokkos::View.");
// NOTE(review): this compares CountsViewType::value_type against
// output_offset_type rather than count_type; as a "nonconst" check it only
// holds when both value types coincide -- confirm whether count_type was meant.
129 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
130 "CountsViewType must be a nonconst Kokkos::View.");
131 static_assert (std::is_integral<count_type>::value,
132 "The type of each entry of CountsViewType must be a built-in integer type.");
133 static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
134 "InputOffsetsViewType must be a Kokkos::View.");
135 static_assert (std::is_integral<input_offset_type>::value,
136 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
137 static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
138 "InputLocalRowIndicesViewType must be a Kokkos::View.");
139 static_assert (std::is_integral<local_row_index_type>::value,
140 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
// Constructor parameters (the constructor name and its first parameter are not
// visible in this listing). The four sizeOf* arguments are byte sizes used in
// the per-row byte count computed in operator().
143 const CountsViewType& counts,
144 const InputOffsetsViewType& rowOffsets,
145 const InputLocalRowIndicesViewType& lclRowInds,
146 const InputLocalRowPidsViewType& lclRowPids,
147 const count_type sizeOfLclCount,
148 const count_type sizeOfGblColInd,
149 const count_type sizeOfPid,
150 const count_type sizeOfValue) :
151 outputOffsets_ (outputOffsets),
153 rowOffsets_ (rowOffsets),
154 lclRowInds_ (lclRowInds),
155 lclRowPids_ (lclRowPids),
156 sizeOfLclCount_ (sizeOfLclCount),
157 sizeOfGblColInd_ (sizeOfGblColInd),
158 sizeOfPid_ (sizeOfPid),
159 sizeOfValue_ (sizeOfValue),
// Constructor body: counts_ must have one entry per row to pack, and
// outputOffsets_ must have one more entry than that. Either mismatch throws
// std::invalid_argument with the offending extents in the message.
163 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
165 if (numRowsToPack !=
static_cast<size_t> (counts_.extent (0))) {
166 std::ostringstream os;
167 os <<
"lclRowInds.extent(0) = " << numRowsToPack
168 <<
" != counts.extent(0) = " << counts_.extent (0)
170 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
172 if (
static_cast<size_t> (numRowsToPack + 1) !=
173 static_cast<size_t> (outputOffsets_.extent (0))) {
174 std::ostringstream os;
175 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
176 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
178 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Scan body. curInd is the scan index, update is the running value carried by
// the scan, and final is the scan-pass flag.
183 KOKKOS_INLINE_FUNCTION
void
184 operator() (
const local_row_index_type& curInd,
185 output_offset_type& update,
186 const bool final)
const
// Range checks on curInd; what happens on failure is not visible here.
189 if (curInd <
static_cast<local_row_index_type
> (0)) {
197 if (curInd >=
static_cast<local_row_index_type
> (outputOffsets_.extent (0))) {
// Store the running value at this index.
202 outputOffsets_(curInd) = update;
// The scan range has one more index than counts_ has entries, so only indices
// inside counts_ have a row to process.
205 if (curInd <
static_cast<local_row_index_type
> (counts_.extent (0))) {
206 const auto lclRow = lclRowInds_(curInd);
// Check that lclRow and lclRow+1 are usable indices into rowOffsets_.
207 if (
static_cast<size_t> (lclRow + 1) >=
static_cast<size_t> (rowOffsets_.extent (0)) ||
208 static_cast<local_row_index_type
> (lclRow) <
static_cast<local_row_index_type
> (0)) {
// Number of entries in the row, as the difference of adjacent row offsets.
216 const count_type count =
217 static_cast<count_type
> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// Bytes for this row: zero for an empty row; otherwise the count header plus a
// per-entry cost. The PID size is included only when lclRowPids_ is nonempty.
// The remaining terms of the expression are not visible in this listing.
223 const count_type numBytes = (count == 0) ?
224 static_cast<count_type
> (0) :
225 sizeOfLclCount_ + count * (sizeOfGblColInd_ +
226 (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
230 counts_(curInd) = numBytes;
// Mirror view of the error flag; presumably part of getError() -- the
// surrounding function header is not visible.
242 auto error_h = Kokkos::create_mirror_view (error_);
// Data members: the two output views, const versions of the three input views,
// the four byte sizes, and a single-int error flag on the device.
248 OutputOffsetsViewType outputOffsets_;
249 CountsViewType counts_;
250 typename InputOffsetsViewType::const_type rowOffsets_;
251 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
252 typename InputLocalRowPidsViewType::const_type lclRowPids_;
253 count_type sizeOfLclCount_;
254 count_type sizeOfGblColInd_;
255 count_type sizeOfPid_;
256 count_type sizeOfValue_;
257 Kokkos::View<int, device_type> error_;
// Validates the view extents, runs the scan functor over
// [0, lclRowInds.extent(0) + 1) with Kokkos::parallel_scan, and returns a value
// read from outputOffsets via getEntryOnHost.
// Throws std::invalid_argument on an extent mismatch and std::runtime_error
// when the functor reports a nonzero error code.
// NOTE(review): several lines of this function (braces, some declarations) are
// missing from this listing.
269 template<
class OutputOffsetsViewType,
270 class CountsViewType,
271 class InputOffsetsViewType,
272 class InputLocalRowIndicesViewType,
273 class InputLocalRowPidsViewType>
274 typename CountsViewType::non_const_value_type
275 computeNumPacketsAndOffsets (
const OutputOffsetsViewType& outputOffsets,
276 const CountsViewType& counts,
277 const InputOffsetsViewType& rowOffsets,
278 const InputLocalRowIndicesViewType& lclRowInds,
279 const InputLocalRowPidsViewType& lclRowPids,
280 const typename CountsViewType::non_const_value_type sizeOfLclCount,
281 const typename CountsViewType::non_const_value_type sizeOfGblColInd,
282 const typename CountsViewType::non_const_value_type sizeOfPid,
283 const typename CountsViewType::non_const_value_type sizeOfValue)
// Tail of the functor_type typedef; the input views are passed as their
// const_type variants. The start of the typedef is not visible.
286 CountsViewType,
typename InputOffsetsViewType::const_type,
287 typename InputLocalRowIndicesViewType::const_type,
288 typename InputLocalRowPidsViewType::const_type> functor_type;
289 typedef typename CountsViewType::non_const_value_type count_type;
290 typedef typename OutputOffsetsViewType::size_type size_type;
291 typedef typename OutputOffsetsViewType::execution_space execution_space;
292 typedef typename functor_type::local_row_index_type LO;
293 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
// Prefix prepended to every exception message below.
294 const char prefix[] =
"computeNumPacketsAndOffsets: ";
296 count_type count = 0;
297 const count_type numRowsToPack = lclRowInds.extent (0);
// Special case for nothing to pack; its body is not visible in this listing.
299 if (numRowsToPack == 0) {
// With at least one row to pack, rowOffsets needs more than one entry.
303 TEUCHOS_TEST_FOR_EXCEPTION
304 (rowOffsets.extent (0) <=
static_cast<size_type
> (1),
305 std::invalid_argument, prefix <<
"There is at least one row to pack, "
306 "but the matrix has no rows. lclRowInds.extent(0) = " <<
307 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
308 rowOffsets.extent (0) <<
" <= 1.");
// outputOffsets must have exactly numRowsToPack + 1 entries.
309 TEUCHOS_TEST_FOR_EXCEPTION
310 (outputOffsets.extent (0) !=
311 static_cast<size_type
> (numRowsToPack + 1), std::invalid_argument,
312 prefix <<
"Output dimension does not match number of rows to pack. "
313 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
314 <<
" != lclRowInds.extent(0) + 1 = "
315 <<
static_cast<size_type
> (numRowsToPack + 1) <<
".");
// counts must have exactly numRowsToPack entries.
316 TEUCHOS_TEST_FOR_EXCEPTION
317 (counts.extent (0) != numRowsToPack, std::invalid_argument,
318 prefix <<
"counts.extent(0) = " << counts.extent (0)
319 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// Build the functor and scan one index past the last row.
321 functor_type f (outputOffsets, counts, rowOffsets,
322 lclRowInds, lclRowPids, sizeOfLclCount,
323 sizeOfGblColInd, sizeOfPid, sizeOfValue);
324 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
// A nonzero error code from the functor becomes a std::runtime_error.
327 const int errCode = f.getError ();
328 TEUCHOS_TEST_FOR_EXCEPTION
329 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code "
330 << errCode <<
" != 0.");
// Consistency check: compares the last offset with `total`, and on mismatch
// builds a diagnostic message in errStr. The loop body that accumulates
// `total` and the declarations of `total` and `errStr` are not visible.
// NOTE(review): `return {false, errStr}` below does not match this function's
// count_type return type; presumably this code sits inside a lambda or a
// disabled section whose header is not visible -- confirm against the full file.
334 for (LO k = 0; k < numRowsToPack; ++k) {
337 if (outputOffsets(numRowsToPack) != total) {
// Create the message stream only on the first use.
338 if (errStr.get () == NULL) {
339 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
341 std::ostringstream& os = *errStr;
343 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") "
344 << outputOffsets(numRowsToPack) <<
" != sum of counts = "
345 << total <<
"." << std::endl;
346 if (numRowsToPack != 0) {
// For fewer than 10 rows, print the arrays in full.
348 if (numRowsToPack <
static_cast<LO
> (10)) {
349 os <<
"outputOffsets: [";
350 for (LO i = 0; i <= numRowsToPack; ++i) {
351 os << outputOffsets(i);
352 if (
static_cast<LO
> (i + 1) <= numRowsToPack) {
356 os <<
"]" << std::endl;
358 for (LO i = 0; i < numRowsToPack; ++i) {
360 if (
static_cast<LO
> (i + 1) < numRowsToPack) {
364 os <<
"]" << std::endl;
// Otherwise print only the offset just before the last one.
367 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = "
368 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
371 count = outputOffsets(numRowsToPack);
372 return {
false, errStr};
// Return a value read from outputOffsets on the host; the index argument is
// cut off in this listing.
378 using Tpetra::Details::getEntryOnHost;
379 return static_cast<count_type
> (getEntryOnHost (outputOffsets,
// Packs one matrix row into the byte buffer `exports`, starting at `offset`.
// Returns a pair (error code, number of bytes written):
//   (0, 0)      early return (its condition is not visible in this listing),
//   (10, bytes) when any packing step reported an error,
//   (11, bytes) when the bytes written differ from the expected total,
//   (0, bytes)  on success.
// NOTE(review): the function name line and several parameters are missing from
// this listing.
399 template<
class ST,
class ColumnMap,
class BufferDeviceType>
401 Kokkos::pair<int, size_t>
403 const Kokkos::View<char*, BufferDeviceType>& exports,
408 const size_t num_ent,
409 const size_t num_bytes_per_value,
410 const bool pack_pids)
412 using Kokkos::subview;
413 using LO =
typename ColumnMap::local_ordinal_type;
414 using GO =
typename ColumnMap::global_ordinal_type;
415 using return_type = Kokkos::pair<int, size_t>;
419 return return_type (0, 0);
// Buffer layout for the row, in order: entry count, global column indices,
// PIDs (only when pack_pids is true), values. Each *_beg is a byte position in
// `exports`; the *_len definitions for the count and the indices are not
// visible here.
422 const LO num_ent_LO =
static_cast<LO
> (num_ent);
423 const size_t num_ent_beg = offset;
426 const size_t gids_beg = num_ent_beg + num_ent_len;
429 const size_t pids_beg = gids_beg + gids_len;
// The PID section has zero length when PIDs are not packed.
430 const size_t pids_len = pack_pids ?
432 static_cast<size_t> (0);
434 const size_t vals_beg = gids_beg + gids_len + pids_len;
435 const size_t vals_len = num_ent * num_bytes_per_value;
// Raw output pointers for each section; pids_out is NULL when PIDs are skipped.
437 char*
const num_ent_out = exports.data () + num_ent_beg;
438 char*
const gids_out = exports.data () + gids_beg;
439 char*
const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
440 char*
const vals_out = exports.data () + vals_beg;
// Running total of bytes written.
442 size_t num_bytes_out = 0;
// Convert each local column index to a global index via the column map.
449 for (
size_t k = 0; k < num_ent; ++k) {
450 const LO lid = lids_in[k];
451 const GO gid = col_map.getGlobalElement (lid);
// Look up the PID for each entry; pids_in is indexed by the local column index.
456 for (
size_t k = 0; k < num_ent; ++k) {
457 const LO lid = lids_in[k];
458 const int pid = pids_in[lid];
// Accumulate the error code and byte count of a packing call; the call that
// produces p is not visible here.
464 error_code += p.first;
465 num_bytes_out += p.second;
468 if (error_code != 0) {
469 return return_type (10, num_bytes_out);
// The bytes written must equal the sum of the four section lengths.
472 const size_t expected_num_bytes =
473 num_ent_len + gids_len + pids_len + vals_len;
474 if (num_bytes_out != expected_num_bytes) {
475 return return_type (11, num_bytes_out);
477 return return_type (0, num_bytes_out);
// Reduction functor that packs one exported row per index by calling
// packCrsMatrixRow. The reduction value is a pair (error code, index); init()
// sets it to (0, invalid ordinal).
// NOTE(review): several typedefs, the pack_pids member declaration, and most
// braces are missing from this listing.
480 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
481 struct PackCrsMatrixFunctor {
482 typedef LocalMatrix local_matrix_device_type;
// Scalar type and device type of the local matrix.
484 typedef typename local_matrix_device_type::value_type ST;
487 typedef typename local_matrix_device_type::device_type DT;
// View types that live on the buffer device.
489 typedef Kokkos::View<const size_t*, BufferDeviceType>
490 num_packets_per_lid_view_type;
491 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
492 typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
// The names introduced by the next two typedefs are cut off in this listing.
496 typedef typename num_packets_per_lid_view_type::non_const_value_type
498 typedef typename offsets_view_type::non_const_value_type
// Reduction value: first = error code, second = index passed to operator().
500 typedef Kokkos::pair<int, LO> value_type;
502 static_assert (std::is_same<LO, typename local_matrix_device_type::ordinal_type>::value,
503 "local_map_type::local_ordinal_type and "
504 "local_matrix_device_type::ordinal_type must be the same.");
// Functor state, copied from the constructor arguments.
506 local_matrix_device_type local_matrix;
507 local_map_type local_col_map;
508 exports_view_type exports;
509 num_packets_per_lid_view_type num_packets_per_lid;
510 export_lids_view_type export_lids;
511 source_pids_view_type source_pids;
512 offsets_view_type offsets;
513 size_t num_bytes_per_value;
// Stores all arguments, then checks the matrix's row_map length.
516 PackCrsMatrixFunctor (
const local_matrix_device_type& local_matrix_in,
517 const local_map_type& local_col_map_in,
518 const exports_view_type& exports_in,
519 const num_packets_per_lid_view_type& num_packets_per_lid_in,
520 const export_lids_view_type& export_lids_in,
521 const source_pids_view_type& source_pids_in,
522 const offsets_view_type& offsets_in,
523 const size_t num_bytes_per_value_in,
524 const bool pack_pids_in) :
525 local_matrix (local_matrix_in),
526 local_col_map (local_col_map_in),
527 exports (exports_in),
528 num_packets_per_lid (num_packets_per_lid_in),
529 export_lids (export_lids_in),
530 source_pids (source_pids_in),
531 offsets (offsets_in),
532 num_bytes_per_value (num_bytes_per_value_in),
533 pack_pids (pack_pids_in)
// For a matrix with rows, row_map must have numRows + 1 entries; otherwise
// std::logic_error is thrown.
535 const LO numRows = local_matrix_in.numRows ();
537 static_cast<LO
> (local_matrix.graph.row_map.extent (0));
538 TEUCHOS_TEST_FOR_EXCEPTION
539 (numRows != 0 && rowMapDim != numRows +
static_cast<LO
> (1),
540 std::logic_error,
"local_matrix.graph.row_map.extent(0) = "
541 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: error code 0 and an invalid index.
544 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const
546 using ::Tpetra::Details::OrdinalTraits;
547 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Combine two partial results. The visible condition acts only when src holds
// an error and dst does not; the body is not visible in this listing.
550 KOKKOS_INLINE_FUNCTION
void
551 join (
volatile value_type& dst,
const volatile value_type& src)
const
555 if (src.first != 0 && dst.first == 0) {
// Pack the row for reduction index i.
560 KOKKOS_INLINE_FUNCTION
561 void operator() (
const LO i, value_type& dst)
const
563 const size_t offset = offsets[i];
564 const LO export_lid = export_lids[i];
565 const size_t buf_size = exports.size();
566 const size_t num_bytes = num_packets_per_lid(i);
// NOTE(review): row_map[export_lid+1] is read here before the range check on
// export_lid below, so an out-of-range export_lid would index row_map out of
// bounds -- confirm and consider moving this after the check.
567 const size_t num_ent =
568 static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
569 - local_matrix.graph.row_map[export_lid]);
// Error 1: export_lid is not a valid local row.
// NOTE(review): init() sets dst.first to 0, so the `dst.first != 0` guards in
// this function look inverted -- as written an error would never be recorded.
// join() uses `dst.first == 0` for the same "keep the first error" idea;
// confirm the intended condition.
579 if (export_lid >= local_matrix.numRows ()) {
580 if (dst.first != 0) {
581 dst = Kokkos::make_pair (1, i);
// Error 2: the row's byte range does not fit inside the exports buffer.
585 else if ((offset > buf_size || offset + num_bytes > buf_size)) {
586 if (dst.first != 0) {
587 dst = Kokkos::make_pair (2, i);
// Take subviews of this row's values and column indices, then pack the row.
597 const auto row_beg = local_matrix.graph.row_map[export_lid];
598 const auto row_end = local_matrix.graph.row_map[export_lid + 1];
599 auto vals_in = subview (local_matrix.values,
600 Kokkos::make_pair (row_beg, row_end));
601 auto lids_in = subview (local_matrix.graph.entries,
602 Kokkos::make_pair (row_beg, row_end));
603 typedef local_map_type LMT;
604 typedef BufferDeviceType BDT;
605 auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
606 source_pids, vals_in, offset,
607 num_ent, num_bytes_per_value,
// p.first is the row's error code, p.second the bytes actually written.
609 int error_code_this_row = p.first;
610 size_t num_bytes_packed_this_row = p.second;
611 if (error_code_this_row != 0) {
612 if (dst.first != 0) {
613 dst = Kokkos::make_pair (error_code_this_row, i);
// Error 3: bytes written differ from num_packets_per_lid(i).
616 else if (num_bytes_packed_this_row != num_bytes) {
617 if (dst.first != 0) {
618 dst = Kokkos::make_pair (3, i);
// Checks the argument extents, then runs PackCrsMatrixFunctor with
// Kokkos::parallel_reduce over [0, num_packets_per_lid.extent(0)).
// Throws std::invalid_argument on inconsistent arguments and
// std::runtime_error when the reduction result carries a nonzero error code.
// NOTE(review): the return type and several parameters are missing from this
// listing.
631 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
633 do_pack (
const LocalMatrix& local_matrix,
635 const Kokkos::View<char*, BufferDeviceType>& exports,
639 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
640 const size_t num_bytes_per_value,
641 const bool pack_pids)
644 using DT =
typename LocalMatrix::device_type;
645 using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
// Prefix prepended to every exception message below.
646 const char prefix[] =
"Tpetra::Details::do_pack: ";
// Extent checks are only made when there is something to export.
648 if (export_lids.extent (0) != 0) {
// offsets needs one more entry than export_lids.
649 TEUCHOS_TEST_FOR_EXCEPTION
650 (
static_cast<size_t> (offsets.extent (0)) !=
651 static_cast<size_t> (export_lids.extent (0) + 1),
652 std::invalid_argument, prefix <<
"offsets.extent(0) = "
653 << offsets.extent (0) <<
" != export_lids.extent(0) (= "
654 << export_lids.extent (0) <<
") + 1.");
// export_lids and num_packets_per_lid must have the same length.
655 TEUCHOS_TEST_FOR_EXCEPTION
656 (export_lids.extent (0) != num_packets_per_lid.extent (0),
657 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
658 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = "
659 << num_packets_per_lid.extent (0) <<
".");
// Packing PIDs into a nonempty buffer requires a nonempty source_pids.
663 TEUCHOS_TEST_FOR_EXCEPTION
664 (pack_pids && exports.extent (0) != 0 &&
665 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
666 "pack_pids is true, and exports.extent(0) = " <<
667 exports.extent (0) <<
" != 0, meaning that we need to pack at "
668 "least one matrix entry, but source_pids.extent(0) = 0.");
// Build the functor and reduce over all exported rows.
671 using pack_functor_type =
672 PackCrsMatrixFunctor<LocalMatrix, LocalMap, BufferDeviceType>;
673 pack_functor_type f (local_matrix, local_map, exports,
674 num_packets_per_lid, export_lids,
675 source_pids, offsets, num_bytes_per_value,
678 typename pack_functor_type::value_type result;
679 range_type range (0, num_packets_per_lid.extent (0));
680 Kokkos::parallel_reduce (range, f, result);
// result.first is the error code and result.second the reported row.
682 if (result.first != 0) {
685 TEUCHOS_TEST_FOR_EXCEPTION
686 (
true, std::runtime_error, prefix <<
"PackCrsMatrixFunctor "
687 "reported error code " << result.first <<
" for the first "
688 "bad row " << result.second <<
".");
// Implementation-level pack routine: validates the arguments, determines the
// bytes per value, computes per-row byte counts and offsets, grows the exports
// DualView when it is too small, and calls do_pack on the device view.
// constant_num_packets is always set to 0.
// NOTE(review): the function name line, the sourceMatrix parameter, and a
// number of statements are missing from this listing.
721 template<
typename ST,
typename LO,
typename GO,
typename NT,
typename BufferDeviceType>
724 Kokkos::DualView<char*, BufferDeviceType>& exports,
725 const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
726 const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
727 const Kokkos::View<const int*, typename NT::device_type>& export_pids,
728 size_t& constant_num_packets,
729 const bool pack_pids)
732 "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
736 typedef BufferDeviceType DT;
737 typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
// Prefix prepended to every exception message below.
738 const char prefix[] =
"Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
// Compile-time switch, set to false here.
739 constexpr
bool debug =
false;
// Local (per-process) view of the matrix's column Map.
742 auto local_col_map = sourceMatrix.
getColMap ()->getLocalMap ();
747 constant_num_packets = 0;
// export_lids and num_packets_per_lid must have the same length.
749 const size_t num_export_lids =
750 static_cast<size_t> (export_lids.extent (0));
751 TEUCHOS_TEST_FOR_EXCEPTION
753 static_cast<size_t> (num_packets_per_lid.extent (0)),
754 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = "
755 << num_export_lids <<
" != num_packets_per_lid.extent(0) = "
756 << num_packets_per_lid.extent (0) <<
".");
// With rows to export, num_packets_per_lid must point at real storage.
757 if (num_export_lids != 0) {
758 TEUCHOS_TEST_FOR_EXCEPTION
759 (num_packets_per_lid.data () == NULL, std::invalid_argument,
760 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but "
761 "num_packets_per_lid.data() = "
762 << num_packets_per_lid.data () <<
" == NULL.");
// Bytes per value: a local figure is derived from the first stored value (when
// there is one) and then combined across the matrix's communicator with
// reduceAll. The reduction operator is not visible in this listing.
769 size_t num_bytes_per_value = 0;
784 size_t num_bytes_per_value_l = 0;
785 if (local_matrix.values.extent(0) > 0) {
786 const ST& val = local_matrix.values(0);
789 using Teuchos::reduceAll;
790 reduceAll<int, size_t> (* (sourceMatrix.
getComm ()),
792 num_bytes_per_value_l,
793 Teuchos::outArg (num_bytes_per_value));
// Nothing to export: replace exports with an empty buffer.
796 if (num_export_lids == 0) {
797 exports = exports_view_type (
"exports", 0);
// One offset per exported row, plus one.
802 Kokkos::View<size_t*, DT> offsets (
"offsets", num_export_lids + 1);
// Fill num_packets_per_lid and offsets; the variable receiving the result
// (used as `count` below) is declared on a line not visible here.
807 computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
808 local_matrix.graph.row_map, export_lids,
810 num_bytes_per_lid, num_bytes_per_gid,
811 num_bytes_per_pid, num_bytes_per_value);
// Reallocate exports only when the required byte count exceeds its size.
814 if (count >
static_cast<size_t> (exports.extent (0))) {
815 exports = exports_view_type (
"exports", count);
// Diagnostic output to std::cerr; presumably guarded by `debug` on lines that
// are not visible.
817 std::ostringstream os;
818 os <<
"*** exports resized to " << count << std::endl;
819 std::cerr << os.str ();
823 std::ostringstream os;
824 os <<
"*** count: " << count <<
", exports.extent(0): "
825 << exports.extent (0) << std::endl;
826 std::cerr << os.str ();
// Packing PIDs into a nonempty buffer requires a nonempty export_pids.
832 TEUCHOS_TEST_FOR_EXCEPTION
833 (pack_pids && exports.extent (0) != 0 &&
834 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
835 "pack_pids is true, and exports.extent(0) = " <<
836 exports.extent (0) <<
" != 0, meaning that we need to pack at least "
837 "one matrix entry, but export_pids.extent(0) = 0.");
839 typedef typename std::decay<decltype (local_matrix)>::type
840 local_matrix_device_type;
841 typedef typename std::decay<decltype (local_col_map)>::type
// Mark the device side of exports as modified, then pack into the device view.
844 exports.modify_device ();
845 auto exports_d = exports.view_device ();
846 do_pack<local_matrix_device_type, local_map_type, DT>
847 (local_matrix, local_col_map, exports_d, num_packets_per_lid,
848 export_lids, export_pids, offsets, num_bytes_per_value,
// Teuchos::Array / ArrayView front end for PackCrsMatrixImpl::packCrsMatrix.
// It wraps the host arrays in Kokkos views, packs without PIDs
// (pack_pids = false), and sizes the `exports` array to match the packed
// buffer.
// NOTE(review): the function name line, the sourceMatrix parameter, and the
// copy-back statements are missing from this listing.
855 template<
typename ST,
typename LO,
typename GO,
typename NT>
858 Teuchos::Array<char>& exports,
859 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
860 const Teuchos::ArrayView<const LO>& exportLIDs,
861 size_t& constantNumPackets)
864 using device_type =
typename local_matrix_device_type::device_type;
// Host execution space of the device's mirror view, paired with HostSpace.
866 using host_exec_space =
typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
867 using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
// View over numPacketsPerLID on the buffer device. The creating call is on a
// line not visible here; the `false` argument presumably disables copying the
// input -- TODO confirm against the callee's signature.
873 Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
875 numPacketsPerLID.getRawPtr (),
876 numPacketsPerLID.size (),
false,
877 "num_packets_per_lid");
// View over exportLIDs on the buffer device, created with `true` in the same
// argument position.
884 Kokkos::View<const LO*, buffer_device_type> export_lids_d =
886 exportLIDs.getRawPtr (),
887 exportLIDs.size (),
true,
// No PIDs are packed on this path, so export_pids_d stays default-constructed.
890 Kokkos::View<int*, device_type> export_pids_d;
891 Kokkos::DualView<char*, buffer_device_type> exports_dv;
892 constexpr
bool pack_pids =
false;
893 PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
894 sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
895 export_pids_d, constantNumPackets, pack_pids);
// Unmanaged host view over the caller's numPacketsPerLID storage.
899 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
900 (numPacketsPerLID.getRawPtr (),
901 numPacketsPerLID.size ());
// Resize the output array to the packed buffer's length when they differ.
909 if (
static_cast<size_t> (exports.size ()) !=
910 static_cast<size_t> (exports_dv.extent (0))) {
911 exports.resize (exports_dv.extent (0));
// Host view over the caller's exports storage; its remaining arguments are cut
// off in this listing.
913 Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
// DualView front end for PackCrsMatrixImpl::packCrsMatrix. It packs without
// PIDs (pack_pids = false) using the device views of numPacketsPerLID and
// exportLIDs.
// NOTE(review): the function name line and most parameters are missing from
// this listing.
918 template<
typename ST,
typename LO,
typename GO,
typename NT>
925 size_t& constantNumPackets)
// Zero-length PID view, since PIDs are not packed on this path.
931 Kokkos::View<int*, device_type> exportPIDs_d (
"exportPIDs", 0);
932 constexpr
bool pack_pids =
false;
// Work on a copy of the DualView handle: clear its sync state and mark the
// device side modified. The device view is then taken from the original
// numPacketsPerLID handle.
935 auto numPacketsPerLID_nc = numPacketsPerLID;
936 numPacketsPerLID_nc.clear_sync_state ();
937 numPacketsPerLID_nc.modify_device ();
938 auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
// exportLIDs must not need a sync to device before its device view is used.
941 TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
942 auto exportLIDs_d = exportLIDs.view_device ();
945 "Tpetra::Details::packCrsMatrixNew",
948 PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
949 sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
950 exportPIDs_d, constantNumPackets, pack_pids);
// ArrayView front end for PackCrsMatrixImpl::packCrsMatrix that also packs
// PIDs (pack_pids = true, taken from sourcePIDs). It writes progress and
// exception messages to std::cerr, each starting with a per-process prefix.
// NOTE(review): the function name line, the first parameters, the conditions
// guarding the diagnostic output, and the try / catch(...) lines are missing
// from this listing.
953 template<
typename ST,
typename LO,
typename GO,
typename NT>
957 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
958 const Teuchos::ArrayView<const LO>& exportLIDs,
959 const Teuchos::ArrayView<const int>& sourcePIDs,
960 size_t& constantNumPackets)
// Host execution space of the buffer DualView's host side, paired with HostSpace.
964 typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
965 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
967 typename local_matrix_device_type::device_type outputDevice;
// Message prefix; allocated only when it is filled in below.
970 std::unique_ptr<std::string> prefix;
// Rank of this process, looked up through the matrix's Map and communicator.
// The values returned for a null Map or null communicator are not visible here.
972 const int myRank = [&] () {
973 auto map = sourceMatrix.
getMap ();
974 if (map.get () ==
nullptr) {
977 auto comm = map->getComm ();
978 if (comm.get () ==
nullptr) {
981 return comm->getRank ();
// Build the prefix "Proc <rank>: packCrsMatrixWithOwningPIDs: " and announce
// the start.
983 std::ostringstream os;
984 os <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs: ";
985 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
987 std::ostringstream os2;
988 os2 << *prefix <<
"start" << std::endl;
989 std::cerr << os2.str ();
// Views over the three host arrays. The creating calls are on lines not
// visible here; the bool argument is `false` for the output counts and `true`
// for the two inputs, presumably selecting whether contents are copied --
// TODO confirm against the callee's signature.
996 auto num_packets_per_lid_d =
998 numPacketsPerLID.getRawPtr (),
999 numPacketsPerLID.size (),
false,
1000 "num_packets_per_lid");
1004 auto export_lids_d =
1006 exportLIDs.getRawPtr (),
1007 exportLIDs.size (),
true,
1011 auto export_pids_d =
1013 sourcePIDs.getRawPtr (),
1014 sourcePIDs.size (),
true,
// PIDs are packed on this path.
1016 constexpr
bool pack_pids =
true;
1018 PackCrsMatrixImpl::packCrsMatrix
1019 (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1020 export_pids_d, constantNumPackets, pack_pids);
// Report exceptions thrown by the pack call; whether they are rethrown is not
// visible in this listing.
1022 catch (std::exception& e) {
1024 std::ostringstream os;
1025 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw: "
1026 << e.what () << std::endl;
1027 std::cerr << os.str ();
1033 std::ostringstream os;
1034 os << *prefix <<
"PackCrsMatrixImpl::packCrsMatrix threw an exception "
1035 "not a subclass of std::exception" << std::endl;
1036 std::cerr << os.str ();
// With a nonempty numPacketsPerLID, wrap the caller's storage in a host view;
// the error messages below indicate a Kokkos::deep_copy follows.
1041 if (numPacketsPerLID.size () != 0) {
1045 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1046 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1049 catch (std::exception& e) {
1051 std::ostringstream os;
1052 os << *prefix <<
"Kokkos::deep_copy threw: " << e.what () << std::endl;
1053 std::cerr << os.str ();
1059 std::ostringstream os;
1060 os << *prefix <<
"Kokkos::deep_copy threw an exception not a subclass "
1061 "of std::exception" << std::endl;
1062 std::cerr << os.str ();
// Announce completion.
1069 std::ostringstream os;
1070 os << *prefix <<
"done" << std::endl;
1071 std::cerr << os.str ();
// Macro taking (ST, LO, GO, NT) that names the Details::packCrsMatrix,
// Details::packCrsMatrixNew and Details::packCrsMatrixWithOwningPIDs
// specializations for those types, judging by its name as explicit
// instantiations.
// NOTE(review): several continuation lines (including the leading keywords of
// each declaration and the end of the macro) are missing from this listing.
1078 #define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1080 Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1081 Teuchos::Array<char>&, \
1082 const Teuchos::ArrayView<size_t>&, \
1083 const Teuchos::ArrayView<const LO>&, \
1086 Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1087 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1088 const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1089 const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1092 Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1093 Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1094 const Teuchos::ArrayView<size_t>&, \
1095 const Teuchos::ArrayView<const LO>&, \
1096 const Teuchos::ArrayView<const int>&, \
Declaration of the Tpetra::CrsMatrix class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types, into the Tpetra::Details namespace.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack data.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
KOKKOS_FUNCTION Kokkos::pair< int, size_t > packCrsMatrixRow(const ColumnMap &col_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< typename ColumnMap::local_ordinal_type >::input_array_type &lids_in, const typename PackTraits< int >::input_array_type &pids_in, const typename PackTraits< ST >::input_array_type &vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, const bool pack_pids)
Packs a single row of the CrsMatrix.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
static bool verbose()
Whether Tpetra is in verbose mode.
"Local" part of Map suitable for Kokkos kernels.
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
Base class for distributed Tpetra objects that support data redistribution.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Implementation details of Tpetra.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject interface.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of bytes.