42 #ifndef TPETRA_DETAILS_UNPACKCRSMATRIX_HPP 43 #define TPETRA_DETAILS_UNPACKCRSMATRIX_HPP 45 #include "TpetraCore_config.h" 46 #include "Teuchos_Array.hpp" 47 #include "Teuchos_ArrayView.hpp" 50 #include "Kokkos_Core.hpp" 65 #ifndef DOXYGEN_SHOULD_SKIP_THIS 68 #endif // DOXYGEN_SHOULD_SKIP_THIS 85 size_t expected_num_bytes;
88 bool wrong_num_bytes_error;
89 bool out_of_bounds_error;
90 bool invalid_combine_mode_error;
93 bad_index(Tpetra::Details::OrdinalTraits<LO>::invalid()),
96 expected_num_bytes(0),
99 wrong_num_bytes_error(
false),
100 out_of_bounds_error(
false),
101 invalid_combine_mode_error(
false) {}
107 return bad_index == Tpetra::Details::OrdinalTraits<LO>::invalid();
110 std::string summary()
const 112 std::ostringstream os;
113 if (wrong_num_bytes_error ||
114 out_of_bounds_error ||
115 invalid_combine_mode_error) {
116 if (wrong_num_bytes_error) {
117 os <<
"At index i = " << bad_index
118 <<
", expectedNumBytes > numBytes.";
120 else if (out_of_bounds_error) {
121 os <<
"First invalid offset into 'imports' " 122 <<
"unpack buffer at index i = " << bad_index <<
".";
124 else if (invalid_combine_mode_error) {
125 os <<
"Invalid combine mode; code should never get " 126 "here! Please report this bug to the Tpetra developers.";
128 os <<
" importLIDs[i]: " << bad_index
129 <<
", bufSize: " << buf_size
130 <<
", offset: " << offset
131 <<
", numBytes: " << num_bytes
132 <<
", expectedNumBytes: " << expected_num_bytes
133 <<
", numEnt: " << num_ent;
153 template<
class NumPacketsPerLIDType,
class OffsetsType,
154 class ImportsType,
class ImportLIDsType,
155 class LocalMatrixType,
class LocalMapType>
158 typedef NumPacketsPerLIDType num_packets_per_lid_type;
159 typedef OffsetsType offsets_type;
160 typedef ImportsType imports_type;
161 typedef ImportLIDsType import_lids_type;
162 typedef LocalMatrixType local_matrix_type;
163 typedef LocalMapType local_map_type;
164 typedef typename local_matrix_type::value_type IST;
165 typedef typename local_matrix_type::ordinal_type LO;
166 typedef typename local_map_type::global_ordinal_type GO;
169 static_assert (Kokkos::Impl::is_view<NumPacketsPerLIDType>::value,
170 "NumPacketsPerLIDType must be a Kokkos::View.");
171 static_assert (Kokkos::Impl::is_view<OffsetsType>::value,
172 "OffsetsType must be a Kokkos::View.");
173 static_assert (Kokkos::Impl::is_view<ImportsType>::value,
174 "ImportsType must be a Kokkos::View.");
175 static_assert (Kokkos::Impl::is_view<ImportLIDsType>::value,
176 "ImportLIDsType must be a Kokkos::View.");
177 static_assert (std::is_same<LO, typename LocalMatrixType::ordinal_type>::value,
178 "LocalMapType::local_ordinal_type and " 179 "LocalMatrixType::ordinal_type must be the same.");
181 NumPacketsPerLIDType num_packets_per_lid_;
182 OffsetsType offsets_;
183 ImportsType imports_;
184 ImportLIDsType import_lids_;
185 LocalMatrixType local_matrix_;
186 LocalMapType local_col_map_;
191 const num_packets_per_lid_type& num_packets_per_lid,
192 const offsets_type& offsets,
193 const imports_type& imports,
194 const import_lids_type& import_lids,
195 local_matrix_type& local_matrix,
196 const local_map_type& local_col_map,
199 num_packets_per_lid_(num_packets_per_lid),
202 import_lids_(import_lids),
203 local_matrix_(local_matrix),
204 local_col_map_(local_col_map),
205 combine_mode_(combine_mode),
209 KOKKOS_INLINE_FUNCTION
void init(value_type& dst)
const 211 dst.bad_index = Tpetra::Details::OrdinalTraits<LO>::invalid();
214 dst.expected_num_bytes = 0;
217 dst.wrong_num_bytes_error =
false;
218 dst.out_of_bounds_error =
false;
219 dst.invalid_combine_mode_error =
false;
222 KOKKOS_INLINE_FUNCTION
void 223 join (
volatile value_type& dst,
const volatile value_type& src)
const 229 LO invalid = Tpetra::Details::OrdinalTraits<LO>::invalid();
230 if (src.bad_index != invalid) {
235 if (dst.bad_index == invalid || src.bad_index < dst.bad_index) {
236 dst.bad_index = src.bad_index;
237 dst.num_ent = src.num_ent;
238 dst.offset = src.offset;
239 dst.expected_num_bytes = src.expected_num_bytes;
240 dst.num_bytes = src.num_bytes;
241 dst.buf_size = src.buf_size;
242 dst.wrong_num_bytes_error = src.wrong_num_bytes_error;
243 dst.out_of_bounds_error = src.out_of_bounds_error;
244 dst.invalid_combine_mode_error = src.invalid_combine_mode_error;
249 KOKKOS_INLINE_FUNCTION
250 void operator()(
const LO i, value_type& dst)
const 252 const LO local_row = import_lids_[i];
253 const size_t num_bytes = num_packets_per_lid_(i);
257 const size_t buf_size = imports_.size();
258 const char*
const num_ent_beg = imports_.ptr_on_device() + offsets_(i);
259 const char*
const num_ent_end = num_ent_beg +
sizeof(LO);
264 memcpy(&num_ent, num_ent_beg,
sizeof(LO));
266 const char*
const val_beg = num_ent_end;
267 const char*
const val_end = val_beg +
static_cast<size_t>(num_ent) *
sizeof(IST);
268 const char*
const ind_beg = val_end;
269 const size_t expected_num_bytes =
sizeof(LO) +
270 static_cast<size_t>(num_ent) * (
sizeof(IST) +
sizeof(GO));
272 if (expected_num_bytes > num_bytes) {
273 dst.wrong_num_bytes_error =
true;
275 else if (offsets_(i) > buf_size || offsets_(i) + num_bytes > buf_size) {
276 dst.out_of_bounds_error =
true;
288 if (combine_mode_ ==
ADD) {
289 for (
size_t k = 0; k < static_cast<size_t>(num_ent); ++k) {
292 memcpy(&val, val_beg + k *
sizeof(IST),
sizeof(IST));
293 memcpy(&col, ind_beg + k *
sizeof(GO),
sizeof(GO));
294 LO local_col_idx = local_col_map_.getLocalElement(col);
295 num_modified += local_matrix_.sumIntoValues(local_row,
296 &local_col_idx, 1, &val,
false, atomic_);
299 else if (combine_mode_ ==
REPLACE) {
300 for (
size_t k = 0; k < static_cast<size_t>(num_ent); ++k) {
303 memcpy(&val, val_beg + k *
sizeof(IST),
sizeof(IST));
304 memcpy(&col, ind_beg + k *
sizeof(GO),
sizeof(GO));
305 LO local_col_idx = local_col_map_.getLocalElement(col);
306 num_modified += local_matrix_.replaceValues(local_row,
307 &local_col_idx, 1, &val,
false, atomic_);
311 dst.invalid_combine_mode_error =
true;
315 if (dst.wrong_num_bytes_error ||
316 dst.out_of_bounds_error ||
317 dst.invalid_combine_mode_error) {
319 dst.num_ent = num_ent;
320 dst.offset = offsets_(i);
321 dst.expected_num_bytes = expected_num_bytes;
322 dst.num_bytes = num_bytes;
323 dst.buf_size = buf_size;
334 template<
class LocalMatrixType,
class LocalMapType>
337 LocalMatrixType& lclMatrix,
338 const LocalMapType& lclColMap,
339 std::unique_ptr<std::string>& errStr,
340 const Teuchos::ArrayView<const typename LocalMatrixType::ordinal_type>& importLIDs,
341 const Teuchos::ArrayView<const char>& imports,
342 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
343 size_t constantNumPackets,
352 typedef LocalMatrixType local_matrix_type;
353 typedef LocalMapType local_map_type;
354 typedef typename LocalMapType::local_ordinal_type LO;
356 static_assert (std::is_same<LO, typename LocalMatrixType::ordinal_type>::value,
357 "LocalMapType::local_ordinal_type and " 358 "LocalMatrixType::ordinal_type must be the same.");
360 typedef typename LocalMatrixType::device_type device_type;
361 typedef typename device_type::execution_space execution_space;
362 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
364 const size_t numImportLIDs =
static_cast<size_t>(importLIDs.size());
369 std::ostringstream os;
370 if (combineMode ==
ABSMAX) {
372 os <<
"ABSMAX combine mode is not yet implemented for a matrix that has a " 373 <<
"static graph (i.e., was constructed with the CrsMatrix constructor " 374 <<
"that takes a const CrsGraph pointer).\n";
376 else if (combineMode ==
INSERT) {
378 os <<
"INSERT combine mode is not allowed if the matrix has a static graph " 379 <<
"(i.e., was constructed with the CrsMatrix constructor that takes a " 380 <<
"const CrsGraph pointer).\n";
382 else if (!(combineMode ==
ADD || combineMode ==
REPLACE)) {
385 os <<
"Invalid combine mode; should never get " 386 <<
"here! Please report this bug to the Tpetra developers.\n";
390 if (numImportLIDs != static_cast<size_t>(numPacketsPerLID.size())) {
392 os <<
"importLIDs.size() (" << numImportLIDs <<
") != " 393 <<
"numPacketsPerLID.size() (" << numPacketsPerLID.size() <<
").";
397 if (errStr.get() == NULL) {
398 errStr = std::unique_ptr<std::string>(
new std::string());
408 typename LocalMatrixType::device_type outputDevice;
409 auto num_packets_per_lid_d =
411 numPacketsPerLID.getRawPtr(), numPacketsPerLID.size(),
412 true,
"num_packets_per_lid_d");
416 importLIDs.getRawPtr(), importLIDs.size(),
417 true,
"import_lids_d");
421 imports.getRawPtr(), imports.size(),
425 View<size_t*, device_type>
426 offsets_d(
"offsets_d", numImportLIDs+1);
431 decltype(num_packets_per_lid_d),
434 decltype(import_lids_d),
436 local_map_type> unpack_functor_type;
437 unpack_functor_type unpack_functor(num_packets_per_lid_d, offsets_d,
438 imports_d, import_lids_d, lclMatrix, lclColMap, combineMode, atomic);
440 typename unpack_functor_type::value_type result;
441 Kokkos::parallel_reduce(range_type(0, numImportLIDs), unpack_functor, result);
443 if (!result.success()) {
444 std::ostringstream os;
445 os <<
"Proc " << myRank <<
": packCrsMatrix failed. " 448 if (errStr.get () != NULL) {
449 errStr = std::unique_ptr<std::string> (
new std::string ());
454 return result.success();
461 #endif // TPETRA_DETAILS_UNPACKCRSMATRIX_HPP Namespace Tpetra contains the class and methods constituting the Tpetra library.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
Unpacks and combines a single row of the CrsMatrix.
Implementation details of Tpetra.
Insert new values that don't currently exist.
Reduction result for UnpackCrsMatrixAndCombineFunctor below.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
bool unpackCrsMatrixAndCombine(LocalMatrixType &lclMatrix, const LocalMapType &lclColMap, std::unique_ptr< std::string > &errStr, const Teuchos::ArrayView< const typename LocalMatrixType::ordinal_type > &importLIDs, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, const int myRank, Distributor &, CombineMode combineMode, const bool atomic)
Unpack the imported column indices and values, and combine into matrix.
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Replace old value with maximum of magnitudes of old and new values.
Replace existing values with new values.