Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_unpackCrsMatrix.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_UNPACKCRSMATRIX_HPP
43 #define TPETRA_DETAILS_UNPACKCRSMATRIX_HPP
44 
45 #include "TpetraCore_config.h"
46 #include "Teuchos_Array.hpp"
47 #include "Teuchos_ArrayView.hpp"
50 #include "Kokkos_Core.hpp"
51 #include <memory>
52 #include <string>
53 
62 
63 namespace Tpetra {
64 
65 #ifndef DOXYGEN_SHOULD_SKIP_THIS
66 // Forward declaration of Distributor
67 class Distributor;
68 #endif // DOXYGEN_SHOULD_SKIP_THIS
69 
70 //
71 // Users must never rely on anything in the Details namespace.
72 //
73 namespace Details {
74 
79 template<class LO>
81 
82  LO bad_index; // only valid if outOfBounds == true.
83  size_t num_ent;
84  size_t offset; // only valid if outOfBounds == true.
85  size_t expected_num_bytes;
86  size_t num_bytes; // only valid if outOfBounds == true.
87  size_t buf_size;
88  bool wrong_num_bytes_error;
89  bool out_of_bounds_error;
90  bool invalid_combine_mode_error;
91 
92  KOKKOS_INLINE_FUNCTION UnpackCrsMatrixError():
93  bad_index(Tpetra::Details::OrdinalTraits<LO>::invalid()),
94  num_ent(0),
95  offset(0),
96  expected_num_bytes(0),
97  num_bytes(0),
98  buf_size(0),
99  wrong_num_bytes_error(false),
100  out_of_bounds_error(false),
101  invalid_combine_mode_error(false) {}
102 
103  bool success() const
104  {
105  // Any possible error would result in bad_index being changed from
106  // `invalid` to the index associated with the error.
107  return bad_index == Tpetra::Details::OrdinalTraits<LO>::invalid();
108  }
109 
110  std::string summary() const
111  {
112  std::ostringstream os;
113  if (wrong_num_bytes_error ||
114  out_of_bounds_error ||
115  invalid_combine_mode_error) {
116  if (wrong_num_bytes_error) {
117  os << "At index i = " << bad_index
118  << ", expectedNumBytes > numBytes.";
119  }
120  else if (out_of_bounds_error) {
121  os << "First invalid offset into 'imports' "
122  << "unpack buffer at index i = " << bad_index << ".";
123  }
124  else if (invalid_combine_mode_error) {
125  os << "Invalid combine mode; code should never get "
126  "here! Please report this bug to the Tpetra developers.";
127  }
128  os << " importLIDs[i]: " << bad_index
129  << ", bufSize: " << buf_size
130  << ", offset: " << offset
131  << ", numBytes: " << num_bytes
132  << ", expectedNumBytes: " << expected_num_bytes
133  << ", numEnt: " << num_ent;
134  }
135  return os.str();
136  }
137 };
138 
139 
149 //local IDs
153 template<class NumPacketsPerLIDType, class OffsetsType,
154  class ImportsType, class ImportLIDsType,
155  class LocalMatrixType, class LocalMapType>
157 
158  typedef NumPacketsPerLIDType num_packets_per_lid_type;
159  typedef OffsetsType offsets_type;
160  typedef ImportsType imports_type;
161  typedef ImportLIDsType import_lids_type;
162  typedef LocalMatrixType local_matrix_type;
163  typedef LocalMapType local_map_type;
164  typedef typename local_matrix_type::value_type IST;
165  typedef typename local_matrix_type::ordinal_type LO;
166  typedef typename local_map_type::global_ordinal_type GO;
168 
169  static_assert (Kokkos::Impl::is_view<NumPacketsPerLIDType>::value,
170  "NumPacketsPerLIDType must be a Kokkos::View.");
171  static_assert (Kokkos::Impl::is_view<OffsetsType>::value,
172  "OffsetsType must be a Kokkos::View.");
173  static_assert (Kokkos::Impl::is_view<ImportsType>::value,
174  "ImportsType must be a Kokkos::View.");
175  static_assert (Kokkos::Impl::is_view<ImportLIDsType>::value,
176  "ImportLIDsType must be a Kokkos::View.");
177  static_assert (std::is_same<LO, typename LocalMatrixType::ordinal_type>::value,
178  "LocalMapType::local_ordinal_type and "
179  "LocalMatrixType::ordinal_type must be the same.");
180 
181  NumPacketsPerLIDType num_packets_per_lid_;
182  OffsetsType offsets_;
183  ImportsType imports_;
184  ImportLIDsType import_lids_;
185  LocalMatrixType local_matrix_;
186  LocalMapType local_col_map_;
187  Tpetra::CombineMode combine_mode_;
188  bool atomic_;
189 
191  const num_packets_per_lid_type& num_packets_per_lid,
192  const offsets_type& offsets,
193  const imports_type& imports,
194  const import_lids_type& import_lids,
195  local_matrix_type& local_matrix,
196  const local_map_type& local_col_map,
197  const Tpetra::CombineMode combine_mode,
198  const bool atomic) :
199  num_packets_per_lid_(num_packets_per_lid),
200  offsets_(offsets),
201  imports_(imports),
202  import_lids_(import_lids),
203  local_matrix_(local_matrix),
204  local_col_map_(local_col_map),
205  combine_mode_(combine_mode),
206  atomic_(atomic)
207  {}
208 
209  KOKKOS_INLINE_FUNCTION void init(value_type& dst) const
210  {
211  dst.bad_index = Tpetra::Details::OrdinalTraits<LO>::invalid();
212  dst.num_ent = 0;
213  dst.offset = 0;
214  dst.expected_num_bytes = 0;
215  dst.num_bytes = 0;
216  dst.buf_size = 0;
217  dst.wrong_num_bytes_error = false;
218  dst.out_of_bounds_error = false;
219  dst.invalid_combine_mode_error = false;
220  }
221 
222  KOKKOS_INLINE_FUNCTION void
223  join (volatile value_type& dst, const volatile value_type& src) const
224  {
225  // The dst object should reflect the first bad index and all other
226  // associated error codes and data. Thus, we need only check if the src
227  // object shows an error and if its associated `bad_index` is less than the
228  // dst.bad_index (if dst shows errors).
229  LO invalid = Tpetra::Details::OrdinalTraits<LO>::invalid();
230  if (src.bad_index != invalid) {
231  // An error in the src, check if whether:
232  // 1. The dst shows errors
233  // 2. If dst does show errors, if src bad_index is less than its bad
234  // index
235  if (dst.bad_index == invalid || src.bad_index < dst.bad_index) {
236  dst.bad_index = src.bad_index;
237  dst.num_ent = src.num_ent;
238  dst.offset = src.offset;
239  dst.expected_num_bytes = src.expected_num_bytes;
240  dst.num_bytes = src.num_bytes;
241  dst.buf_size = src.buf_size;
242  dst.wrong_num_bytes_error = src.wrong_num_bytes_error;
243  dst.out_of_bounds_error = src.out_of_bounds_error;
244  dst.invalid_combine_mode_error = src.invalid_combine_mode_error;
245  }
246  }
247  }
248 
249  KOKKOS_INLINE_FUNCTION
250  void operator()(const LO i, value_type& dst) const
251  {
252  const LO local_row = import_lids_[i];
253  const size_t num_bytes = num_packets_per_lid_(i);
254 
255  if (num_bytes > 0) { // there is actually something in the row
256 
257  const size_t buf_size = imports_.size();
258  const char* const num_ent_beg = imports_.ptr_on_device() + offsets_(i);
259  const char* const num_ent_end = num_ent_beg + sizeof(LO);
260 
261  // Now we know how many entries to expect in the received data
262  // for this row.
263  LO num_ent = 0;
264  memcpy(&num_ent, num_ent_beg, sizeof(LO));
265 
266  const char* const val_beg = num_ent_end;
267  const char* const val_end = val_beg + static_cast<size_t>(num_ent) * sizeof(IST);
268  const char* const ind_beg = val_end;
269  const size_t expected_num_bytes = sizeof(LO) +
270  static_cast<size_t>(num_ent) * (sizeof(IST) + sizeof(GO));
271 
272  if (expected_num_bytes > num_bytes) {
273  dst.wrong_num_bytes_error = true;
274  }
275  else if (offsets_(i) > buf_size || offsets_(i) + num_bytes > buf_size) {
276  dst.out_of_bounds_error = true;
277  }
278  else {
279 
280  // FIXME (mfh 23 Mar 2017) CrsMatrix_NonlocalSumInto_Ignore test
281  // expects this method to ignore incoming entries that do not
282  // exist on the process that owns those rows. We would like to
283  // distinguish between "errors" resulting from ignored entries,
284  // vs. actual errors.
285 
286  // Combine a single column/value at a time to avoid temporary storage
287  LO num_modified = 0;
288  if (combine_mode_ == ADD) {
289  for (size_t k = 0; k < static_cast<size_t>(num_ent); ++k) {
290  IST val = 0;
291  GO col = 0;
292  memcpy(&val, val_beg + k * sizeof(IST), sizeof(IST));
293  memcpy(&col, ind_beg + k * sizeof(GO), sizeof(GO));
294  LO local_col_idx = local_col_map_.getLocalElement(col);
295  num_modified += local_matrix_.sumIntoValues(local_row,
296  &local_col_idx, 1, &val, false, atomic_);
297  }
298  }
299  else if (combine_mode_ == REPLACE) {
300  for (size_t k = 0; k < static_cast<size_t>(num_ent); ++k) {
301  IST val = 0;
302  GO col = 0;
303  memcpy(&val, val_beg + k * sizeof(IST), sizeof(IST));
304  memcpy(&col, ind_beg + k * sizeof(GO), sizeof(GO));
305  LO local_col_idx = local_col_map_.getLocalElement(col);
306  num_modified += local_matrix_.replaceValues(local_row,
307  &local_col_idx, 1, &val, false, atomic_);
308  }
309  }
310  else {
311  dst.invalid_combine_mode_error = true;
312  }
313  }
314 
315  if (dst.wrong_num_bytes_error ||
316  dst.out_of_bounds_error ||
317  dst.invalid_combine_mode_error) {
318  dst.bad_index = i;
319  dst.num_ent = num_ent;
320  dst.offset = offsets_(i);
321  dst.expected_num_bytes = expected_num_bytes;
322  dst.num_bytes = num_bytes;
323  dst.buf_size = buf_size;
324  }
325  }
326  }
327 
328 };
329 
334 template<class LocalMatrixType, class LocalMapType>
335 bool
337  LocalMatrixType& lclMatrix,
338  const LocalMapType& lclColMap,
339  std::unique_ptr<std::string>& errStr,
340  const Teuchos::ArrayView<const typename LocalMatrixType::ordinal_type>& importLIDs,
341  const Teuchos::ArrayView<const char>& imports,
342  const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
343  size_t constantNumPackets,
344  const int myRank,
345  Distributor & /* distor */,
346  CombineMode combineMode,
347  const bool atomic)
348 {
349  using Kokkos::View;
352  typedef LocalMatrixType local_matrix_type;
353  typedef LocalMapType local_map_type;
354  typedef typename LocalMapType::local_ordinal_type LO;
355 
356  static_assert (std::is_same<LO, typename LocalMatrixType::ordinal_type>::value,
357  "LocalMapType::local_ordinal_type and "
358  "LocalMatrixType::ordinal_type must be the same.");
359 
360  typedef typename LocalMatrixType::device_type device_type;
361  typedef typename device_type::execution_space execution_space;
362  typedef Kokkos::RangePolicy<execution_space, LO> range_type;
363 
364  const size_t numImportLIDs = static_cast<size_t>(importLIDs.size());
365 
366  {
367  // Check for correct input
368  int qa_errors = 0;
369  std::ostringstream os;
370  if (combineMode == ABSMAX) {
371  qa_errors += 1;
372  os << "ABSMAX combine mode is not yet implemented for a matrix that has a "
373  << "static graph (i.e., was constructed with the CrsMatrix constructor "
374  << "that takes a const CrsGraph pointer).\n";
375  }
376  else if (combineMode == INSERT) {
377  qa_errors += 1;
378  os << "INSERT combine mode is not allowed if the matrix has a static graph "
379  << "(i.e., was constructed with the CrsMatrix constructor that takes a "
380  << "const CrsGraph pointer).\n";
381  }
382  else if (!(combineMode == ADD || combineMode == REPLACE)) {
383  qa_errors += 1;
384  // Unknown combine mode!
385  os << "Invalid combine mode; should never get "
386  << "here! Please report this bug to the Tpetra developers.\n";
387  }
388 
389  // Check that sizes of input objects are consistent.
390  if (numImportLIDs != static_cast<size_t>(numPacketsPerLID.size())) {
391  qa_errors += 1;
392  os << "importLIDs.size() (" << numImportLIDs << ") != "
393  << "numPacketsPerLID.size() (" << numPacketsPerLID.size() << ").";
394  }
395 
396  if (qa_errors) {
397  if (errStr.get() == NULL) {
398  errStr = std::unique_ptr<std::string>(new std::string());
399  }
400  *errStr = os.str();
401  return false;
402  }
403  } // end QA error checking
404 
405  // numPacketsPerLID, importLIDs, and imports are input, so we have to copy
406  // them to device. Since unpacking is done directly in to the local matrix
407  // (lclMatrix), not copying needs to be performed after unpacking.
408  typename LocalMatrixType::device_type outputDevice;
409  auto num_packets_per_lid_d =
411  numPacketsPerLID.getRawPtr(), numPacketsPerLID.size(),
412  true, "num_packets_per_lid_d");
413 
414  auto import_lids_d =
416  importLIDs.getRawPtr(), importLIDs.size(),
417  true, "import_lids_d");
418 
419  auto imports_d =
421  imports.getRawPtr(), imports.size(),
422  true, "imports_d");
423 
424  // Get the offsets
425  View<size_t*, device_type>
426  offsets_d("offsets_d", numImportLIDs+1);
427  computeOffsetsFromCounts(offsets_d, num_packets_per_lid_d);
428 
429  // Now do the actual unpack!
431  decltype(num_packets_per_lid_d),
432  decltype(offsets_d),
433  decltype(imports_d),
434  decltype(import_lids_d),
435  local_matrix_type,
436  local_map_type> unpack_functor_type;
437  unpack_functor_type unpack_functor(num_packets_per_lid_d, offsets_d,
438  imports_d, import_lids_d, lclMatrix, lclColMap, combineMode, atomic);
439 
440  typename unpack_functor_type::value_type result;
441  Kokkos::parallel_reduce(range_type(0, numImportLIDs), unpack_functor, result);
442 
443  if (!result.success()) {
444  std::ostringstream os;
445  os << "Proc " << myRank << ": packCrsMatrix failed. "
446  << result.summary()
447  << std::endl;
448  if (errStr.get () != NULL) {
449  errStr = std::unique_ptr<std::string> (new std::string ());
450  *errStr = os.str ();
451  }
452  }
453 
454  return result.success();
455 
456 }
457 
458 } // namespace Details
459 } // namespace Tpetra
460 
461 #endif // TPETRA_DETAILS_UNPACKCRSMATRIX_HPP
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
Unpacks and combines a single row of the CrsMatrix.
Implementation details of Tpetra.
Insert new values that don&#39;t currently exist.
Reduction result for UnpackCrsMatrixAndCombineFunctor below.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
bool unpackCrsMatrixAndCombine(LocalMatrixType &lclMatrix, const LocalMapType &lclColMap, std::unique_ptr< std::string > &errStr, const Teuchos::ArrayView< const typename LocalMatrixType::ordinal_type > &importLIDs, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, const int myRank, Distributor &, CombineMode combineMode, const bool atomic)
Unpack the imported column indices and values, and combine into matrix.
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Replace old value with maximum of magnitudes of old and new values.
Replace existing values with new values.