Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_packCrsMatrix_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
41 #define TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
42 
43 #include "TpetraCore_config.h"
44 #include "Teuchos_Array.hpp"
45 #include "Teuchos_ArrayView.hpp"
54 #include <memory>
55 #include <sstream>
56 #include <stdexcept>
57 #include <string>
58 
81 
82 namespace Tpetra {
83 
84 //
85 // Users must never rely on anything in the Details namespace.
86 //
87 namespace Details {
88 
89 namespace PackCrsMatrixImpl {
97 template<class OutputOffsetsViewType,
98  class CountsViewType,
99  class InputOffsetsViewType,
100  class InputLocalRowIndicesViewType,
101  class InputLocalRowPidsViewType,
102  const bool debug =
103 #ifdef HAVE_TPETRA_DEBUG
104  true
105 #else
106  false
107 #endif // HAVE_TPETRA_DEBUG
108  >
110 public:
111  typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
112  typedef typename CountsViewType::non_const_value_type count_type;
113  typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
114  typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
115  typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
116  // output Views drive where execution happens.
117  typedef typename OutputOffsetsViewType::device_type device_type;
118  static_assert (std::is_same<typename CountsViewType::device_type::execution_space,
119  typename device_type::execution_space>::value,
120  "OutputOffsetsViewType and CountsViewType must have the same execution space.");
121  static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
122  "OutputOffsetsViewType must be a Kokkos::View.");
123  static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
124  "OutputOffsetsViewType must be a nonconst Kokkos::View.");
125  static_assert (std::is_integral<output_offset_type>::value,
126  "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
127  static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
128  "CountsViewType must be a Kokkos::View.");
129  static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
130  "CountsViewType must be a nonconst Kokkos::View.");
131  static_assert (std::is_integral<count_type>::value,
132  "The type of each entry of CountsViewType must be a built-in integer type.");
133  static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
134  "InputOffsetsViewType must be a Kokkos::View.");
135  static_assert (std::is_integral<input_offset_type>::value,
136  "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
137  static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
138  "InputLocalRowIndicesViewType must be a Kokkos::View.");
139  static_assert (std::is_integral<local_row_index_type>::value,
140  "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
141 
142  NumPacketsAndOffsetsFunctor (const OutputOffsetsViewType& outputOffsets,
143  const CountsViewType& counts,
144  const InputOffsetsViewType& rowOffsets,
145  const InputLocalRowIndicesViewType& lclRowInds,
146  const InputLocalRowPidsViewType& lclRowPids,
147  const count_type sizeOfLclCount,
148  const count_type sizeOfGblColInd,
149  const count_type sizeOfPid,
150  const count_type sizeOfValue) :
151  outputOffsets_ (outputOffsets),
152  counts_ (counts),
153  rowOffsets_ (rowOffsets),
154  lclRowInds_ (lclRowInds),
155  lclRowPids_ (lclRowPids),
156  sizeOfLclCount_ (sizeOfLclCount),
157  sizeOfGblColInd_ (sizeOfGblColInd),
158  sizeOfPid_ (sizeOfPid),
159  sizeOfValue_ (sizeOfValue),
160  error_ ("error") // don't forget this, or you'll get segfaults!
161  {
162  if (debug) {
163  const size_t numRowsToPack = static_cast<size_t> (lclRowInds_.extent (0));
164 
165  if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
166  std::ostringstream os;
167  os << "lclRowInds.extent(0) = " << numRowsToPack
168  << " != counts.extent(0) = " << counts_.extent (0)
169  << ".";
170  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
171  }
172  if (static_cast<size_t> (numRowsToPack + 1) !=
173  static_cast<size_t> (outputOffsets_.extent (0))) {
174  std::ostringstream os;
175  os << "lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
176  << " != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
177  << ".";
178  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument, os.str ());
179  }
180  }
181  }
182 
183  KOKKOS_INLINE_FUNCTION void
184  operator() (const local_row_index_type& curInd,
185  output_offset_type& update,
186  const bool final) const
187  {
188  if (debug) {
189  if (curInd < static_cast<local_row_index_type> (0)) {
190  error_ () = 1;
191  return;
192  }
193  }
194 
195  if (final) {
196  if (debug) {
197  if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
198  error_ () = 2;
199  return;
200  }
201  }
202  outputOffsets_(curInd) = update;
203  }
204 
205  if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
206  const auto lclRow = lclRowInds_(curInd);
207  if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
208  static_cast<local_row_index_type> (lclRow) < static_cast<local_row_index_type> (0)) {
209  error_ () = 3;
210  return;
211  }
212  // count_type could differ from the type of each row offset.
213  // For example, row offsets might each be 64 bits, but if their
214  // difference always fits in 32 bits, we may then safely use a
215  // 32-bit count_type.
216  const count_type count =
217  static_cast<count_type> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
218 
219  // We pack first the number of entries in the row, then that
220  // many global column indices, then that many pids (if any),
221  // then that many values. However, if the number of entries in
222  // the row is zero, we pack nothing.
223  const count_type numBytes = (count == 0) ?
224  static_cast<count_type> (0) :
225  sizeOfLclCount_ + count * (sizeOfGblColInd_ +
226  (lclRowPids_.size() > 0 ? sizeOfPid_ : 0) +
227  sizeOfValue_);
228 
229  if (final) {
230  counts_(curInd) = numBytes;
231  }
232  update += numBytes;
233  }
234  }
235 
236  // mfh 31 May 2017: Don't need init or join. If you have join, MUST
237  // have join both with and without volatile! Otherwise intrawarp
238  // joins are really slow on GPUs.
239 
241  int getError () const {
242  auto error_h = Kokkos::create_mirror_view (error_);
243  Kokkos::deep_copy (error_h, error_);
244  return error_h ();
245  }
246 
247 private:
248  OutputOffsetsViewType outputOffsets_;
249  CountsViewType counts_;
250  typename InputOffsetsViewType::const_type rowOffsets_;
251  typename InputLocalRowIndicesViewType::const_type lclRowInds_;
252  typename InputLocalRowPidsViewType::const_type lclRowPids_;
253  count_type sizeOfLclCount_;
254  count_type sizeOfGblColInd_;
255  count_type sizeOfPid_;
256  count_type sizeOfValue_;
257  Kokkos::View<int, device_type> error_;
258 };
259 
269 template<class OutputOffsetsViewType,
270  class CountsViewType,
271  class InputOffsetsViewType,
272  class InputLocalRowIndicesViewType,
273  class InputLocalRowPidsViewType>
274 typename CountsViewType::non_const_value_type
275 computeNumPacketsAndOffsets (const OutputOffsetsViewType& outputOffsets,
276  const CountsViewType& counts,
277  const InputOffsetsViewType& rowOffsets,
278  const InputLocalRowIndicesViewType& lclRowInds,
279  const InputLocalRowPidsViewType& lclRowPids,
280  const typename CountsViewType::non_const_value_type sizeOfLclCount,
281  const typename CountsViewType::non_const_value_type sizeOfGblColInd,
282  const typename CountsViewType::non_const_value_type sizeOfPid,
283  const typename CountsViewType::non_const_value_type sizeOfValue)
284 {
285  typedef NumPacketsAndOffsetsFunctor<OutputOffsetsViewType,
286  CountsViewType, typename InputOffsetsViewType::const_type,
287  typename InputLocalRowIndicesViewType::const_type,
288  typename InputLocalRowPidsViewType::const_type> functor_type;
289  typedef typename CountsViewType::non_const_value_type count_type;
290  typedef typename OutputOffsetsViewType::size_type size_type;
291  typedef typename OutputOffsetsViewType::execution_space execution_space;
292  typedef typename functor_type::local_row_index_type LO;
293  typedef Kokkos::RangePolicy<execution_space, LO> range_type;
294  const char prefix[] = "computeNumPacketsAndOffsets: ";
295 
296  count_type count = 0;
297  const count_type numRowsToPack = lclRowInds.extent (0);
298 
299  if (numRowsToPack == 0) {
300  return count;
301  }
302  else {
303  TEUCHOS_TEST_FOR_EXCEPTION
304  (rowOffsets.extent (0) <= static_cast<size_type> (1),
305  std::invalid_argument, prefix << "There is at least one row to pack, "
306  "but the matrix has no rows. lclRowInds.extent(0) = " <<
307  numRowsToPack << ", but rowOffsets.extent(0) = " <<
308  rowOffsets.extent (0) << " <= 1.");
309  TEUCHOS_TEST_FOR_EXCEPTION
310  (outputOffsets.extent (0) !=
311  static_cast<size_type> (numRowsToPack + 1), std::invalid_argument,
312  prefix << "Output dimension does not match number of rows to pack. "
313  << "outputOffsets.extent(0) = " << outputOffsets.extent (0)
314  << " != lclRowInds.extent(0) + 1 = "
315  << static_cast<size_type> (numRowsToPack + 1) << ".");
316  TEUCHOS_TEST_FOR_EXCEPTION
317  (counts.extent (0) != numRowsToPack, std::invalid_argument,
318  prefix << "counts.extent(0) = " << counts.extent (0)
319  << " != numRowsToPack = " << numRowsToPack << ".");
320 
321  functor_type f (outputOffsets, counts, rowOffsets,
322  lclRowInds, lclRowPids, sizeOfLclCount,
323  sizeOfGblColInd, sizeOfPid, sizeOfValue);
324  Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
325 
326  // At least in debug mode, this functor checks for errors.
327  const int errCode = f.getError ();
328  TEUCHOS_TEST_FOR_EXCEPTION
329  (errCode != 0, std::runtime_error, prefix << "parallel_scan error code "
330  << errCode << " != 0.");
331 
332 #if 0
333  size_t total = 0;
334  for (LO k = 0; k < numRowsToPack; ++k) {
335  total += counts[k];
336  }
337  if (outputOffsets(numRowsToPack) != total) {
338  if (errStr.get () == NULL) {
339  errStr = std::unique_ptr<std::ostringstream> (new std::ostringstream ());
340  }
341  std::ostringstream& os = *errStr;
342  os << prefix
343  << "outputOffsets(numRowsToPack=" << numRowsToPack << ") "
344  << outputOffsets(numRowsToPack) << " != sum of counts = "
345  << total << "." << std::endl;
346  if (numRowsToPack != 0) {
347  // Only print the array if it's not too long.
348  if (numRowsToPack < static_cast<LO> (10)) {
349  os << "outputOffsets: [";
350  for (LO i = 0; i <= numRowsToPack; ++i) {
351  os << outputOffsets(i);
352  if (static_cast<LO> (i + 1) <= numRowsToPack) {
353  os << ",";
354  }
355  }
356  os << "]" << std::endl;
357  os << "counts: [";
358  for (LO i = 0; i < numRowsToPack; ++i) {
359  os << counts(i);
360  if (static_cast<LO> (i + 1) < numRowsToPack) {
361  os << ",";
362  }
363  }
364  os << "]" << std::endl;
365  }
366  else {
367  os << "outputOffsets(" << (numRowsToPack-1) << ") = "
368  << outputOffsets(numRowsToPack-1) << "." << std::endl;
369  }
370  }
371  count = outputOffsets(numRowsToPack);
372  return {false, errStr};
373  }
374 #endif // HAVE_TPETRA_DEBUG
375 
376  // Get last entry of outputOffsets, which is the sum of the entries
377  // of counts. Don't assume UVM.
378  using Tpetra::Details::getEntryOnHost;
379  return static_cast<count_type> (getEntryOnHost (outputOffsets,
380  numRowsToPack));
381  }
382 }
383 
399 template<class ST, class ColumnMap, class BufferDeviceType>
400 KOKKOS_FUNCTION
401 Kokkos::pair<int, size_t>
402 packCrsMatrixRow (const ColumnMap& col_map,
403  const Kokkos::View<char*, BufferDeviceType>& exports,
405  const typename PackTraits<int>::input_array_type& pids_in,
406  const typename PackTraits<ST>::input_array_type& vals_in,
407  const size_t offset,
408  const size_t num_ent,
409  const size_t num_bytes_per_value,
410  const bool pack_pids)
411 {
412  using Kokkos::subview;
413  using LO = typename ColumnMap::local_ordinal_type;
414  using GO = typename ColumnMap::global_ordinal_type;
415  using return_type = Kokkos::pair<int, size_t>;
416 
417  if (num_ent == 0) {
418  // Empty rows always take zero bytes, to ensure sparsity.
419  return return_type (0, 0);
420  }
421 
422  const LO num_ent_LO = static_cast<LO> (num_ent); // packValueCount wants this
423  const size_t num_ent_beg = offset;
424  const size_t num_ent_len = PackTraits<LO>::packValueCount (num_ent_LO);
425 
426  const size_t gids_beg = num_ent_beg + num_ent_len;
427  const size_t gids_len = num_ent * PackTraits<GO>::packValueCount (GO (0));
428 
429  const size_t pids_beg = gids_beg + gids_len;
430  const size_t pids_len = pack_pids ?
431  num_ent * PackTraits<int>::packValueCount (int (0)) :
432  static_cast<size_t> (0);
433 
434  const size_t vals_beg = gids_beg + gids_len + pids_len;
435  const size_t vals_len = num_ent * num_bytes_per_value;
436 
437  char* const num_ent_out = exports.data () + num_ent_beg;
438  char* const gids_out = exports.data () + gids_beg;
439  char* const pids_out = pack_pids ? exports.data () + pids_beg : NULL;
440  char* const vals_out = exports.data () + vals_beg;
441 
442  size_t num_bytes_out = 0;
443  int error_code = 0;
444  num_bytes_out += PackTraits<LO>::packValue (num_ent_out, num_ent_LO);
445 
446  {
447  // Copy column indices one at a time, so that we don't need
448  // temporary storage.
449  for (size_t k = 0; k < num_ent; ++k) {
450  const LO lid = lids_in[k];
451  const GO gid = col_map.getGlobalElement (lid);
452  num_bytes_out += PackTraits<GO>::packValue (gids_out, k, gid);
453  }
454  // Copy PIDs one at a time, so that we don't need temporary storage.
455  if (pack_pids) {
456  for (size_t k = 0; k < num_ent; ++k) {
457  const LO lid = lids_in[k];
458  const int pid = pids_in[lid];
459  num_bytes_out += PackTraits<int>::packValue (pids_out, k, pid);
460  }
461  }
462  const auto p =
463  PackTraits<ST>::packArray (vals_out, vals_in.data (), num_ent);
464  error_code += p.first;
465  num_bytes_out += p.second;
466  }
467 
468  if (error_code != 0) {
469  return return_type (10, num_bytes_out);
470  }
471 
472  const size_t expected_num_bytes =
473  num_ent_len + gids_len + pids_len + vals_len;
474  if (num_bytes_out != expected_num_bytes) {
475  return return_type (11, num_bytes_out);
476  }
477  return return_type (0, num_bytes_out);
478 }
479 
480 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
481 struct PackCrsMatrixFunctor {
482  typedef LocalMatrix local_matrix_device_type;
483  typedef LocalMap local_map_type;
484  typedef typename local_matrix_device_type::value_type ST;
485  typedef typename local_map_type::local_ordinal_type LO;
486  typedef typename local_map_type::global_ordinal_type GO;
487  typedef typename local_matrix_device_type::device_type DT;
488 
489  typedef Kokkos::View<const size_t*, BufferDeviceType>
490  num_packets_per_lid_view_type;
491  typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
492  typedef Kokkos::View<char*, BufferDeviceType> exports_view_type;
493  using export_lids_view_type = typename PackTraits<LO>::input_array_type;
494  using source_pids_view_type = typename PackTraits<int>::input_array_type;
495 
496  typedef typename num_packets_per_lid_view_type::non_const_value_type
497  count_type;
498  typedef typename offsets_view_type::non_const_value_type
499  offset_type;
500  typedef Kokkos::pair<int, LO> value_type;
501 
502  static_assert (std::is_same<LO, typename local_matrix_device_type::ordinal_type>::value,
503  "local_map_type::local_ordinal_type and "
504  "local_matrix_device_type::ordinal_type must be the same.");
505 
506  local_matrix_device_type local_matrix;
507  local_map_type local_col_map;
508  exports_view_type exports;
509  num_packets_per_lid_view_type num_packets_per_lid;
510  export_lids_view_type export_lids;
511  source_pids_view_type source_pids;
512  offsets_view_type offsets;
513  size_t num_bytes_per_value;
514  bool pack_pids;
515 
516  PackCrsMatrixFunctor (const local_matrix_device_type& local_matrix_in,
517  const local_map_type& local_col_map_in,
518  const exports_view_type& exports_in,
519  const num_packets_per_lid_view_type& num_packets_per_lid_in,
520  const export_lids_view_type& export_lids_in,
521  const source_pids_view_type& source_pids_in,
522  const offsets_view_type& offsets_in,
523  const size_t num_bytes_per_value_in,
524  const bool pack_pids_in) :
525  local_matrix (local_matrix_in),
526  local_col_map (local_col_map_in),
527  exports (exports_in),
528  num_packets_per_lid (num_packets_per_lid_in),
529  export_lids (export_lids_in),
530  source_pids (source_pids_in),
531  offsets (offsets_in),
532  num_bytes_per_value (num_bytes_per_value_in),
533  pack_pids (pack_pids_in)
534  {
535  const LO numRows = local_matrix_in.numRows ();
536  const LO rowMapDim =
537  static_cast<LO> (local_matrix.graph.row_map.extent (0));
538  TEUCHOS_TEST_FOR_EXCEPTION
539  (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
540  std::logic_error, "local_matrix.graph.row_map.extent(0) = "
541  << rowMapDim << " != numRows (= " << numRows << " ) + 1.");
542  }
543 
544  KOKKOS_INLINE_FUNCTION void init (value_type& dst) const
545  {
546  using ::Tpetra::Details::OrdinalTraits;
547  dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
548  }
549 
550  KOKKOS_INLINE_FUNCTION void
551  join (volatile value_type& dst, const volatile value_type& src) const
552  {
553  // `dst` should reflect the first (least) bad index and all other
554  // associated error codes and data, so prefer keeping it.
555  if (src.first != 0 && dst.first == 0) {
556  dst = src;
557  }
558  }
559 
560  KOKKOS_INLINE_FUNCTION
561  void operator() (const LO i, value_type& dst) const
562  {
563  const size_t offset = offsets[i];
564  const LO export_lid = export_lids[i];
565  const size_t buf_size = exports.size();
566  const size_t num_bytes = num_packets_per_lid(i);
567  const size_t num_ent =
568  static_cast<size_t> (local_matrix.graph.row_map[export_lid+1]
569  - local_matrix.graph.row_map[export_lid]);
570 
571  // Only pack this row's data if it has a nonzero number of
572  // entries. We can do this because receiving processes get the
573  // number of packets, and will know that zero packets means zero
574  // entries.
575  if (num_ent == 0) {
576  return;
577  }
578 
579  if (export_lid >= local_matrix.numRows ()) {
580  if (dst.first != 0) { // keep only the first error
581  dst = Kokkos::make_pair (1, i); // invalid row
582  }
583  return;
584  }
585  else if ((offset > buf_size || offset + num_bytes > buf_size)) {
586  if (dst.first != 0) { // keep only the first error
587  dst = Kokkos::make_pair (2, i); // out of bounds
588  }
589  return;
590  }
591 
592  // We can now pack this row
593 
594  // Since the matrix is locally indexed on the calling process, we
595  // have to use its column Map (which it _must_ have in this case)
596  // to convert to global indices.
597  const auto row_beg = local_matrix.graph.row_map[export_lid];
598  const auto row_end = local_matrix.graph.row_map[export_lid + 1];
599  auto vals_in = subview (local_matrix.values,
600  Kokkos::make_pair (row_beg, row_end));
601  auto lids_in = subview (local_matrix.graph.entries,
602  Kokkos::make_pair (row_beg, row_end));
603  typedef local_map_type LMT;
604  typedef BufferDeviceType BDT;
605  auto p = packCrsMatrixRow<ST, LMT, BDT> (local_col_map, exports, lids_in,
606  source_pids, vals_in, offset,
607  num_ent, num_bytes_per_value,
608  pack_pids);
609  int error_code_this_row = p.first;
610  size_t num_bytes_packed_this_row = p.second;
611  if (error_code_this_row != 0) {
612  if (dst.first != 0) { // keep only the first error
613  dst = Kokkos::make_pair (error_code_this_row, i); // bad pack
614  }
615  }
616  else if (num_bytes_packed_this_row != num_bytes) {
617  if (dst.first != 0) { // keep only the first error
618  dst = Kokkos::make_pair (3, i);
619  }
620  }
621  }
622 };
623 
631 template<class LocalMatrix, class LocalMap, class BufferDeviceType>
632 void
633 do_pack (const LocalMatrix& local_matrix,
634  const LocalMap& local_map,
635  const Kokkos::View<char*, BufferDeviceType>& exports,
636  const typename PackTraits<size_t>::input_array_type& num_packets_per_lid,
638  const typename PackTraits<int>::input_array_type& source_pids,
639  const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
640  const size_t num_bytes_per_value,
641  const bool pack_pids)
642 {
643  using LO = typename LocalMap::local_ordinal_type;
644  using DT = typename LocalMatrix::device_type;
645  using range_type = Kokkos::RangePolicy<typename DT::execution_space, LO>;
646  const char prefix[] = "Tpetra::Details::do_pack: ";
647 
648  if (export_lids.extent (0) != 0) {
649  TEUCHOS_TEST_FOR_EXCEPTION
650  (static_cast<size_t> (offsets.extent (0)) !=
651  static_cast<size_t> (export_lids.extent (0) + 1),
652  std::invalid_argument, prefix << "offsets.extent(0) = "
653  << offsets.extent (0) << " != export_lids.extent(0) (= "
654  << export_lids.extent (0) << ") + 1.");
655  TEUCHOS_TEST_FOR_EXCEPTION
656  (export_lids.extent (0) != num_packets_per_lid.extent (0),
657  std::invalid_argument, prefix << "export_lids.extent(0) = " <<
658  export_lids.extent (0) << " != num_packets_per_lid.extent(0) = "
659  << num_packets_per_lid.extent (0) << ".");
660  // If exports has nonzero length at this point, then the matrix
661  // has at least one entry to pack. Thus, if packing process
662  // ranks, we had better have at least one process rank to pack.
663  TEUCHOS_TEST_FOR_EXCEPTION
664  (pack_pids && exports.extent (0) != 0 &&
665  source_pids.extent (0) == 0, std::invalid_argument, prefix <<
666  "pack_pids is true, and exports.extent(0) = " <<
667  exports.extent (0) << " != 0, meaning that we need to pack at "
668  "least one matrix entry, but source_pids.extent(0) = 0.");
669  }
670 
671  using pack_functor_type =
672  PackCrsMatrixFunctor<LocalMatrix, LocalMap, BufferDeviceType>;
673  pack_functor_type f (local_matrix, local_map, exports,
674  num_packets_per_lid, export_lids,
675  source_pids, offsets, num_bytes_per_value,
676  pack_pids);
677 
678  typename pack_functor_type::value_type result;
679  range_type range (0, num_packets_per_lid.extent (0));
680  Kokkos::parallel_reduce (range, f, result);
681 
682  if (result.first != 0) {
683  // We can't deep_copy from AnonymousSpace Views, so we can't print
684  // out any information from them in case of error.
685  TEUCHOS_TEST_FOR_EXCEPTION
686  (true, std::runtime_error, prefix << "PackCrsMatrixFunctor "
687  "reported error code " << result.first << " for the first "
688  "bad row " << result.second << ".");
689  }
690 }
691 
721 template<typename ST, typename LO, typename GO, typename NT, typename BufferDeviceType>
722 void
724  Kokkos::DualView<char*, BufferDeviceType>& exports,
725  const Kokkos::View<size_t*, BufferDeviceType>& num_packets_per_lid,
726  const Kokkos::View<const LO*, BufferDeviceType>& export_lids,
727  const Kokkos::View<const int*, typename NT::device_type>& export_pids,
728  size_t& constant_num_packets,
729  const bool pack_pids)
730 {
731  ::Tpetra::Details::ProfilingRegion region_pack_crs_matrix(
732  "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix",
733  "Import/Export"
734  );
735  using Kokkos::View;
736  typedef BufferDeviceType DT;
737  typedef Kokkos::DualView<char*, BufferDeviceType> exports_view_type;
738  const char prefix[] = "Tpetra::Details::PackCrsMatrixImpl::packCrsMatrix: ";
739  constexpr bool debug = false;
740 
741  auto local_matrix = sourceMatrix.getLocalMatrixDevice ();
742  auto local_col_map = sourceMatrix.getColMap ()->getLocalMap ();
743 
744  // Setting this to zero tells the caller to expect a possibly
745  // different ("nonconstant") number of packets per local index
746  // (i.e., a possibly different number of entries per row).
747  constant_num_packets = 0;
748 
749  const size_t num_export_lids =
750  static_cast<size_t> (export_lids.extent (0));
751  TEUCHOS_TEST_FOR_EXCEPTION
752  (num_export_lids !=
753  static_cast<size_t> (num_packets_per_lid.extent (0)),
754  std::invalid_argument, prefix << "num_export_lids.extent(0) = "
755  << num_export_lids << " != num_packets_per_lid.extent(0) = "
756  << num_packets_per_lid.extent (0) << ".");
757  if (num_export_lids != 0) {
758  TEUCHOS_TEST_FOR_EXCEPTION
759  (num_packets_per_lid.data () == NULL, std::invalid_argument,
760  prefix << "num_export_lids = "<< num_export_lids << " != 0, but "
761  "num_packets_per_lid.data() = "
762  << num_packets_per_lid.data () << " == NULL.");
763  }
764 
765  const size_t num_bytes_per_lid = PackTraits<LO>::packValueCount (LO (0));
766  const size_t num_bytes_per_gid = PackTraits<GO>::packValueCount (GO (0));
767  const size_t num_bytes_per_pid = PackTraits<int>::packValueCount (int (0));
768 
769  size_t num_bytes_per_value = 0;
771  // Assume ST is default constructible; packValueCount wants an instance.
772  num_bytes_per_value = PackTraits<ST>::packValueCount (ST ());
773  }
774  else {
775  // Since the packed data come from the source matrix, we can use
776  // the source matrix to get the number of bytes per Scalar value
777  // stored in the matrix. This assumes that all Scalar values in
778  // the source matrix require the same number of bytes. If the
779  // source matrix has no entries on the calling process, then we
780  // hope that some process does have some idea how big a Scalar
781  // value is. Of course, if no processes have any entries, then no
782  // values should be packed (though this does assume that in our
783  // packing scheme, rows with zero entries take zero bytes).
784  size_t num_bytes_per_value_l = 0;
785  if (local_matrix.values.extent(0) > 0) {
786  const ST& val = local_matrix.values(0);
787  num_bytes_per_value_l = PackTraits<ST>::packValueCount (val);
788  }
789  using Teuchos::reduceAll;
790  reduceAll<int, size_t> (* (sourceMatrix.getComm ()),
791  Teuchos::REDUCE_MAX,
792  num_bytes_per_value_l,
793  Teuchos::outArg (num_bytes_per_value));
794  }
795 
796  if (num_export_lids == 0) {
797  exports = exports_view_type ("exports", 0);
798  return;
799  }
800 
801  // Array of offsets into the pack buffer.
802  Kokkos::View<size_t*, DT> offsets ("offsets", num_export_lids + 1);
803 
804  // Compute number of packets per LID (row to send), as well as
805  // corresponding offsets (the prefix sum of the packet counts).
806  const size_t count =
807  computeNumPacketsAndOffsets (offsets, num_packets_per_lid,
808  local_matrix.graph.row_map, export_lids,
809  export_pids,
810  num_bytes_per_lid, num_bytes_per_gid,
811  num_bytes_per_pid, num_bytes_per_value);
812 
813  // Resize the output pack buffer if needed.
814  if (count > static_cast<size_t> (exports.extent (0))) {
815  exports = exports_view_type ("exports", count);
816  if (debug) {
817  std::ostringstream os;
818  os << "*** exports resized to " << count << std::endl;
819  std::cerr << os.str ();
820  }
821  }
822  if (debug) {
823  std::ostringstream os;
824  os << "*** count: " << count << ", exports.extent(0): "
825  << exports.extent (0) << std::endl;
826  std::cerr << os.str ();
827  }
828 
829  // If exports has nonzero length at this point, then the matrix has
830  // at least one entry to pack. Thus, if packing process ranks, we
831  // had better have at least one process rank to pack.
832  TEUCHOS_TEST_FOR_EXCEPTION
833  (pack_pids && exports.extent (0) != 0 &&
834  export_pids.extent (0) == 0, std::invalid_argument, prefix <<
835  "pack_pids is true, and exports.extent(0) = " <<
836  exports.extent (0) << " != 0, meaning that we need to pack at least "
837  "one matrix entry, but export_pids.extent(0) = 0.");
838 
839  typedef typename std::decay<decltype (local_matrix)>::type
840  local_matrix_device_type;
841  typedef typename std::decay<decltype (local_col_map)>::type
842  local_map_type;
843 
844  exports.modify_device ();
845  auto exports_d = exports.view_device ();
846  do_pack<local_matrix_device_type, local_map_type, DT>
847  (local_matrix, local_col_map, exports_d, num_packets_per_lid,
848  export_lids, export_pids, offsets, num_bytes_per_value,
849  pack_pids);
850  // If we got this far, we succeeded.
851 }
852 
853 } // namespace PackCrsMatrixImpl
854 
855 template<typename ST, typename LO, typename GO, typename NT>
856 void
858  Teuchos::Array<char>& exports,
859  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
860  const Teuchos::ArrayView<const LO>& exportLIDs,
861  size_t& constantNumPackets)
862 {
863  using local_matrix_device_type = typename CrsMatrix<ST,LO,GO,NT>::local_matrix_device_type;
864  using device_type = typename local_matrix_device_type::device_type;
865  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
866  using host_exec_space = typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space;
867  using host_dev_type = Kokkos::Device<host_exec_space, Kokkos::HostSpace>;
868 
869  // Convert all Teuchos::Array to Kokkos::View
870 
871  // This is an output array, so we don't have to copy to device here.
872  // However, we'll have to remember to copy back to host when done.
873  Kokkos::View<size_t*, buffer_device_type> num_packets_per_lid_d =
874  create_mirror_view_from_raw_host_array (buffer_device_type (),
875  numPacketsPerLID.getRawPtr (),
876  numPacketsPerLID.size (), false,
877  "num_packets_per_lid");
878  // FIXME (mfh 05 Feb 2019) We should just pass the exportLIDs
879  // DualView through here, instead of recreating a device View from a
880  // host ArrayView that itself came from a DualView.
881  //
882  // This is an input array, so we have to copy to device here.
883  // However, we never need to copy it back to host.
884  Kokkos::View<const LO*, buffer_device_type> export_lids_d =
885  create_mirror_view_from_raw_host_array (buffer_device_type (),
886  exportLIDs.getRawPtr (),
887  exportLIDs.size (), true,
888  "export_lids");
889 
890  Kokkos::View<int*, device_type> export_pids_d; // output arg
891  Kokkos::DualView<char*, buffer_device_type> exports_dv; // output arg
892  constexpr bool pack_pids = false;
893  PackCrsMatrixImpl::packCrsMatrix<ST, LO, GO, NT, buffer_device_type> (
894  sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
895  export_pids_d, constantNumPackets, pack_pids);
896 
897  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix, so we have to
898  // copy them back to host.
899  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
900  (numPacketsPerLID.getRawPtr (),
901  numPacketsPerLID.size ());
902  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
903 
904  // FIXME (mfh 23 Aug 2017) If we're forced to use a DualView for
905  // exports_dv above, then we have two host copies for exports_h.
906 
907  // The exports are an output of PackCrsMatrixImpl::packCrsMatrix, so we have
908  // to copy them back to host.
909  if (static_cast<size_t> (exports.size ()) !=
910  static_cast<size_t> (exports_dv.extent (0))) {
911  exports.resize (exports_dv.extent (0));
912  }
913  Kokkos::View<char*, host_dev_type> exports_h (exports.getRawPtr (),
914  exports.size ());
915  Kokkos::deep_copy (exports_h, exports_dv.d_view);
916 }
917 
/// \brief Pack specified entries of the given local sparse matrix for
///   communication, for the Kokkos::DualView-based interface.
///
/// Calls PackCrsMatrixImpl::packCrsMatrix with pack_pids = false and an
/// empty array of process IDs, inside a profiling region named
/// "Tpetra::Details::packCrsMatrixNew".
///
/// \param sourceMatrix [in] Matrix whose entries are packed.
/// \param exports [in/out] Passed straight to the implementation.
/// \param numPacketsPerLID [out] Its sync state is cleared and it is
///   marked as modified on device (through a non-const copy) before its
///   device view is handed to the implementation.
/// \param exportLIDs [in] Must not need a sync to device on entry;
///   this is checked with TEUCHOS_ASSERT.
/// \param constantNumPackets [out] Forwarded unchanged to
///   PackCrsMatrixImpl::packCrsMatrix.
918 template<typename ST, typename LO, typename GO, typename NT>
919 void
920 packCrsMatrixNew (
921  const CrsMatrix<ST, LO, GO, NT>& sourceMatrix,
922  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports,
923  const Kokkos::DualView<size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& numPacketsPerLID,
924  const Kokkos::DualView<const LO*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exportLIDs,
925  size_t& constantNumPackets)
926 {
927  using device_type = typename CrsMatrix<ST, LO, GO, NT>::device_type;
928  using buffer_device_type = typename DistObject<char, LO, GO, NT>::buffer_device_type;
929 
930  // Create an empty array of PIDs, since the interface needs it.
931  Kokkos::View<int*, device_type> exportPIDs_d ("exportPIDs", 0);
932  constexpr bool pack_pids = false;
933 
934  // Write-only device access
935  auto numPacketsPerLID_nc = numPacketsPerLID; // const DV& -> DV
936  numPacketsPerLID_nc.clear_sync_state ();
937  numPacketsPerLID_nc.modify_device ();
938  auto numPacketsPerLID_d = numPacketsPerLID.view_device ();
939 
940  // Read-only device access
941  TEUCHOS_ASSERT( ! exportLIDs.need_sync_device () );
942  auto exportLIDs_d = exportLIDs.view_device ();
943 
944  ::Tpetra::Details::ProfilingRegion region_pack_crs_matrix_new(
945  "Tpetra::Details::packCrsMatrixNew",
946  "Import/Export"
947  );
948  PackCrsMatrixImpl::packCrsMatrix<ST,LO,GO,NT,buffer_device_type> (
949  sourceMatrix, exports, numPacketsPerLID_d, exportLIDs_d,
950  exportPIDs_d, constantNumPackets, pack_pids);
951 }
952 
953 template<typename ST, typename LO, typename GO, typename NT>
954 void
956  Kokkos::DualView<char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>& exports_dv,
957  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
958  const Teuchos::ArrayView<const LO>& exportLIDs,
959  const Teuchos::ArrayView<const int>& sourcePIDs,
960  size_t& constantNumPackets)
961 {
962  typedef typename CrsMatrix<ST,LO,GO,NT>::local_matrix_device_type local_matrix_device_type;
963  typedef typename DistObject<char, LO, GO, NT>::buffer_device_type buffer_device_type;
964  typedef typename Kokkos::DualView<char*, buffer_device_type>::t_host::execution_space host_exec_space;
965  typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
966 
967  typename local_matrix_device_type::device_type outputDevice;
968 
969  const bool verbose = ::Tpetra::Details::Behavior::verbose ();
970  std::unique_ptr<std::string> prefix;
971  if (verbose) {
972  const int myRank = [&] () {
973  auto map = sourceMatrix.getMap ();
974  if (map.get () == nullptr) {
975  return -1;
976  }
977  auto comm = map->getComm ();
978  if (comm.get () == nullptr) {
979  return -2;
980  }
981  return comm->getRank ();
982  } ();
983  std::ostringstream os;
984  os << "Proc " << myRank << ": packCrsMatrixWithOwningPIDs: ";
985  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
986 
987  std::ostringstream os2;
988  os2 << *prefix << "start" << std::endl;
989  std::cerr << os2.str ();
990  }
991 
992  // Convert all Teuchos::Array to Kokkos::View
993 
994  // This is an output array, so we don't have to copy to device here.
995  // However, we'll have to remember to copy back to host when done.
996  auto num_packets_per_lid_d =
997  create_mirror_view_from_raw_host_array (buffer_device_type (),
998  numPacketsPerLID.getRawPtr (),
999  numPacketsPerLID.size (), false,
1000  "num_packets_per_lid");
1001 
1002  // This is an input array, so we have to copy to device here.
1003  // However, we never need to copy it back to host.
1004  auto export_lids_d =
1005  create_mirror_view_from_raw_host_array (buffer_device_type (),
1006  exportLIDs.getRawPtr (),
1007  exportLIDs.size (), true,
1008  "export_lids");
1009  // This is an input array, so we have to copy to device here.
1010  // However, we never need to copy it back to host.
1011  auto export_pids_d =
1013  sourcePIDs.getRawPtr (),
1014  sourcePIDs.size (), true,
1015  "export_pids");
1016  constexpr bool pack_pids = true;
1017  try {
1018  PackCrsMatrixImpl::packCrsMatrix
1019  (sourceMatrix, exports_dv, num_packets_per_lid_d, export_lids_d,
1020  export_pids_d, constantNumPackets, pack_pids);
1021  }
1022  catch (std::exception& e) {
1023  if (verbose) {
1024  std::ostringstream os;
1025  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw: "
1026  << e.what () << std::endl;
1027  std::cerr << os.str ();
1028  }
1029  throw;
1030  }
1031  catch (...) {
1032  if (verbose) {
1033  std::ostringstream os;
1034  os << *prefix << "PackCrsMatrixImpl::packCrsMatrix threw an exception "
1035  "not a subclass of std::exception" << std::endl;
1036  std::cerr << os.str ();
1037  }
1038  throw;
1039  }
1040 
1041  if (numPacketsPerLID.size () != 0) {
1042  try {
1043  // The counts are an output of PackCrsMatrixImpl::packCrsMatrix,
1044  // so we have to copy them back to host.
1045  Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
1046  (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
1047  Kokkos::deep_copy (num_packets_per_lid_h, num_packets_per_lid_d);
1048  }
1049  catch (std::exception& e) {
1050  if (verbose) {
1051  std::ostringstream os;
1052  os << *prefix << "Kokkos::deep_copy threw: " << e.what () << std::endl;
1053  std::cerr << os.str ();
1054  }
1055  throw;
1056  }
1057  catch (...) {
1058  if (verbose) {
1059  std::ostringstream os;
1060  os << *prefix << "Kokkos::deep_copy threw an exception not a subclass "
1061  "of std::exception" << std::endl;
1062  std::cerr << os.str ();
1063  }
1064  throw;
1065  }
1066  }
1067 
1068  if (verbose) {
1069  std::ostringstream os;
1070  os << *prefix << "done" << std::endl;
1071  std::cerr << os.str ();
1072  }
1073 }
1074 
1075 } // namespace Details
1076 } // namespace Tpetra
1077 
// Explicit-instantiation macro. For one (ST, LO, GO, NT) combination it
// emits explicit instantiations of the three public packing functions:
// Details::packCrsMatrix, Details::packCrsMatrixNew, and
// Details::packCrsMatrixWithOwningPIDs.
// The expansion names Details::, CrsMatrix and DistObject without a
// Tpetra:: qualifier, so it is presumably meant to be expanded inside
// namespace Tpetra -- confirm at the point of use.
1078 #define TPETRA_DETAILS_PACKCRSMATRIX_INSTANT( ST, LO, GO, NT ) \
1079  template void \
1080  Details::packCrsMatrix<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1081  Teuchos::Array<char>&, \
1082  const Teuchos::ArrayView<size_t>&, \
1083  const Teuchos::ArrayView<const LO>&, \
1084  size_t&); \
1085  template void \
1086  Details::packCrsMatrixNew<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1087  Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1088  const Kokkos::DualView<size_t*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1089  const Kokkos::DualView<const LO*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1090  size_t&); \
1091  template void \
1092  Details::packCrsMatrixWithOwningPIDs<ST, LO, GO, NT> (const CrsMatrix<ST, LO, GO, NT>&, \
1093  Kokkos::DualView<char*, DistObject<char, LO, GO, NT>::buffer_device_type>&, \
1094  const Teuchos::ArrayView<size_t>&, \
1095  const Teuchos::ArrayView<const LO>&, \
1096  const Teuchos::ArrayView<const int>&, \
1097  size_t&);
1098 
1099 #endif // TPETRA_DETAILS_PACKCRSMATRIX_DEF_HPP
Declaration of the Tpetra::CrsMatrix class.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types,...
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Declaration and definition of Tpetra::Details::getEntryOnHost.
KOKKOS_FUNCTION Kokkos::pair< int, size_t > packCrsMatrixRow(const ColumnMap &col_map, const Kokkos::View< char *, BufferDeviceType > &exports, const typename PackTraits< typename ColumnMap::local_ordinal_type >::input_array_type &lids_in, const typename PackTraits< int >::input_array_type &pids_in, const typename PackTraits< ST >::input_array_type &vals_in, const size_t offset, const size_t num_ent, const size_t num_bytes_per_value, const bool pack_pids)
Packs a single row of the CrsMatrix.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI process.
local_matrix_device_type getLocalMatrixDevice() const
The local sparse matrix.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
static bool verbose()
Whether Tpetra is in verbose mode.
"Local" part of Map suitable for Kokkos kernels.
LocalOrdinal local_ordinal_type
The type of local indices.
GlobalOrdinal global_ordinal_type
The type of global indices.
Compute the number of packets and offsets for the pack procedure.
Base class for distributed Tpetra objects that support data redistribution.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
Implementation details of Tpetra.
void packCrsMatrix(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
void packCrsMatrixNew(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication, for "new" DistObject interface.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Traits class for packing / unpacking data of type T.
static KOKKOS_INLINE_FUNCTION size_t packValue(char outBuf[], const T &inVal)
Pack the given value of type value_type into the given output buffer of bytes (char).
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
Kokkos::View< const value_type *, Kokkos::AnonymousSpace > input_array_type
The type of an input array of value_type.
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > packArray(char outBuf[], const value_type inBuf[], const size_t numEnt)
Pack the first numEnt entries of the given input buffer of value_type, into the output buffer of bytes.