Tpetra parallel linear algebra  Version of the Day
Tpetra_Details_CooMatrix.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_DETAILS_COOMATRIX_HPP
43 #define TPETRA_DETAILS_COOMATRIX_HPP
44 
49 
50 #include "TpetraCore_config.h"
51 #include "Tpetra_Details_PackTriples.hpp"
52 #include "Tpetra_DistObject.hpp"
53 #include "Tpetra_Details_gathervPrint.hpp"
54 #include "Teuchos_TypeNameTraits.hpp"
55 
56 #include <initializer_list>
57 #include <map>
58 #include <memory>
59 #include <string>
60 #include <type_traits>
61 #include <vector>
62 
63 
64 namespace Tpetra {
65 namespace Details {
66 
67 // Implementation details of Tpetra::Details.
68 // So, users REALLY should not use anything in here.
69 namespace Impl {
70 
73 template<class IndexType>
74 struct CooGraphEntry {
75  IndexType row;
76  IndexType col;
77 };
78 
83 template<class IndexType>
85  bool
86  operator () (const CooGraphEntry<IndexType>& x,
87  const CooGraphEntry<IndexType>& y) const
88  {
89  return (x.row < y.row) || (x.row == y.row && x.col < y.col);
90  }
91 };
92 
95 template<class SC, class GO>
97 private:
105  typedef std::map<CooGraphEntry<GO>, SC,
106  CompareCooGraphEntries<GO> > entries_type;
107 
110  entries_type entries_;
111 
112 public:
114  typedef char packet_type;
115 
117  CooMatrixImpl () = default;
118 
125  void
126  sumIntoGlobalValue (const GO gblRowInd,
127  const GO gblColInd,
128  const SC& val)
129  {
130  // There's no sense in worrying about the insertion hint, since
131  // indices may be all over the place. Users make no promises of
132  // sorting or locality of input.
133  CooGraphEntry<GO> ij {gblRowInd, gblColInd};
134  auto result = this->entries_.insert ({ij, val});
135  if (! result.second) { // already in the map
136  result.first->second += val; // sum in the new value
137  }
138  }
139 
148  void
149  sumIntoGlobalValues (const GO gblRowInds[],
150  const GO gblColInds[],
151  const SC vals[],
152  const std::size_t numEnt)
153  {
154  for (std::size_t k = 0; k < numEnt; ++k) {
155  // There's no sense in worrying about the insertion hint, since
156  // indices may be all over the place. Users make no promises of
157  // sorting or locality of input.
158  CooGraphEntry<GO> ij {gblRowInds[k], gblColInds[k]};
159  const SC val = vals[k];
160  auto result = this->entries_.insert ({ij, val});
161  if (! result.second) { // already in the map
162  result.first->second += val; // sum in the new value
163  }
164  }
165  }
166 
168  std::size_t
170  {
171  return this->entries_.size ();
172  }
173 
177  void
178  forAllEntries (std::function<void (const GO, const GO, const SC&)> f) const
179  {
180  for (auto iter = this->entries_.begin ();
181  iter != this->entries_.end (); ++iter) {
182  f (iter->first.row, iter->first.col, iter->second);
183  }
184  }
185 
191  void
192  mergeIntoRow (const GO tgtGblRow,
193  const CooMatrixImpl<SC, GO>& src,
194  const GO srcGblRow)
195  {
196  // const char prefix[] =
197  // "Tpetra::Details::Impl::CooMatrixImpl::mergeIntoRow: ";
198 
199  entries_type& tgtEntries = this->entries_;
200  const entries_type& srcEntries = src.entries_;
201 
202  // Find all entries with the given global row index. The min GO
203  // value is guaranteed to be the least possible column index, so
204  // beg1 is a lower bound for all (row index, column index) pairs.
205  // Lower bound is inclusive; upper bound is exclusive.
206  auto srcBeg = srcEntries.lower_bound ({srcGblRow, std::numeric_limits<GO>::min ()});
207  auto srcEnd = srcEntries.upper_bound ({srcGblRow+1, std::numeric_limits<GO>::min ()});
208  auto tgtBeg = tgtEntries.lower_bound ({tgtGblRow, std::numeric_limits<GO>::min ()});
209  auto tgtEnd = tgtEntries.upper_bound ({tgtGblRow+1, std::numeric_limits<GO>::min ()});
210 
211  // Don't waste time iterating over the current row of *this, if
212  // the current row of src is empty.
213  if (srcBeg != srcEnd) {
214  for (auto tgtCur = tgtBeg; tgtCur != tgtEnd; ++tgtCur) {
215  auto srcCur = srcBeg;
216  while (srcCur != srcEnd && srcCur->first.col < tgtCur->first.col) {
217  ++srcCur;
218  }
219  // At this point, one of the following is true:
220  //
221  // 1. srcCur == srcEnd, thus nothing more to insert.
222  // 2. srcCur->first.col > tgtCur->first.col, thus the current
223  // row of src has no entry matching tgtCur->first, so we
224  // have to insert it. Insertion does not invalidate
225  // tgtEntries iterators, and neither srcEntries nor
226  // tgtEntries have duplicates, so this is safe.
227  // 3. srcCur->first.col == tgtCur->first.col, so add the two entries.
228  if (srcCur != srcEnd) {
229  if (srcCur->first.col == tgtCur->first.col) {
230  tgtCur->second += srcCur->second;
231  }
232  else {
233  // tgtCur is (optimally) right before where we want to put
234  // the new entry, since srcCur points to the first entry
235  // in srcEntries whose column index is greater than that
236  // of the entry to which tgtCur points.
237  (void) tgtEntries.insert (tgtCur, *srcCur);
238  }
239  } // if srcCur != srcEnd
240  } // for each entry in the current row (lclRow) of *this
241  } // if srcBeg != srcEnd
242  }
243 
253  int
254  countPackRow (int& numPackets,
255  const GO gblRow,
256  const ::Teuchos::Comm<int>& comm,
257  std::ostream* errStrm = NULL) const
258  {
259  using ::Tpetra::Details::countPackTriples;
260  using ::Tpetra::Details::countPackTriplesCount;
261  using std::endl;
262  const char prefix[] = "Tpetra::Details::Impl::CooMatrixImpl::countPackRow: ";
263 #ifdef HAVE_TPETRACORE_MPI
264  int errCode = MPI_SUCCESS;
265 #else
266  int errCode = 0;
267 #endif // HAVE_TPETRACORE_MPI
268 
269  // Count the number of entries in the given row.
270  const GO minGO = std::numeric_limits<GO>::min ();
271  auto beg = this->entries_.lower_bound ({gblRow, minGO});
272  auto end = this->entries_.upper_bound ({gblRow+1, minGO});
273  int numEnt = 0;
274  for (auto iter = beg; iter != end; ++iter) {
275  ++numEnt;
276  if (numEnt == std::numeric_limits<int>::max ()) {
277  if (errStrm != NULL) {
278  *errStrm << prefix << "In (global) row " << gblRow << ", the number "
279  "of entries thus far, numEnt = " << numEnt << ", has reached the "
280  "maximum int value. We need to store this count as int for MPI's "
281  "sake." << endl;
282  }
283  return -1;
284  }
285  }
286 
287  int numPacketsForCount = 0; // output argument of countPackTriplesCount
288  {
289  const int errCode =
290  countPackTriplesCount (comm, numPacketsForCount, errStrm);
291  if (errCode != 0) {
292  if (errStrm != NULL) {
293  *errStrm << prefix << "countPackTriplesCount "
294  "returned errCode = " << errCode << " != 0." << endl;
295  }
296  return errCode;
297  }
298  if (numPacketsForCount < 0) {
299  if (errStrm != NULL) {
300  *errStrm << prefix << "countPackTriplesCount returned "
301  "numPacketsForCount = " << numPacketsForCount << " < 0." << endl;
302  }
303  return -1;
304  }
305  }
306 
307  int numPacketsForTriples = 0; // output argument of countPackTriples
308  {
309  const int errCode = countPackTriples<SC, GO> (numEnt, comm, numPacketsForTriples);
310  TEUCHOS_TEST_FOR_EXCEPTION
311  (errCode != 0, std::runtime_error, prefix << "countPackTriples "
312  "returned errCode = " << errCode << " != 0.");
313  TEUCHOS_TEST_FOR_EXCEPTION
314  (numPacketsForTriples < 0, std::logic_error, prefix << "countPackTriples "
315  "returned numPacketsForTriples = " << numPacketsForTriples << " < 0.");
316  }
317 
318  numPackets = numPacketsForCount + numPacketsForTriples;
319  return errCode;
320  }
321 
336  void
337  packRow (packet_type outBuf[],
338  const int outBufSize,
339  int& outBufCurPos, // in/out argument
340  const ::Teuchos::Comm<int>& comm,
341  std::vector<GO>& gblRowInds,
342  std::vector<GO>& gblColInds,
343  std::vector<SC>& vals,
344  const GO gblRow) const
345  {
346  using ::Tpetra::Details::packTriples;
347  using ::Tpetra::Details::packTriplesCount;
348  const char prefix[] = "Tpetra::Details::Impl::CooMatrixImpl::packRow: ";
349 
350  const GO minGO = std::numeric_limits<GO>::min ();
351  auto beg = this->entries_.lower_bound ({gblRow, minGO});
352  auto end = this->entries_.upper_bound ({gblRow+1, minGO});
353 
354  // This doesn't actually deallocate. Only swapping with an empty
355  // std::vector does that.
356  gblRowInds.resize (0);
357  gblColInds.resize (0);
358  vals.resize (0);
359 
360  int numEnt = 0;
361  for (auto iter = beg; iter != end; ++iter) {
362  gblRowInds.push_back (iter->first.row);
363  gblColInds.push_back (iter->first.col);
364  vals.push_back (iter->second);
365  ++numEnt;
366  TEUCHOS_TEST_FOR_EXCEPTION
367  (numEnt == std::numeric_limits<int>::max (), std::runtime_error, prefix
368  << "In (global) row " << gblRow << ", the number of entries thus far, "
369  "numEnt = " << numEnt << ", has reached the maximum int value. "
370  "We need to store this count as int for MPI's sake.");
371  }
372 
373  {
374  const int errCode =
375  packTriplesCount (numEnt, outBuf, outBufSize, outBufCurPos, comm);
376  TEUCHOS_TEST_FOR_EXCEPTION
377  (errCode != 0, std::runtime_error, prefix
378  << "In (global) row " << gblRow << ", packTriplesCount returned "
379  "errCode = " << errCode << " != 0.");
380  }
381  {
382  const int errCode =
383  packTriples (gblRowInds.data (),
384  gblColInds.data (),
385  vals.data (),
386  numEnt,
387  outBuf,
388  outBufSize,
389  outBufCurPos, // in/out argument
390  comm);
391  TEUCHOS_TEST_FOR_EXCEPTION
392  (errCode != 0, std::runtime_error, prefix << "In (global) row "
393  << gblRow << ", packTriples returned errCode = " << errCode
394  << " != 0.");
395  }
396  }
397 
405  GO
406  getMyGlobalRowIndices (std::vector<GO>& rowInds) const
407  {
408  rowInds.clear ();
409 
410  GO lclMinRowInd = std::numeric_limits<GO>::max (); // compute local min
411  for (typename entries_type::const_iterator iter = this->entries_.begin ();
412  iter != this->entries_.end (); ++iter) {
413  const GO rowInd = iter->first.row;
414  if (rowInd < lclMinRowInd) {
415  lclMinRowInd = rowInd;
416  }
417  if (rowInds.empty () || rowInds.back () != rowInd) {
418  rowInds.push_back (rowInd); // don't insert duplicates
419  }
420  }
421  return lclMinRowInd;
422  }
423 
424  template<class OffsetType>
425  void
426  buildCrs (std::vector<OffsetType>& rowOffsets,
427  GO gblColInds[],
428  SC vals[]) const
429  {
430  static_assert (std::is_integral<OffsetType>::value,
431  "OffsetType must be a built-in integer type.");
432 
433  // clear() doesn't generally free storage; it just resets the
434  // length. Thus, we reuse any existing storage here.
435  rowOffsets.clear ();
436 
437  const std::size_t numEnt = this->getLclNumEntries ();
438  if (numEnt == 0) {
439  rowOffsets.push_back (0);
440  }
441  else {
442  typename entries_type::const_iterator iter = this->entries_.begin ();
443  GO prevGblRowInd = iter->first.row;
444 
445  OffsetType k = 0;
446  for ( ; iter != this->entries_.end (); ++iter, ++k) {
447  const GO gblRowInd = iter->first.row;
448  if (k == 0 || gblRowInd != prevGblRowInd) {
449  // The row offsets array always has at least one entry. The
450  // first entry is always zero, and the last entry is always
451  // the number of matrix entries.
452  rowOffsets.push_back (k);
453  prevGblRowInd = gblRowInd;
454  }
455  gblColInds[k] = iter->first.col;
456 
457  static_assert (std::is_same<typename std::decay<decltype (iter->second)>::type, SC>::value,
458  "The type of iter->second != SC.");
459  vals[k] = iter->second;
460  }
461  rowOffsets.push_back (static_cast<OffsetType> (numEnt));
462  }
463  }
464 
477  template<class OffsetType, class LO>
478  void
479  buildLocallyIndexedCrs (std::vector<OffsetType>& rowOffsets,
480  LO lclColInds[],
481  SC vals[],
482  std::function<LO (const GO)> gblToLcl) const
483  {
484  static_assert (std::is_integral<OffsetType>::value,
485  "OffsetType must be a built-in integer type.");
486  static_assert (std::is_integral<LO>::value,
487  "LO must be a built-in integer type.");
488 
489  // clear() doesn't generally free storage; it just resets the
490  // length. Thus, we reuse any existing storage here.
491  rowOffsets.clear ();
492 
493  const std::size_t numEnt = this->getLclNumEntries ();
494  if (numEnt == 0) {
495  rowOffsets.push_back (0);
496  }
497  else {
498  typename entries_type::const_iterator iter = this->entries_.begin ();
499  GO prevGblRowInd = iter->first.row;
500 
501  OffsetType k = 0;
502  for ( ; iter != this->entries_.end (); ++iter, ++k) {
503  const GO gblRowInd = iter->first.row;
504  if (k == 0 || gblRowInd != prevGblRowInd) {
505  // The row offsets array always has at least one entry. The
506  // first entry is always zero, and the last entry is always
507  // the number of matrix entries.
508  rowOffsets.push_back (k);
509  prevGblRowInd = gblRowInd;
510  }
511  lclColInds[k] = gblToLcl (iter->first.col);
512  vals[k] = iter->second;
513  }
514  rowOffsets.push_back (static_cast<OffsetType> (numEnt));
515  }
516  }
517 };
518 
519 } // namespace Impl
520 
569 template<class SC,
573 class CooMatrix : public ::Tpetra::DistObject<char, LO, GO, NT> {
574 public:
576  typedef char packet_type;
578  typedef SC scalar_type;
579  typedef LO local_ordinal_type;
580  typedef GO global_ordinal_type;
581  typedef NT node_type;
582  typedef typename NT::device_type device_type;
584  typedef ::Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type> map_type;
585 
586 private:
588  typedef ::Tpetra::DistObject<packet_type, local_ordinal_type,
589  global_ordinal_type, node_type> base_type;
590 
593 
594 public:
601  base_type (::Teuchos::null),
602  localError_ (new bool (false)),
603  errs_ (new std::shared_ptr<std::ostringstream> ()) // ptr to a null ptr
604  {}
605 
613  CooMatrix (const ::Teuchos::RCP<const map_type>& map) :
614  base_type (map),
615  localError_ (new bool (false)),
616  errs_ (new std::shared_ptr<std::ostringstream> ()) // ptr to a null ptr
617  {}
618 
620  virtual ~CooMatrix () {}
621 
628  void
629  sumIntoGlobalValue (const GO gblRowInd,
630  const GO gblColInd,
631  const SC& val)
632  {
633  this->impl_.sumIntoGlobalValue (gblRowInd, gblColInd, val);
634  }
635 
644  void
645  sumIntoGlobalValues (const GO gblRowInds[],
646  const GO gblColInds[],
647  const SC vals[],
648  const std::size_t numEnt)
649  {
650  this->impl_.sumIntoGlobalValues (gblRowInds, gblColInds, vals, numEnt);
651  }
652 
654  void
655  sumIntoGlobalValues (std::initializer_list<GO> gblRowInds,
656  std::initializer_list<GO> gblColInds,
657  std::initializer_list<SC> vals,
658  const std::size_t numEnt)
659  {
660  this->impl_.sumIntoGlobalValues (gblRowInds.begin (), gblColInds.begin (),
661  vals.begin (), numEnt);
662  }
663 
665  std::size_t
667  {
668  return this->impl_.getLclNumEntries ();
669  }
670 
671  template<class OffsetType>
672  void
673  buildCrs (::Kokkos::View<OffsetType*, device_type>& rowOffsets,
674  ::Kokkos::View<GO*, device_type>& gblColInds,
675  ::Kokkos::View<typename ::Kokkos::Details::ArithTraits<SC>::val_type*, device_type>& vals)
676  {
677  static_assert (std::is_integral<OffsetType>::value,
678  "OffsetType must be a built-in integer type.");
679  using ::Kokkos::create_mirror_view;
681  using ::Kokkos::View;
682  typedef typename ::Kokkos::Details::ArithTraits<SC>::val_type ISC;
683 
684  const std::size_t numEnt = this->getLclNumEntries ();
685 
686  gblColInds = View<GO*, device_type> ("gblColInds", numEnt);
687  vals = View<ISC*, device_type> ("vals", numEnt);
688  auto gblColInds_h = create_mirror_view (gblColInds);
689  auto vals_h = create_mirror_view (vals);
690 
691  std::vector<std::size_t> rowOffsetsSV;
692  this->impl_.buildCrs (rowOffsetsSV,
693  gblColInds_h.ptr_on_device (),
694  vals_h.ptr_on_device ());
695  rowOffsets =
696  View<OffsetType*, device_type> ("rowOffsets", rowOffsetsSV.size ());
697  typename View<OffsetType*, device_type>::HostMirror
698  rowOffsets_h (rowOffsetsSV.data (), rowOffsetsSV.size ());
699  deep_copy (rowOffsets, rowOffsets_h);
700 
701  deep_copy (gblColInds, gblColInds_h);
702  deep_copy (vals, vals_h);
703  }
704 
715  void
716  fillComplete (const ::Teuchos::RCP<const ::Teuchos::Comm<int> >& comm)
717  {
718  if (comm.is_null ()) {
719  this->map_ = ::Teuchos::null;
720  }
721  else {
722  this->map_ = this->buildMap (comm);
723  }
724  }
725 
734  void
736  {
737  TEUCHOS_TEST_FOR_EXCEPTION
738  (this->getMap ().is_null (), std::runtime_error, "Tpetra::Details::"
739  "CooMatrix::fillComplete: This object does not yet have a Map. "
740  "You must call the version of fillComplete "
741  "that takes an input communicator.");
742  this->fillComplete (this->getMap ()->getComm ());
743  }
744 
759  bool localError () const {
760  return *localError_;
761  }
762 
780  std::string errorMessages () const {
781  return ((*errs_).get () == NULL) ? std::string ("") : (*errs_)->str ();
782  }
783 
784 private:
797  std::shared_ptr<bool> localError_;
798 
806  std::shared_ptr<std::shared_ptr<std::ostringstream> > errs_;
807 
809  std::ostream&
810  markLocalErrorAndGetStream ()
811  {
812  * (this->localError_) = true;
813  if ((*errs_).get () == NULL) {
814  *errs_ = std::shared_ptr<std::ostringstream> (new std::ostringstream ());
815  }
816  return **errs_;
817  }
818 
819 public:
822  virtual std::string description () const {
823  using Teuchos::TypeNameTraits;
824 
825  std::ostringstream os;
826  os << "\"Tpetra::Details::CooMatrix\": {"
827  << "SC: " << TypeNameTraits<SC>::name ()
828  << ", LO: " << TypeNameTraits<LO>::name ()
829  << ", GO: " << TypeNameTraits<GO>::name ()
830  << ", NT: " << TypeNameTraits<NT>::name ();
831  if (this->getObjectLabel () != "") {
832  os << ", Label: \"" << this->getObjectLabel () << "\"";
833  }
834  os << ", Has Map: " << (this->map_.is_null () ? "false" : "true")
835  << "}";
836  return os.str ();
837  }
838 
841  virtual void
842  describe (Teuchos::FancyOStream& out,
843  const Teuchos::EVerbosityLevel verbLevel =
844  Teuchos::Describable::verbLevel_default) const
845  {
846  using ::Tpetra::Details::gathervPrint;
847  using ::Teuchos::EVerbosityLevel;
848  using ::Teuchos::OSTab;
849  using ::Teuchos::TypeNameTraits;
850  using ::Teuchos::VERB_DEFAULT;
851  using ::Teuchos::VERB_LOW;
852  using ::Teuchos::VERB_MEDIUM;
853  using std::endl;
854 
855  const auto vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
856 
857  auto comm = this->getMap ().is_null () ?
858  Teuchos::null : this->getMap ()->getComm ();
859  const int myRank = comm.is_null () ? 0 : comm->getRank ();
860  // const int numProcs = comm.is_null () ? 1 : comm->getSize ();
861 
862  if (vl != Teuchos::VERB_NONE) {
863  // Convention is to start describe() implementations with a tab.
864  OSTab tab0 (out);
865  if (myRank == 0) {
866  out << "\"Tpetra::Details::CooMatrix\":" << endl;
867  }
868  OSTab tab1 (out);
869  if (myRank == 0) {
870  out << "Template parameters:" << endl;
871  {
872  OSTab tab2 (out);
873  out << "SC: " << TypeNameTraits<SC>::name () << endl
874  << "LO: " << TypeNameTraits<LO>::name () << endl
875  << "GO: " << TypeNameTraits<GO>::name () << endl
876  << "NT: " << TypeNameTraits<NT>::name () << endl;
877  }
878  if (this->getObjectLabel () != "") {
879  out << "Label: \"" << this->getObjectLabel () << "\"" << endl;
880  }
881  out << "Has Map: " << (this->map_.is_null () ? "false" : "true") << endl;
882  } // if myRank == 0
883 
884  // Describe the Map, if it exists.
885  if (! this->map_.is_null ()) {
886  if (myRank == 0) {
887  out << "Map:" << endl;
888  }
889  OSTab tab2 (out);
890  this->map_->describe (out, vl);
891  }
892 
893  // At verbosity > VERB_LOW, each process prints something.
894  if (vl > VERB_LOW) {
895  std::ostringstream os;
896  os << "Process " << myRank << ":" << endl;
897  //OSTab tab3 (os);
898 
899  const std::size_t numEnt = this->impl_.getLclNumEntries ();
900  os << " Local number of entries: " << numEnt << endl;
901  if (vl > VERB_MEDIUM) {
902  os << " Local entries:" << endl;
903  //OSTab tab4 (os);
904  this->impl_.forAllEntries ([&os] (const GO row, const GO col, const SC& val) {
905  os << " {"
906  << "row: " << row
907  << ", col: " << col
908  << ", val: " << val
909  << "}" << endl;
910  });
911  }
912  gathervPrint (out, os.str (), *comm);
913  }
914  } // vl != Teuchos::VERB_NONE
915  }
916 
917 private:
926  Teuchos::RCP<const map_type>
927  buildMap (const ::Teuchos::RCP<const ::Teuchos::Comm<int> >& comm)
928  {
929  using ::Teuchos::outArg;
930  using ::Teuchos::rcp;
931  using ::Teuchos::REDUCE_MIN;
932  using ::Teuchos::reduceAll;
933  typedef ::Tpetra::global_size_t GST;
934  //const char prefix[] = "Tpetra::Details::CooMatrix::buildMap: ";
935 
936  // Processes where comm is null don't participate in the Map.
937  if (comm.is_null ()) {
938  return ::Teuchos::null;
939  }
940 
941  // mfh 17 Jan 2017: We just happen to use row indices, because
942  // that's what Tpetra::CrsMatrix currently uses. That's probably
943  // not the best thing to use, but it's not bad for commonly
944  // encountered matrices. A better more general approach might be
945  // to hash (row index, column index) pairs to a global index. One
946  // could make that unique by doing a parallel scan at map
947  // construction time.
948 
949  std::vector<GO> rowInds;
950  const GO lclMinGblRowInd = this->impl_.getMyGlobalRowIndices (rowInds);
951 
952  // Compute the globally min row index for the "index base."
953  GO gblMinGblRowInd = 0; // output argument
954  reduceAll<int, GO> (*comm, REDUCE_MIN, lclMinGblRowInd,
955  outArg (gblMinGblRowInd));
956  const GO indexBase = gblMinGblRowInd;
957  const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
958  return rcp (new map_type (INV, rowInds.data (), rowInds.size (),
959  indexBase, comm));
960  }
961 
962 protected:
966  virtual size_t constantNumberOfPackets () const {
967  return static_cast<size_t> (0);
968  }
969 
973  virtual bool
974  checkSizes (const ::Tpetra::SrcDistObject& source)
975  {
976  using std::endl;
978  const char prefix[] = "Tpetra::Details::CooMatrix::checkSizes: ";
979 
980  const this_type* src = dynamic_cast<const this_type* > (&source);
981 
982  if (src == NULL) {
983  std::ostream& err = markLocalErrorAndGetStream ();
984  err << prefix << "The source object of the Import or Export "
985  "must be a CooMatrix with the same template parameters as the "
986  "target object." << endl;
987  }
988  else if (this->map_.is_null ()) {
989  std::ostream& err = markLocalErrorAndGetStream ();
990  err << prefix << "The target object of the Import or Export "
991  "must be a CooMatrix with a nonnull Map." << endl;
992  }
993  return ! (* (this->localError_));
994  }
995 
999  virtual bool useNewInterface () { return true; }
1000 
1003  virtual void
1004  copyAndPermuteNew (const ::Tpetra::SrcDistObject& sourceObject,
1005  const size_t numSameIDs,
1006  const ::Kokkos::DualView<const LO*, device_type>& permuteToLIDs,
1007  const ::Kokkos::DualView<const LO*, device_type>& permuteFromLIDs)
1008  {
1009  using std::endl;
1011  const char prefix[] = "Tpetra::Details::CooMatrix::copyAndPermuteNew: ";
1012 
1013  // There's no communication in this method, so it's safe just to
1014  // return on error.
1015 
1016  if (* (this->localError_)) {
1017  std::ostream& err = this->markLocalErrorAndGetStream ();
1018  err << prefix << "The target object of the Import or Export is "
1019  "already in an error state." << endl;
1020  return;
1021  }
1022 
1023  const this_type* src = dynamic_cast<const this_type*> (&sourceObject);
1024  if (src == NULL) {
1025  std::ostream& err = this->markLocalErrorAndGetStream ();
1026  err << prefix << "Input argument 'sourceObject' is not a CooMatrix."
1027  << endl;
1028  return;
1029  }
1030 
1031  const size_t numPermuteIDs =
1032  static_cast<size_t> (permuteToLIDs.dimension_0 ());
1033  if (numPermuteIDs != static_cast<size_t> (permuteFromLIDs.dimension_0 ())) {
1034  std::ostream& err = this->markLocalErrorAndGetStream ();
1035  err << prefix << "permuteToLIDs.dimension_0() = "
1036  << numPermuteIDs << " != permuteFromLIDs.dimension_0() = "
1037  << permuteFromLIDs.dimension_0 () << "." << endl;
1038  return;
1039  }
1040  if (sizeof (int) <= sizeof (size_t) &&
1041  numPermuteIDs > static_cast<size_t> (std::numeric_limits<int>::max ())) {
1042  std::ostream& err = this->markLocalErrorAndGetStream ();
1043  err << prefix << "numPermuteIDs = " << numPermuteIDs
1044  << ", a size_t, overflows int." << endl;
1045  return;
1046  }
1047 
1048  // TODO (mfh 19 Jan 2017) Check that permuteToLIDs and
1049  // permuteFromLIDs are sync'd to host.
1050 
1051  // Even though this is an std::set, we can start with numSameIDs
1052  // just by iterating through the first entries of the set.
1053 
1054  if (sizeof (int) <= sizeof (size_t) &&
1055  numSameIDs > static_cast<size_t> (std::numeric_limits<int>::max ())) {
1056  std::ostream& err = this->markLocalErrorAndGetStream ();
1057  err << prefix << "numSameIDs = " << numSameIDs
1058  << ", a size_t, overflows int." << endl;
1059  return;
1060  }
1061 
1062  //
1063  // Copy in entries from any initial rows with the same global row indices.
1064  //
1065  const LO numSame = static_cast<int> (numSameIDs);
1066  // Count of local row indices encountered here with invalid global
1067  // row indices. If nonzero, something went wrong. If something
1068  // did go wrong, we'll defer responding until the end of this
1069  // method, so we can print as much useful info as possible.
1070  LO numInvalidSameRows = 0;
1071  for (LO lclRow = 0; lclRow < numSame; ++lclRow) {
1072  // All numSame initial rows have the same global index in both
1073  // source and target Maps, so we only need to convert to global
1074  // once.
1075  const GO gblRow = this->map_->getGlobalElement (lclRow);
1076  if (gblRow == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1077  ++numInvalidSameRows;
1078  continue;
1079  }
1080  else {
1081  this->impl_.mergeIntoRow (gblRow, src->impl_, gblRow);
1082  }
1083  }
1084 
1085  //
1086  // Copy in entries from remaining rows that are permutations, that
1087  // is, that live in both the source and target Maps, but aren't
1088  // included in the "same" list (see above).
1089  //
1090  const LO numPermute = static_cast<int> (numPermuteIDs);
1091  // Count of local "from" row indices encountered here with invalid
1092  // global row indices. If nonzero, something went wrong. If
1093  // something did go wrong, we'll defer responding until the end of
1094  // this method, so we can print as much useful info as possible.
1095  LO numInvalidRowsFrom = 0;
1096  // Count of local "to" row indices encountered here with invalid
1097  // global row indices. If nonzero, something went wrong. If
1098  // something did go wrong, we'll defer responding until the end of
1099  // this method, so we can print as much useful info as possible.
1100  LO numInvalidRowsTo = 0;
1101  for (LO k = 0; k < numPermute; ++k) {
1102  const LO lclRowFrom = permuteFromLIDs.h_view(k);
1103  const LO lclRowTo = permuteToLIDs.h_view(k);
1104  const GO gblRowFrom = src->map_->getGlobalElement (lclRowFrom);
1105  const GO gblRowTo = this->map_->getGlobalElement (lclRowTo);
1106 
1107  bool bothConversionsValid = true;
1108  if (gblRowFrom == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1109  ++numInvalidRowsFrom;
1110  bothConversionsValid = false;
1111  }
1112  if (gblRowTo == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1113  ++numInvalidRowsTo;
1114  bothConversionsValid = false;
1115  }
1116  if (bothConversionsValid) {
1117  this->impl_.mergeIntoRow (gblRowTo, src->impl_, gblRowFrom);
1118  }
1119  }
1120 
1121  // Print info if any errors occurred.
1122  if (numInvalidSameRows != 0 || numInvalidRowsFrom != 0 || numInvalidRowsTo != 0) {
1123  // Collect and print all the invalid input row indices, for the
1124  // "same," "from," and "to" lists.
1125  std::vector<std::pair<LO, GO> > invalidSameRows;
1126  invalidSameRows.reserve (numInvalidSameRows);
1127  std::vector<std::pair<LO, GO> > invalidRowsFrom;
1128  invalidRowsFrom.reserve (numInvalidRowsFrom);
1129  std::vector<std::pair<LO, GO> > invalidRowsTo;
1130  invalidRowsTo.reserve (numInvalidRowsTo);
1131 
1132  for (LO lclRow = 0; lclRow < numSame; ++lclRow) {
1133  // All numSame initial rows have the same global index in both
1134  // source and target Maps, so we only need to convert to global
1135  // once.
1136  const GO gblRow = this->map_->getGlobalElement (lclRow);
1137  if (gblRow == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1138  invalidSameRows.push_back ({lclRow, gblRow});
1139  }
1140  }
1141 
1142  for (LO k = 0; k < numPermute; ++k) {
1143  const LO lclRowFrom = permuteFromLIDs.h_view(k);
1144  const LO lclRowTo = permuteToLIDs.h_view(k);
1145  const GO gblRowFrom = src->map_->getGlobalElement (lclRowFrom);
1146  const GO gblRowTo = this->map_->getGlobalElement (lclRowTo);
1147 
1148  if (gblRowFrom == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1149  invalidRowsFrom.push_back ({lclRowFrom, gblRowFrom});
1150  }
1151  if (gblRowTo == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1152  invalidRowsTo.push_back ({lclRowTo, gblRowTo});
1153  }
1154  }
1155 
1156  std::ostringstream os;
1157  if (numInvalidSameRows != 0) {
1158  os << "Invalid permute \"same\" (local, global) index pairs: [";
1159  for (std::size_t k = 0; k < invalidSameRows.size (); ++k) {
1160  const auto& p = invalidSameRows[k];
1161  os << "(" << p.first << "," << p.second << ")";
1162  if (k + 1 < invalidSameRows.size ()) {
1163  os << ", ";
1164  }
1165  }
1166  os << "]" << std::endl;
1167  }
1168  if (numInvalidRowsFrom != 0) {
1169  os << "Invalid permute \"from\" (local, global) index pairs: [";
1170  for (std::size_t k = 0; k < invalidRowsFrom.size (); ++k) {
1171  const auto& p = invalidRowsFrom[k];
1172  os << "(" << p.first << "," << p.second << ")";
1173  if (k + 1 < invalidRowsFrom.size ()) {
1174  os << ", ";
1175  }
1176  }
1177  os << "]" << std::endl;
1178  }
1179  if (numInvalidRowsTo != 0) {
1180  os << "Invalid permute \"to\" (local, global) index pairs: [";
1181  for (std::size_t k = 0; k < invalidRowsTo.size (); ++k) {
1182  const auto& p = invalidRowsTo[k];
1183  os << "(" << p.first << "," << p.second << ")";
1184  if (k + 1 < invalidRowsTo.size ()) {
1185  os << ", ";
1186  }
1187  }
1188  os << "]" << std::endl;
1189  }
1190 
1191  std::ostream& err = this->markLocalErrorAndGetStream ();
1192  err << prefix << os.str ();
1193  return;
1194  }
1195  }
1196 
1199  virtual void
1200  packAndPrepareNew (const ::Tpetra::SrcDistObject& sourceObject,
1201  const ::Kokkos::DualView<const local_ordinal_type*, device_type>& exportLIDs,
1202  ::Kokkos::DualView<packet_type*, device_type>& exports,
1203  const ::Kokkos::DualView<size_t*, device_type>& numPacketsPerLID,
1204  size_t& constantNumPackets,
1205  ::Tpetra::Distributor& /* distor */)
1206  {
1207  using ::Teuchos::Comm;
1208  using ::Teuchos::RCP;
1209  using std::endl;
1211  typedef ::Kokkos::DualView<packet_type*, device_type> out_buf_dv_type;
1212  const char prefix[] = "Tpetra::Details::CooMatrix::packAndPrepareNew: ";
1213  const char suffix[] = " This should never happen. "
1214  "Please report this bug to the Tpetra developers.";
1215 
1216  // Tell the caller that different rows may have different numbers
1217  // of matrix entries.
1218  constantNumPackets = 0;
1219 
1220  const this_type* src = dynamic_cast<const this_type*> (&sourceObject);
1221  if (src == NULL) {
1222  std::ostream& err = this->markLocalErrorAndGetStream ();
1223  err << prefix << "Input argument 'sourceObject' is not a CooMatrix."
1224  << endl;
1225  }
1226  else if (* (src->localError_)) {
1227  std::ostream& err = this->markLocalErrorAndGetStream ();
1228  err << prefix << "The source (input) object of the Import or Export "
1229  "is already in an error state on this process."
1230  << endl;
1231  }
1232  else if (* (this->localError_)) {
1233  std::ostream& err = this->markLocalErrorAndGetStream ();
1234  err << prefix << "The target (output, \"this\") object of the Import "
1235  "or Export is already in an error state on this process." << endl;
1236  }
1237  // Respond to detected error(s) by resizing 'exports' to zero (so
1238  // we won't be tempted to read it later), and filling
1239  // numPacketsPerLID with zeros.
1240  if (* (this->localError_)) {
1241  // Resize 'exports' to zero, so we won't be tempted to read it.
1242  exports = out_buf_dv_type ("CooMatrix exports", 0);
1243  // Trick to get around const DualView& being const.
1244  {
1245  using ::Kokkos::DualView;
1246  DualView<size_t*, device_type> numPacketsPerLID_tmp = numPacketsPerLID;
1247  numPacketsPerLID_tmp.template sync<Kokkos::HostSpace> ();
1248  numPacketsPerLID_tmp.template modify<Kokkos::HostSpace> ();
1249  }
1250  // Fill numPacketsPerLID with zeros.
1251  ::Kokkos::deep_copy (numPacketsPerLID.h_view, static_cast<size_t> (0));
1252  return;
1253  }
1254 
1255  // TODO (mfh 28 Jan 2017) pack source object's data, NOT *this's data!
1256 
1257  const size_t numExports = exportLIDs.dimension_0 ();
1258  if (numExports == 0) {
1259  exports = out_buf_dv_type (exports.h_view.label (), 0);
1260  return; // nothing to send
1261  }
1262  RCP<const Comm<int> > comm = src->getMap ().is_null () ?
1263  Teuchos::null : src->getMap ()->getComm ();
1264  if (comm.is_null () || comm->getSize () == 1) {
1265  if (numExports != static_cast<size_t> (0)) {
1266  std::ostream& err = this->markLocalErrorAndGetStream ();
1267  err << prefix << "The input communicator is either null or "
1268  "has only one process, but numExports = " << numExports << " != 0."
1269  << suffix << endl;
1270  return;
1271  }
1272  // Don't go into the rest of this method unless there are
1273  // actually processes other than the calling process. This is
1274  // because the pack and unpack functions only have nonstub
1275  // implementations if building with MPI.
1276  return;
1277  }
1278 
1279  // Trick to get around const DualView& being const.
1280  {
1281  using ::Kokkos::DualView;
1282  DualView<size_t*, device_type> numPacketsPerLID_tmp = numPacketsPerLID;
1283  numPacketsPerLID_tmp.template sync<Kokkos::HostSpace> ();
1284  numPacketsPerLID_tmp.template modify<Kokkos::HostSpace> ();
1285  }
1286 
1287  int totalNumPackets = 0;
1288  size_t numInvalidRowInds = 0;
1289  std::ostringstream errStrm; // current loop iteration's error messages
1290  for (size_t k = 0; k < numExports; ++k) {
1291  const LO lclRow = exportLIDs.h_view[k];
1292  // We're packing the source object's data, so we need to use the
1293  // source object's Map to convert from local to global indices.
1294  const GO gblRow = src->map_->getGlobalElement (lclRow);
1295  if (gblRow == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1296  // Mark the error later; just count for now.
1297  ++numInvalidRowInds;
1298  numPacketsPerLID.h_view[k] = 0;
1299  continue;
1300  }
1301 
1302  // Count the number of bytes needed to pack the current row of
1303  // the source object.
1304  int numPackets = 0;
1305  const int errCode =
1306  src->impl_.countPackRow (numPackets, gblRow, *comm, &errStrm);
1307  if (errCode != 0) {
1308  std::ostream& err = this->markLocalErrorAndGetStream ();
1309  err << prefix << errStrm.str () << endl;
1310  numPacketsPerLID.h_view[k] = 0;
1311  continue;
1312  }
1313 
1314  // Make sure that the total number of packets fits in int.
1315  // MPI requires this.
1316  const long long newTotalNumPackets =
1317  static_cast<long long> (totalNumPackets) +
1318  static_cast<long long> (numPackets);
1319  if (newTotalNumPackets >
1320  static_cast<long long> (std::numeric_limits<int>::max ())) {
1321  std::ostream& err = this->markLocalErrorAndGetStream ();
1322  err << prefix << "The new total number of packets "
1323  << newTotalNumPackets << " does not fit in int." << endl;
1324  // At this point, we definitely cannot continue. In order to
1325  // leave the output arguments in a rational state, we zero out
1326  // all remaining entries of numPacketsPerLID before returning.
1327  for (size_t k2 = k; k2 < numExports; ++k2) {
1328  numPacketsPerLID.h_view[k2] = 0;
1329  }
1330  return;
1331  }
1332  numPacketsPerLID.h_view[k] = static_cast<size_t> (numPackets);
1333  totalNumPackets = static_cast<int> (newTotalNumPackets);
1334  }
1335 
1336  // If we found invalid row indices in exportLIDs, go back,
1337  // collect, and report them.
1338  if (numInvalidRowInds != 0) {
1339  std::vector<std::pair<LO, GO> > invalidRowInds;
1340  for (size_t k = 0; k < numExports; ++k) {
1341  const LO lclRow = exportLIDs.h_view[k];
1342  // We're packing the source object's data, so we need to use
1343  // the source object's Map to convert from local to global
1344  // indices.
1345  const GO gblRow = src->map_->getGlobalElement (lclRow);
1346  if (gblRow == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1347  invalidRowInds.push_back ({lclRow, gblRow});
1348  }
1349  }
1350  std::ostringstream os;
1351  os << prefix << "We found " << numInvalidRowInds << " invalid row ind"
1352  << (numInvalidRowInds != static_cast<size_t> (1) ? "ices" : "ex")
1353  << " out of " << numExports << " in exportLIDs. Here is the list "
1354  << " of invalid row indices: [";
1355  for (size_t k = 0; k < invalidRowInds.size (); ++k) {
1356  os << "(LID: " << invalidRowInds[k].first << ", GID: "
1357  << invalidRowInds[k].second << ")";
1358  if (k + 1 < invalidRowInds.size ()) {
1359  os << ", ";
1360  }
1361  }
1362  os << "].";
1363 
1364  std::ostream& err = this->markLocalErrorAndGetStream ();
1365  err << prefix << os.str () << std::endl;
1366  return;
1367  }
1368 
1369  if (static_cast<int> (exports.dimension_0 ()) != totalNumPackets) {
1370  exports = out_buf_dv_type ("CooMatrix exports", totalNumPackets);
1371  exports.template sync<Kokkos::HostSpace> (); // make sure alloc'd on host
1372  }
1373  exports.template modify<Kokkos::HostSpace> ();
1374 
1375  // FIXME (mfh 17 Jan 2017) packTriples wants three arrays, not a
1376  // single array of structs. For now, we just copy.
1377  std::vector<GO> gblRowInds;
1378  std::vector<GO> gblColInds;
1379  std::vector<SC> vals;
1380 
1381  int outBufCurPos = 0;
1382  packet_type* outBuf = exports.h_view.ptr_on_device ();
1383  for (size_t k = 0; k < numExports; ++k) {
1384  const LO lclRow = exportLIDs.h_view[k];
1385  // We're packing the source object's data, so we need to use the
1386  // source object's Map to convert from local to global indices.
1387  const GO gblRow = src->map_->getGlobalElement (lclRow);
1388  // Pack the current row of the source object.
1389  src->impl_.packRow (outBuf, totalNumPackets, outBufCurPos, *comm,
1390  gblRowInds, gblColInds, vals, gblRow);
1391  }
1392 
1393  // Keep 'exports' modified on host.
1394  //exports.template sync<typename device_type::memory_space> ();
1395  }
1396 
1399  virtual void
1400  unpackAndCombineNew (const ::Kokkos::DualView<const local_ordinal_type*, device_type>& importLIDs,
1401  const ::Kokkos::DualView<const packet_type*, device_type>& imports,
1402  const ::Kokkos::DualView<const size_t*, device_type>& numPacketsPerLID,
1403  const size_t /* constantNumPackets */, // we don't actually use this; we assume this is 0
1404  ::Tpetra::Distributor& /* distor */,
1405  const ::Tpetra::CombineMode /* CM */)
1406  {
1407  using ::Kokkos::DualView;
1408  using ::Teuchos::Comm;
1409  using ::Teuchos::RCP;
1410  using std::endl;
1411  //typedef ::Kokkos::DualView<const packet_type*, device_type> in_buf_dv_type;
1412  typedef typename device_type::memory_space DMS;
1413  typedef ::Kokkos::HostSpace HMS;
1414  const char prefix[] = "Tpetra::Details::CooMatrix::unpackAndCombineNew: ";
1415  const char suffix[] = " This should never happen. "
1416  "Please report this bug to the Tpetra developers.";
1417 
1418  const std::size_t numImports = importLIDs.dimension_0 ();
1419  if (numImports == 0) {
1420  return; // nothing to receive
1421  }
1422  else if (imports.dimension_0 () == 0) {
1423  std::ostream& err = this->markLocalErrorAndGetStream ();
1424  typename DualView<const LO*, device_type>::t_host importLIDs_h;
1425  {
1426  if (importLIDs.template need_sync<HMS> ()) {
1427  // Device version of the DualView is the most recently updated.
1428  // It's forbidden to sync a DualView<const T>, so we must copy.
1429  auto importLIDs_d = importLIDs.template view<DMS> ();
1430  typedef typename decltype (importLIDs_h)::non_const_type HVNC;
1431  HVNC importLIDs_h_nc (importLIDs_d.label (), importLIDs.dimension_0 ());
1432  Kokkos::deep_copy (importLIDs_h_nc, importLIDs_d);
1433  importLIDs_h = importLIDs_h_nc;
1434  }
1435  else { // host version of the DualView is up-to-date
1436  importLIDs_h = importLIDs.h_view; // importLIDs.template view<HMS> ();
1437  }
1438  }
1439  err << prefix << "importLIDs.dimension_0() = " << numImports << " != 0, "
1440  << "but imports.dimension_0() = 0. This doesn't make sense, because "
1441  << "for every incoming LID, CooMatrix packs at least the count of "
1442  << "triples associated with that LID, even if the count is zero. "
1443  << "importLIDs = [";
1444  for (std::size_t k = 0; k < numImports; ++k) {
1445  err << importLIDs.h_view[k];
1446  if (k + 1 < numImports) {
1447  err << ", ";
1448  }
1449  }
1450  err << "]. " << suffix << endl;
1451  return;
1452  }
1453 
1454  RCP<const Comm<int> > comm = this->getMap ().is_null () ?
1455  Teuchos::null : this->getMap ()->getComm ();
1456  if (comm.is_null () || comm->getSize () == 1) {
1457  if (numImports != static_cast<size_t> (0)) {
1458  std::ostream& err = this->markLocalErrorAndGetStream ();
1459  err << prefix << "The input communicator is either null or "
1460  "has only one process, but numImports = " << numImports << " != 0."
1461  << suffix << endl;
1462  return;
1463  }
1464  // Don't go into the rest of this method unless there are
1465  // actually processes other than the calling process. This is
1466  // because the pack and unpack functions only have nonstub
1467  // implementations if building with MPI.
1468  return;
1469  }
1470 
1471  // Make sure that the length of 'imports' fits in int.
1472  // This is ultimately an MPI restriction.
1473  if (static_cast<size_t> (imports.dimension_0 ()) >
1474  static_cast<size_t> (std::numeric_limits<int>::max ())) {
1475  std::ostream& err = this->markLocalErrorAndGetStream ();
1476  err << prefix << "imports.dimension_0() = "
1477  << imports.dimension_0 () << " does not fit in int." << endl;
1478  return;
1479  }
1480  const int inBufSize = static_cast<int> (imports.dimension_0 ());
1481 
1482  // It's forbidden to sync a DualView<const T>, so if the input
1483  // DualView are not in sync on host, we must make copies.
1484 
1485  typename std::decay<decltype (importLIDs)>::type::t_host importLIDs_h;
1486  typename std::decay<decltype (imports)>::type::t_host imports_h;
1487  typename std::decay<decltype (numPacketsPerLID)>::type::t_host numPacketsPerLID_h;
1488  {
1489  if (importLIDs.template need_sync<HMS> ()) {
1490  // Device version of the DualView is the most recently updated.
1491  auto importLIDs_d = importLIDs.template view<DMS> ();
1492  typedef typename decltype (importLIDs_h)::non_const_type HVNC;
1493  HVNC importLIDs_h_nc (importLIDs_d.label (),
1494  importLIDs.dimension_0 ());
1495  Kokkos::deep_copy (importLIDs_h_nc, importLIDs_d);
1496  importLIDs_h = importLIDs_h_nc;
1497  }
1498  else { // host version of the DualView is up-to-date
1499  importLIDs_h = importLIDs.h_view; // importLIDs.template view<HMS> ();
1500  }
1501 
1502  if (imports.template need_sync<HMS> ()) {
1503  // Device version of the DualView is the most recently updated.
1504  auto imports_d = imports.template view<DMS> ();
1505  typedef typename decltype (imports_h)::non_const_type HVNC;
1506  HVNC imports_h_nc (imports_d.label (),
1507  imports.dimension_0 ());
1508  Kokkos::deep_copy (imports_h_nc, imports_d);
1509  imports_h = imports_h_nc;
1510  }
1511  else { // host version of the DualView is up-to-date
1512  imports_h = imports.h_view; // imports.template view<HMS> ();
1513  }
1514 
1515  if (numPacketsPerLID.template need_sync<HMS> ()) {
1516  // Device version of the DualView is the most recently updated.
1517  auto numPacketsPerLID_d = numPacketsPerLID.template view<DMS> ();
1518  typedef typename decltype (numPacketsPerLID_h)::non_const_type HVNC;
1519  HVNC numPacketsPerLID_h_nc (numPacketsPerLID_d.label (),
1520  numPacketsPerLID.dimension_0 ());
1521  Kokkos::deep_copy (numPacketsPerLID_h_nc, numPacketsPerLID_d);
1522  numPacketsPerLID_h = numPacketsPerLID_h_nc;
1523  }
1524  else { // host version of the DualView is up-to-date
1525  numPacketsPerLID_h = numPacketsPerLID.h_view; // numPacketsPerLID.template view<HMS> ();
1526  }
1527  }
1528 
1529  // FIXME (mfh 17,24 Jan 2017) packTriples wants three arrays, not a
1530  // single array of structs. For now, we just copy.
1531  std::vector<GO> gblRowInds;
1532  std::vector<GO> gblColInds;
1533  std::vector<SC> vals;
1534 
1535  const packet_type* inBuf = imports_h.ptr_on_device ();
1536  int inBufCurPos = 0;
1537  size_t numInvalidRowInds = 0;
1538  int errCode = 0;
1539  std::ostringstream errStrm; // for unpack* error output.
1540  for (size_t k = 0; k < numImports; ++k) {
1541  const LO lclRow = importLIDs_h(k);
1542  const GO gblRow = this->map_->getGlobalElement (lclRow);
1543  if (gblRow == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1544  ++numInvalidRowInds;
1545  continue;
1546  }
1547 
1548  // Remember where we were, so we don't overrun the buffer
1549  // length. inBufCurPos is an in/out argument of unpackTriples*.
1550  const int origInBufCurPos = inBufCurPos;
1551 
1552  int numEnt = 0; // output argument of unpackTriplesCount
1553  errCode = unpackTriplesCount (inBuf, inBufSize, inBufCurPos,
1554  numEnt, *comm, &errStrm);
1555  if (errCode != 0 || numEnt < 0 || inBufCurPos < origInBufCurPos) {
1556  std::ostream& err = this->markLocalErrorAndGetStream ();
1557 
1558  err << prefix << "In unpack loop, k=" << k << ": ";
1559  if (errCode != 0) {
1560  err << " unpackTriplesCount returned errCode = " << errCode
1561  << " != 0." << endl;
1562  }
1563  if (numEnt < 0) {
1564  err << " unpackTriplesCount returned errCode = 0, but numEnt = "
1565  << numEnt << " < 0." << endl;
1566  }
1567  if (inBufCurPos < origInBufCurPos) {
1568  err << " After unpackTriplesCount, inBufCurPos = " << inBufCurPos
1569  << " < origInBufCurPos = " << origInBufCurPos << "." << endl;
1570  }
1571  err << " unpackTriplesCount report: " << errStrm.str () << endl;
1572  err << suffix << endl;
1573 
1574  // We only continue in a debug build, because the above error
1575  // messages could consume too much memory and cause an
1576  // out-of-memory error, without actually printing. Printing
1577  // everything is useful in a debug build, but not so much in a
1578  // release build.
1579 #ifdef HAVE_TPETRA_DEBUG
1580  // Clear out the current error stream, so we don't accumulate
1581  // over loop iterations.
1582  errStrm.str ("");
1583  continue;
1584 #else
1585  return;
1586 #endif // HAVE_TPETRA_DEBUG
1587  }
1588 
1589  // FIXME (mfh 17,24 Jan 2017) packTriples wants three arrays,
1590  // not a single array of structs. For now, we just copy.
1591  gblRowInds.resize (numEnt);
1592  gblColInds.resize (numEnt);
1593  vals.resize (numEnt);
1594 
1595  errCode = unpackTriples (inBuf, inBufSize, inBufCurPos,
1596  gblRowInds.data (), gblColInds.data (),
1597  vals.data (), numEnt, *comm, &errStrm);
1598  if (errCode != 0) {
1599  std::ostream& err = this->markLocalErrorAndGetStream ();
1600  err << prefix << "unpackTriples returned errCode = "
1601  << errCode << " != 0. It reports: " << errStrm.str ()
1602  << endl;
1603  // We only continue in a debug build, because the above error
1604  // messages could consume too much memory and cause an
1605  // out-of-memory error, without actually printing. Printing
1606  // everything is useful in a debug build, but not so much in a
1607  // release build.
1608 #ifdef HAVE_TPETRA_DEBUG
1609  // Clear out the current error stream, so we don't accumulate
1610  // over loop iterations.
1611  errStrm.str ("");
1612  continue;
1613 #else
1614  return;
1615 #endif // HAVE_TPETRA_DEBUG
1616  }
1617  this->sumIntoGlobalValues (gblRowInds.data (), gblColInds.data (),
1618  vals.data (), numEnt);
1619  }
1620 
1621  // If we found invalid row indices in exportLIDs, go back,
1622  // collect, and report them.
1623  if (numInvalidRowInds != 0) {
1624  // Mark the error now, before we possibly run out of memory.
1625  // The latter could raise an exception (e.g., std::bad_alloc),
1626  // but at least we would get the error state right.
1627  std::ostream& err = this->markLocalErrorAndGetStream ();
1628 
1629  std::vector<std::pair<LO, GO> > invalidRowInds;
1630  for (size_t k = 0; k < numImports; ++k) {
1631  const LO lclRow = importLIDs_h(k);
1632  const GO gblRow = this->map_->getGlobalElement (lclRow);
1633  if (gblRow == ::Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
1634  invalidRowInds.push_back ({lclRow, gblRow});
1635  }
1636  }
1637 
1638  err << prefix << "We found " << numInvalidRowInds << " invalid row ind"
1639  << (numInvalidRowInds != static_cast<size_t> (1) ? "ices" : "ex")
1640  << " out of " << numImports << " in importLIDs. Here is the list "
1641  << " of invalid row indices: [";
1642  for (size_t k = 0; k < invalidRowInds.size (); ++k) {
1643  err << "(LID: " << invalidRowInds[k].first << ", GID: "
1644  << invalidRowInds[k].second << ")";
1645  if (k + 1 < invalidRowInds.size ()) {
1646  err << ", ";
1647  }
1648  }
1649  err << "].";
1650  return;
1651  }
1652  }
1653 };
1654 
1655 } // namespace Details
1656 } // namespace Tpetra
1657 
1658 #endif // TPETRA_DETAILS_COOMATRIX_HPP
char packet_type
Type for packing and unpacking data.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
int packTriplesCount(const int, char [], const int, int &, const ::Teuchos::Comm< int > &, std::ostream *errStrm)
Pack the count (number) of matrix triples.
int packTriples(const OrdinalType [], const OrdinalType [], const ScalarType [], const int, char [], const int, int &, const ::Teuchos::Comm< int > &, std::ostream *errStrm=NULL)
Pack matrix entries ("triples" (i, j, A(i,j))) into the given output buffer.
void sumIntoGlobalValues(std::initializer_list< GO > gblRowInds, std::initializer_list< GO > gblColInds, std::initializer_list< SC > vals, const std::size_t numEnt)
Initializer-list overload of the above method (which see).
Node node_type
The Kokkos Node type.
virtual std::string description() const
One-line descriptiion of this object; overrides Teuchos::Describable method.
virtual size_t constantNumberOfPackets() const
By returning 0, tell DistObject that this class may not necessarily have a constant number of "packet...
int unpackTriplesCount(const char [], const int, int &, int &, const ::Teuchos::Comm< int > &, std::ostream *errStrm)
Unpack just the count of triples from the given input buffer.
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
void buildLocallyIndexedCrs(std::vector< OffsetType > &rowOffsets, LO lclColInds[], SC vals[], std::function< LO(const GO)> gblToLcl) const
Build a locally indexed version of CRS storage.
Function comparing two CooGraphEntry structs, lexicographically, first by row index, then by column index.
void sumIntoGlobalValue(const GO gblRowInd, const GO gblColInd, const SC &val)
Insert one entry locally into the sparse matrix, if it does not exist there yet. If it does exist...
Sparse matrix used only for file input / output.
void fillComplete()
Special version of fillComplete that assumes that the matrix already has a Map, and reuses its commun...
virtual ~CooMatrix()
Destructor (virtual for memory safety of derived classes).
Implementation details of Tpetra.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
GlobalOrdinal global_ordinal_type
The type of global indices.
LocalOrdinal local_ordinal_type
The type of local indices.
void fillComplete(const ::Teuchos::RCP< const ::Teuchos::Comm< int > > &comm)
Tell the matrix that you are done inserting entries locally, and that the matrix should build its Map...
void sumIntoGlobalValue(const GO gblRowInd, const GO gblColInd, const SC &val)
Insert one entry locally into the sparse matrix, if it does not exist there yet. If it does exist...
int unpackTriples(const char [], const int, int &, OrdinalType [], OrdinalType [], ScalarType [], const int, const ::Teuchos::Comm< int > &, std::ostream *errStrm=NULL)
Unpack matrix entries ("triples" (i, j, A(i,j))) from the given input buffer.
std::size_t getLclNumEntries() const
Number of entries in the sparse matrix on the calling process.
Type of each (row index, column index) pair in the Tpetra::Details::CooMatrix (see below)...
SC scalar_type
Type of each entry (value) in the sparse matrix.
void sumIntoGlobalValues(const GO gblRowInds[], const GO gblColInds[], const SC vals[], const std::size_t numEnt)
Insert multiple entries locally into the sparse matrix.
CooMatrix(const ::Teuchos::RCP< const map_type > &map)
Constructor that takes a Map.
void forAllEntries(std::function< void(const GO, const GO, const SC &)> f) const
Execute the given function for all entries of the sparse matrix, sequentially (no thread parallelism)...
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print a descriptiion of this object to the given output stream; overrides Teuchos::Describable method...
void packRow(packet_type outBuf[], const int outBufSize, int &outBufCurPos, const ::Teuchos::Comm< int > &comm, std::vector< GO > &gblRowInds, std::vector< GO > &gblColInds, std::vector< SC > &vals, const GO gblRow) const
Pack the given row of the matrix.
virtual bool checkSizes(const ::Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
Sets up and executes a communication plan for a Tpetra DistObject.
void sumIntoGlobalValues(const GO gblRowInds[], const GO gblColInds[], const SC vals[], const std::size_t numEnt)
Insert multiple entries locally into the sparse matrix.
CombineMode
Rule for combining data in an Import or Export.
std::size_t getLclNumEntries() const
Number of entries in the sparse matrix on the calling process.
int countPackTriplesCount(const ::Teuchos::Comm< int > &, int &size, std::ostream *errStrm)
Compute the buffer size required by packTriples for packing the number of matrix entries ("triples")...
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
virtual bool useNewInterface()
Whether the subclass implements the "old" or "new" (Kokkos-friendly) interface (it implements the "ne...
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
::Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > map_type
Type of the Map specialization to give to the constructor.
std::string errorMessages() const
The current stream of error messages.
bool localError() const
Whether this object had an error on the calling process.
virtual void packAndPrepareNew(const ::Tpetra::SrcDistObject &sourceObject, const ::Kokkos::DualView< const local_ordinal_type *, device_type > &exportLIDs, ::Kokkos::DualView< packet_type *, device_type > &exports, const ::Kokkos::DualView< size_t *, device_type > &numPacketsPerLID, size_t &constantNumPackets, ::Tpetra::Distributor &)
While we do use the "new" Kokkos::DualView - based interface, we don&#39;t currently use device Views...
GO getMyGlobalRowIndices(std::vector< GO > &rowInds) const
Get the global row indices on this process, sorted and made unique, and return the minimum global row...
virtual void copyAndPermuteNew(const ::Tpetra::SrcDistObject &sourceObject, const size_t numSameIDs, const ::Kokkos::DualView< const LO *, device_type > &permuteToLIDs, const ::Kokkos::DualView< const LO *, device_type > &permuteFromLIDs)
While we do use the "new" Kokkos::DualView - based interface, we don&#39;t currently use device Views...
void mergeIntoRow(const GO tgtGblRow, const CooMatrixImpl< SC, GO > &src, const GO srcGblRow)
Into global row tgtGblRow of *this, merge global row srcGblRow of src.
char packet_type
This class transfers data as bytes (MPI_BYTE).
virtual void unpackAndCombineNew(const ::Kokkos::DualView< const local_ordinal_type *, device_type > &importLIDs, const ::Kokkos::DualView< const packet_type *, device_type > &imports, const ::Kokkos::DualView< const size_t *, device_type > &numPacketsPerLID, const size_t, ::Tpetra::Distributor &, const ::Tpetra::CombineMode)
While we do use the "new" Kokkos::DualView - based interface, we don&#39;t currently use device Views...
Base class for distributed Tpetra objects that support data redistribution.
Implementation detail of Tpetra::Details::CooMatrix (which see below).
int countPackRow(int &numPackets, const GO gblRow, const ::Teuchos::Comm< int > &comm, std::ostream *errStrm=NULL) const
Count the number of packets (bytes, in this case) needed to pack the given row of the matrix...