Tpetra parallel linear algebra  Version of the Day
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_CRSGRAPH_DEF_HPP
41 #define TPETRA_CRSGRAPH_DEF_HPP
42 
45 
50 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
51 #include "Tpetra_Details_getGraphOffRankOffsets.hpp"
52 #include "Tpetra_Details_makeColMap.hpp"
56 #include "Tpetra_Distributor.hpp"
57 #include "Teuchos_SerialDenseMatrix.hpp"
58 #include "Tpetra_Vector.hpp"
59 #include "Tpetra_Import_Util.hpp"
60 #include "Tpetra_Import_Util2.hpp"
61 #include "Tpetra_Details_packCrsGraph.hpp"
62 #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
63 #include "Tpetra_Details_CrsPadding.hpp"
64 #include "Tpetra_Util.hpp"
65 #include <algorithm>
66 #include <limits>
67 #include <map>
68 #include <sstream>
69 #include <string>
70 #include <type_traits>
71 #include <utility>
72 #include <vector>
73 
74 namespace Tpetra {
75  namespace Details {
76  namespace Impl {
77 
78  template<class MapIter>
79  void
80  verbosePrintMap(std::ostream& out,
81  MapIter beg,
82  MapIter end,
83  const size_t numEnt,
84  const char mapName[])
85  {
86  using ::Tpetra::Details::Behavior;
88 
89  out << mapName << ": {";
90  const size_t maxNumToPrint =
92  if (maxNumToPrint == 0) {
93  if (numEnt != 0) {
94  out << "...";
95  }
96  }
97  else {
98  const size_t numToPrint = numEnt > maxNumToPrint ?
99  maxNumToPrint : numEnt;
100  size_t count = 0;
101  for (MapIter it = beg; it != end; ++it) {
102  out << "(" << (*it).first << ", ";
103  verbosePrintArray(out, (*it).second, "gblColInds",
104  maxNumToPrint);
105  out << ")";
106  if (count + size_t(1) < numToPrint) {
107  out << ", ";
108  }
109  ++count;
110  }
111  if (count < numEnt) {
112  out << ", ...";
113  }
114  }
115  out << "}";
116  }
117 
118  template<class LO, class GO, class Node>
119  Teuchos::ArrayView<GO>
120  getRowGraphGlobalRow(
121  std::vector<GO>& gblColIndsStorage,
122  const RowGraph<LO, GO, Node>& graph,
123  const GO gblRowInd)
124  {
125  size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
126  if (gblColIndsStorage.size() < origNumEnt) {
127  gblColIndsStorage.resize(origNumEnt);
128  }
129  typename CrsGraph<LO,GO,Node>::nonconst_global_inds_host_view_type gblColInds(gblColIndsStorage.data(),
130  origNumEnt);
131  graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
132  Teuchos::ArrayView<GO> retval(gblColIndsStorage.data(),origNumEnt);
133  return retval;
134  }
135 
136  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
137  class ConvertColumnIndicesFromGlobalToLocal {
138  public:
139  ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
140  const ::Kokkos::View<const GO*, DT>& gblColInds,
141  const ::Kokkos::View<const OffsetType*, DT>& ptr,
142  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
143  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
144  lclColInds_ (lclColInds),
145  gblColInds_ (gblColInds),
146  ptr_ (ptr),
147  lclColMap_ (lclColMap),
148  numRowEnt_ (numRowEnt)
149  {}
150 
151  KOKKOS_FUNCTION void
152  operator () (const LO& lclRow, OffsetType& curNumBad) const
153  {
154  const OffsetType offset = ptr_(lclRow);
155  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
156  // of entries in a row to LO, as long as the row doesn't have
157  // too many duplicate entries.
158  const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
159  for (LO j = 0; j < numEnt; ++j) {
160  const GO gid = gblColInds_(offset + j);
161  const LO lid = lclColMap_.getLocalElement (gid);
162  lclColInds_(offset + j) = lid;
163  if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
164  ++curNumBad;
165  }
166  }
167  }
168 
169  static OffsetType
170  run (const ::Kokkos::View<LO*, DT>& lclColInds,
171  const ::Kokkos::View<const GO*, DT>& gblColInds,
172  const ::Kokkos::View<const OffsetType*, DT>& ptr,
173  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
174  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
175  {
176  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
177  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
178 
179  const LO lclNumRows = ptr.extent (0) == 0 ?
180  static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
181  OffsetType numBad = 0;
182  // Count of "bad" column indices is a reduction over rows.
183  ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
184  functor_type (lclColInds, gblColInds, ptr,
185  lclColMap, numRowEnt),
186  numBad);
187  return numBad;
188  }
189 
190  private:
191  ::Kokkos::View<LO*, DT> lclColInds_;
192  ::Kokkos::View<const GO*, DT> gblColInds_;
193  ::Kokkos::View<const OffsetType*, DT> ptr_;
195  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
196  };
197 
198  } // namespace Impl
199 
214  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
215  OffsetType
216  convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
217  const Kokkos::View<const GO*, DT>& gblColInds,
218  const Kokkos::View<const OffsetType*, DT>& ptr,
219  const LocalMap<LO, GO, DT>& lclColMap,
220  const Kokkos::View<const NumEntType*, DT>& numRowEnt)
221  {
222  using Impl::ConvertColumnIndicesFromGlobalToLocal;
223  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
224  return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
225  }
226 
227  template<class ViewType, class LO>
228  class MaxDifference {
229  public:
230  MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
231 
232  KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
233  dst = 0;
234  }
235 
236  KOKKOS_INLINE_FUNCTION void
237  join (volatile LO& dst, const volatile LO& src) const
238  {
239  dst = (src > dst) ? src : dst;
240  }
241 
242  KOKKOS_INLINE_FUNCTION void
243  operator () (const LO lclRow, LO& maxNumEnt) const
244  {
245  const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
246  maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
247  }
248  private:
249  typename ViewType::const_type ptr_;
250  };
251 
252  template<class ViewType, class LO>
253  typename ViewType::non_const_value_type
254  maxDifference (const char kernelLabel[],
255  const ViewType& ptr,
256  const LO lclNumRows)
257  {
258  if (lclNumRows == 0) {
259  // mfh 07 May 2018: Weirdly, I need this special case,
260  // otherwise I get the wrong answer.
261  return static_cast<LO> (0);
262  }
263  else {
264  using execution_space = typename ViewType::execution_space;
265  using range_type = Kokkos::RangePolicy<execution_space, LO>;
266  LO theMaxNumEnt {0};
267  Kokkos::parallel_reduce (kernelLabel,
268  range_type (0, lclNumRows),
269  MaxDifference<ViewType, LO> (ptr),
270  theMaxNumEnt);
271  return theMaxNumEnt;
272  }
273  }
274 
275  } // namespace Details
276 
277  template <class LocalOrdinal, class GlobalOrdinal, class Node>
278  bool
279  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
280  getDebug() {
281  return Details::Behavior::debug("CrsGraph");
282  }
283 
284  template <class LocalOrdinal, class GlobalOrdinal, class Node>
285  bool
286  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
287  getVerbose() {
288  return Details::Behavior::verbose("CrsGraph");
289  }
290 
// Constructor taking a row Map and a single allocation hint that applies
// to every row (stored in numAllocForAllRows_).
//
// The ProfileType argument is unnamed and is not used in this body.
// Throws std::invalid_argument if maxNumEntriesPerRow equals
// Teuchos::OrdinalTraits<size_t>::invalid(); otherwise calls
// resumeFill(params).
//
// NOTE(review): the listing's embedded line numbers skip 292 and 310 in
// this definition, so a line appears to be missing at each of those
// places -- confirm against the original file.
291  template <class LocalOrdinal, class GlobalOrdinal, class Node>
293  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
294  const size_t maxNumEntriesPerRow,
295  const ProfileType /* pftype */,
296  const Teuchos::RCP<Teuchos::ParameterList>& params) :
297  dist_object_type (rowMap)
298  , rowMap_ (rowMap)
299  , numAllocForAllRows_ (maxNumEntriesPerRow)
300  {
301  const char tfecfFuncName[] =
302  "CrsGraph(rowMap,maxNumEntriesPerRow,pftype,params): ";
303  staticAssertions ();
304  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
305  (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
306  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
307  "a valid size_t value, which in this case means it must not be "
308  "Teuchos::OrdinalTraits<size_t>::invalid().");
309  resumeFill (params);
311  }
312 
// Constructor taking a row Map, a column Map, and a single allocation
// hint that applies to every row (stored in numAllocForAllRows_).
//
// The ProfileType argument is unnamed and is not used in this body.
// Throws std::invalid_argument if maxNumEntriesPerRow equals
// Teuchos::OrdinalTraits<size_t>::invalid(); otherwise calls
// resumeFill(params).
//
// NOTE(review): the listing's embedded line numbers skip 314 and 334 in
// this definition, so a line appears to be missing at each of those
// places -- confirm against the original file.
313  template <class LocalOrdinal, class GlobalOrdinal, class Node>
315  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
316  const Teuchos::RCP<const map_type>& colMap,
317  const size_t maxNumEntriesPerRow,
318  const ProfileType /* pftype */,
319  const Teuchos::RCP<Teuchos::ParameterList>& params) :
320  dist_object_type (rowMap)
321  , rowMap_ (rowMap)
322  , colMap_ (colMap)
323  , numAllocForAllRows_ (maxNumEntriesPerRow)
324  {
325  const char tfecfFuncName[] =
326  "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,pftype,params): ";
327  staticAssertions ();
328  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
329  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
330  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
331  "a valid size_t value, which in this case means it must not be "
332  "Teuchos::OrdinalTraits<size_t>::invalid().");
333  resumeFill (params);
335  }
336 
// Constructor taking a row Map and a per-row allocation hint given as a
// Teuchos::ArrayView (one entry per local row).
//
// The ProfileType argument is unnamed and is not used in this body.
// Throws std::invalid_argument if numEntPerRow's length differs from the
// local number of rows (zero when rowMap is null).  When debug_ is set,
// also throws std::invalid_argument if any entry equals
// Teuchos::OrdinalTraits<size_t>::invalid().  The hints are deep-copied
// into a newly allocated View that is assigned to k_numAllocPerRow_;
// then resumeFill(params) and checkInternalState() are called.
//
// NOTE(review): the listing's embedded line numbers skip 338 in this
// definition, so a line appears to be missing after the template header
// -- confirm against the original file.
337  template <class LocalOrdinal, class GlobalOrdinal, class Node>
339  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
340  const Teuchos::ArrayView<const size_t>& numEntPerRow,
341  const ProfileType /* pftype */,
342  const Teuchos::RCP<Teuchos::ParameterList>& params) :
343  dist_object_type (rowMap)
344  , rowMap_ (rowMap)
345  , numAllocForAllRows_ (0)
346  {
347  const char tfecfFuncName[] =
348  "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
349  staticAssertions ();
350 
// A null row Map is treated as having zero local rows.
351  const size_t lclNumRows = rowMap.is_null () ?
352  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
353  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
354  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
355  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
356  << " != the local number of rows " << lclNumRows << " as specified by "
357  "the input row Map.");
358 
// The per-entry validity check only runs in debug mode.
359  if (debug_) {
360  for (size_t r = 0; r < lclNumRows; ++r) {
361  const size_t curRowCount = numEntPerRow[r];
362  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
363  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
364  std::invalid_argument, "numEntPerRow(" << r << ") "
365  "specifies an invalid number of entries "
366  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
367  }
368  }
369 
370  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
371  // The latter is a const View, so we have to copy into a nonconst
372  // View first, then assign.
373  typedef decltype (k_numAllocPerRow_) out_view_type;
374  typedef typename out_view_type::non_const_type nc_view_type;
375  typedef Kokkos::View<const size_t*,
376  typename nc_view_type::array_layout,
377  Kokkos::HostSpace,
378  Kokkos::MemoryUnmanaged> in_view_type;
// Unmanaged View that wraps the caller's array without copying it.
379  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
380  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
381  lclNumRows);
382  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
383  k_numAllocPerRow_ = numAllocPerRowOut;
384 
385  resumeFill (params);
386  checkInternalState ();
387  }
388 
389 
390 
// Constructor taking a row Map and a per-row allocation hint given as a
// Kokkos::DualView (one entry per local row).
//
// k_numAllocPerRow_ is initialized directly from numEntPerRow.h_view, so
// no deep copy is made here.  The ProfileType argument is unnamed and is
// not used in this body.
// Throws std::invalid_argument if numEntPerRow's length differs from the
// local number of rows (zero when rowMap is null).  When debug_ is set,
// also throws std::invalid_argument if any entry equals
// Teuchos::OrdinalTraits<size_t>::invalid().  Then calls
// resumeFill(params).
//
// NOTE(review): the listing's embedded line numbers skip 392 and 426 in
// this definition, so a line appears to be missing at each of those
// places -- confirm against the original file.
391  template <class LocalOrdinal, class GlobalOrdinal, class Node>
393  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
394  const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
395  const ProfileType /* pftype */,
396  const Teuchos::RCP<Teuchos::ParameterList>& params) :
397  dist_object_type (rowMap)
398  , rowMap_ (rowMap)
399  , k_numAllocPerRow_ (numEntPerRow.h_view)
400  , numAllocForAllRows_ (0)
401  {
402  const char tfecfFuncName[] =
403  "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
404  staticAssertions ();
405 
// A null row Map is treated as having zero local rows.
406  const size_t lclNumRows = rowMap.is_null () ?
407  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
408  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
409  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
410  std::invalid_argument, "numEntPerRow has length " <<
411  numEntPerRow.extent (0) << " != the local number of rows " <<
412  lclNumRows << " as specified by " "the input row Map.");
413 
// The per-entry validity check only runs in debug mode.
414  if (debug_) {
415  for (size_t r = 0; r < lclNumRows; ++r) {
416  const size_t curRowCount = numEntPerRow.h_view(r);
417  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
418  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
419  std::invalid_argument, "numEntPerRow(" << r << ") "
420  "specifies an invalid number of entries "
421  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
422  }
423  }
424 
425  resumeFill (params);
427  }
428 
429 
// Constructor taking a row Map, a column Map, and a per-row allocation
// hint given as a Kokkos::DualView (one entry per local row).
//
// k_numAllocPerRow_ is initialized directly from numEntPerRow.h_view, so
// no deep copy is made here.  The ProfileType argument is unnamed and is
// not used in this body.
// Throws std::invalid_argument if numEntPerRow's length differs from the
// local number of rows (zero when rowMap is null).  When debug_ is set,
// also throws std::invalid_argument if any entry equals
// Teuchos::OrdinalTraits<size_t>::invalid().  Then calls
// resumeFill(params).
//
// NOTE(review): the listing's embedded line numbers skip 431 and 467 in
// this definition, so a line appears to be missing at each of those
// places -- confirm against the original file.
430  template <class LocalOrdinal, class GlobalOrdinal, class Node>
432  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
433  const Teuchos::RCP<const map_type>& colMap,
434  const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
435  const ProfileType /* pftype */,
436  const Teuchos::RCP<Teuchos::ParameterList>& params) :
437  dist_object_type (rowMap)
438  , rowMap_ (rowMap)
439  , colMap_ (colMap)
440  , k_numAllocPerRow_ (numEntPerRow.h_view)
441  , numAllocForAllRows_ (0)
442  {
443  const char tfecfFuncName[] =
444  "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
445  staticAssertions ();
446 
// A null row Map is treated as having zero local rows.
447  const size_t lclNumRows = rowMap.is_null () ?
448  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
449  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
450  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
451  std::invalid_argument, "numEntPerRow has length " <<
452  numEntPerRow.extent (0) << " != the local number of rows " <<
453  lclNumRows << " as specified by " "the input row Map.");
454 
// The per-entry validity check only runs in debug mode.
455  if (debug_) {
456  for (size_t r = 0; r < lclNumRows; ++r) {
457  const size_t curRowCount = numEntPerRow.h_view(r);
458  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
459  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
460  std::invalid_argument, "numEntPerRow(" << r << ") "
461  "specifies an invalid number of entries "
462  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
463  }
464  }
465 
466  resumeFill (params);
468  }
469 
470 
// Constructor taking a row Map, a column Map, and a per-row allocation
// hint given as a Teuchos::ArrayView (one entry per local row).
//
// The ProfileType argument is unnamed and is not used in this body.
// Throws std::invalid_argument if numEntPerRow's length differs from the
// local number of rows (zero when rowMap is null).  When debug_ is set,
// also throws std::invalid_argument if any entry equals
// Teuchos::OrdinalTraits<size_t>::invalid().  The hints are deep-copied
// into a newly allocated View that is assigned to k_numAllocPerRow_;
// then resumeFill(params) is called.
//
// NOTE(review): the listing's embedded line numbers skip 472 and 522 in
// this definition, so a line appears to be missing at each of those
// places -- confirm against the original file.
471  template <class LocalOrdinal, class GlobalOrdinal, class Node>
473  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
474  const Teuchos::RCP<const map_type>& colMap,
475  const Teuchos::ArrayView<const size_t>& numEntPerRow,
476  const ProfileType /* pftype */,
477  const Teuchos::RCP<Teuchos::ParameterList>& params) :
478  dist_object_type (rowMap)
479  , rowMap_ (rowMap)
480  , colMap_ (colMap)
481  , numAllocForAllRows_ (0)
482  {
483  const char tfecfFuncName[] =
484  "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
485  staticAssertions ();
486 
// A null row Map is treated as having zero local rows.
487  const size_t lclNumRows = rowMap.is_null () ?
488  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
489  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
490  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
491  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
492  << " != the local number of rows " << lclNumRows << " as specified by "
493  "the input row Map.");
494 
// The per-entry validity check only runs in debug mode.
495  if (debug_) {
496  for (size_t r = 0; r < lclNumRows; ++r) {
497  const size_t curRowCount = numEntPerRow[r];
498  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
499  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
500  std::invalid_argument, "numEntPerRow(" << r << ") "
501  "specifies an invalid number of entries "
502  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
503  }
504  }
505 
506  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
507  // The latter is a const View, so we have to copy into a nonconst
508  // View first, then assign.
509  typedef decltype (k_numAllocPerRow_) out_view_type;
510  typedef typename out_view_type::non_const_type nc_view_type;
511  typedef Kokkos::View<const size_t*,
512  typename nc_view_type::array_layout,
513  Kokkos::HostSpace,
514  Kokkos::MemoryUnmanaged> in_view_type;
// Unmanaged View that wraps the caller's array without copying it.
515  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
516  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
517  lclNumRows);
518  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
519  k_numAllocPerRow_ = numAllocPerRowOut;
520 
521  resumeFill (params);
523  }
524 
525 
// Constructor that builds a graph over rowMap from the leading rows of
// another graph, originalGraph.
//
// The column Map, allocation hint, storage status, and the
// allocated / local / sorted flags are copied from originalGraph.  The
// row-offset members are set to Kokkos::subview's of originalGraph's
// row offsets over the index range [0, numRows + 1), where numRows is
// rowMap->getNodeNumElements().  The index members are constructed from
// originalGraph's index members with the arguments (0, numNonZeros),
// where numNonZeros is originalGraph.rowPtrsPacked_host_(numRows);
// presumably that selects the first numNonZeros indices -- TODO confirm
// against the wrapped-DualView constructor.
//
// NOTE(review): the listing's embedded line numbers skip 527-528 (where
// the parameter originalGraph would be declared) and 560, so lines
// appear to be missing there -- confirm against the original file.
// NOTE(review): rowMap is dereferenced without a null check, and its
// element count is stored in an int; other constructors in this listing
// guard with rowMap.is_null() and store the count in a size_t.
526  template <class LocalOrdinal, class GlobalOrdinal, class Node>
529  const Teuchos::RCP<const map_type>& rowMap,
530  const Teuchos::RCP<Teuchos::ParameterList>& params) :
531  dist_object_type (rowMap)
532  , rowMap_(rowMap)
533  , colMap_(originalGraph.colMap_)
534  , numAllocForAllRows_(originalGraph.numAllocForAllRows_)
535  , storageStatus_(originalGraph.storageStatus_)
536  , indicesAreAllocated_(originalGraph.indicesAreAllocated_)
537  , indicesAreLocal_(originalGraph.indicesAreLocal_)
538  , indicesAreSorted_(originalGraph.indicesAreSorted_)
539  {
540  staticAssertions();
541 
542  int numRows = rowMap->getNodeNumElements();
// Offset one past the last entry of the last row kept.
543  size_t numNonZeros = originalGraph.rowPtrsPacked_host_(numRows);
// numRows rows need numRows + 1 offsets.
544  auto rowsToUse = Kokkos::pair<size_t, size_t>(0, numRows+1);
545 
546  rowPtrsUnpacked_dev_ = Kokkos::subview(originalGraph.rowPtrsUnpacked_dev_, rowsToUse);
547  rowPtrsUnpacked_host_ = Kokkos::subview(originalGraph.rowPtrsUnpacked_host_, rowsToUse);
548 
549  rowPtrsPacked_dev_ = Kokkos::subview(originalGraph.rowPtrsPacked_dev_, rowsToUse);
550  rowPtrsPacked_host_ = Kokkos::subview(originalGraph.rowPtrsPacked_host_, rowsToUse);
551 
// Only the index representation that originalGraph uses is set up.
552  if (indicesAreLocal_) {
553  lclIndsUnpacked_wdv = local_inds_wdv_type(originalGraph.lclIndsUnpacked_wdv, 0, numNonZeros);
554  lclIndsPacked_wdv = local_inds_wdv_type(originalGraph.lclIndsPacked_wdv, 0, numNonZeros);
555  }
556  else {
557  gblInds_wdv = global_inds_wdv_type(originalGraph.gblInds_wdv, 0, numNonZeros);
558  }
559 
561  }
562 
// Constructor taking a row Map, a column Map, and the graph structure
// as Kokkos Views of row offsets and (local) column indices.
//
// The graph is marked as 1-D packed storage with allocated, local
// indices.  Indices are assumed sorted unless params is nonnull and
// contains a bool parameter "sorted" whose value is false.  The arrays
// are handed to setAllIndices.
//
// NOTE(review): the listing's embedded line numbers skip 564 and 587 in
// this definition, so a line appears to be missing at each of those
// places -- confirm against the original file.
563  template <class LocalOrdinal, class GlobalOrdinal, class Node>
565  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
566  const Teuchos::RCP<const map_type>& colMap,
567  const typename local_graph_device_type::row_map_type& rowPointers,
568  const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
569  const Teuchos::RCP<Teuchos::ParameterList>& params) :
570  dist_object_type (rowMap)
571  , rowMap_(rowMap)
572  , colMap_(colMap)
573  , numAllocForAllRows_(0)
574  , storageStatus_(Details::STORAGE_1D_PACKED)
575  , indicesAreAllocated_(true)
576  , indicesAreLocal_(true)
577  {
578  staticAssertions ();
// "sorted" defaults to true when params is null or lacks the parameter.
579  if (! params.is_null() && params->isParameter("sorted") &&
580  ! params->get<bool>("sorted")) {
581  indicesAreSorted_ = false;
582  }
583  else {
584  indicesAreSorted_ = true;
585  }
586  setAllIndices (rowPointers, columnIndices);
588  }
589 
// Constructor taking a row Map, a column Map, and the graph structure
// as Teuchos::ArrayRCP's of row offsets and (local) column indices.
//
// The graph is marked as 1-D packed storage with allocated, local
// indices.  Indices are assumed sorted unless params is nonnull and
// contains a bool parameter "sorted" whose value is false.  The arrays
// are handed to setAllIndices.
//
// NOTE(review): the listing's embedded line numbers skip 591 and 614 in
// this definition, so a line appears to be missing at each of those
// places -- confirm against the original file.
590  template <class LocalOrdinal, class GlobalOrdinal, class Node>
592  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
593  const Teuchos::RCP<const map_type>& colMap,
594  const Teuchos::ArrayRCP<size_t>& rowPointers,
595  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
596  const Teuchos::RCP<Teuchos::ParameterList>& params) :
597  dist_object_type (rowMap)
598  , rowMap_ (rowMap)
599  , colMap_ (colMap)
600  , numAllocForAllRows_ (0)
601  , storageStatus_ (Details::STORAGE_1D_PACKED)
602  , indicesAreAllocated_ (true)
603  , indicesAreLocal_ (true)
604  {
605  staticAssertions ();
// "sorted" defaults to true when params is null or lacks the parameter.
606  if (! params.is_null() && params->isParameter("sorted") &&
607  ! params->get<bool>("sorted")) {
608  indicesAreSorted_ = false;
609  }
610  else {
611  indicesAreSorted_ = true;
612  }
613  setAllIndices (rowPointers, columnIndices);
615  }
616 
// Constructor taking a row Map, a column Map, and a local graph.
//
// Delegates to the (local graph, row Map, column Map, domain Map,
// range Map, params) constructor, passing Teuchos::null for both the
// domain Map and the range Map.
//
// NOTE(review): the listing's embedded line numbers skip 618 in this
// definition, so a line appears to be missing after the template header
// -- confirm against the original file.
617  template <class LocalOrdinal, class GlobalOrdinal, class Node>
619  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
620  const Teuchos::RCP<const map_type>& colMap,
621  const local_graph_device_type& k_local_graph_,
622  const Teuchos::RCP<Teuchos::ParameterList>& params)
623  : CrsGraph (k_local_graph_,
624  rowMap,
625  colMap,
626  Teuchos::null,
627  Teuchos::null,
628  params)
629  {}
630 
// Constructor taking a local graph plus row, column, domain, and range
// Maps.  On return the graph is marked fill complete.
//
// Throws std::runtime_error if colMap is null, or if the local graph's
// number of rows differs from rowMap->getNodeNumElements().  Throws
// std::logic_error if lclIndsUnpacked_wdv or gblInds_wdv already has
// nonzero extent.
// Indices are assumed sorted unless params is nonnull and contains a
// bool parameter "sorted" whose value is false.  A null domainMap or
// rangeMap is replaced by the row Map.  computeGlobalConstants() is
// called when params is null, or when its "compute global constants"
// parameter (default true) is true.
//
// NOTE(review): the listing's embedded line numbers skip 632 and 686 in
// this definition, so a line appears to be missing at each of those
// places -- confirm against the original file.
// NOTE(review): rowMap is dereferenced in the row-count check without a
// prior null check.
631  template <class LocalOrdinal, class GlobalOrdinal, class Node>
633  CrsGraph (const local_graph_device_type& k_local_graph_,
634  const Teuchos::RCP<const map_type>& rowMap,
635  const Teuchos::RCP<const map_type>& colMap,
636  const Teuchos::RCP<const map_type>& domainMap,
637  const Teuchos::RCP<const map_type>& rangeMap,
638  const Teuchos::RCP<Teuchos::ParameterList>& params)
639  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
640  , rowMap_ (rowMap)
641  , colMap_ (colMap)
642  , numAllocForAllRows_ (0)
643  , storageStatus_ (Details::STORAGE_1D_PACKED)
644  , indicesAreAllocated_ (true)
645  , indicesAreLocal_ (true)
646  {
647  staticAssertions();
648  const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
649 
650  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
651  colMap.is_null (), std::runtime_error,
652  ": The input column Map must be nonnull.");
653  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
654  k_local_graph_.numRows () != rowMap->getNodeNumElements (),
655  std::runtime_error,
656  ": The input row Map and the input local graph need to have the same "
657  "number of rows. The row Map claims " << rowMap->getNodeNumElements ()
658  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
659  << " row(s).");
660 
661  // NOTE (mfh 17 Mar 2014) getNodeNumRows() returns
662  // rowMap_->getNodeNumElements(), but it doesn't have to.
663  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
664  // k_local_graph_.numRows () != getNodeNumRows (), std::runtime_error,
665  // ": The input row Map and the input local graph need to have the same "
666  // "number of rows. The row Map claims " << getNodeNumRows () << " row(s), "
667  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
668  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
669  lclIndsUnpacked_wdv.extent (0) != 0 || gblInds_wdv.extent (0) != 0, std::logic_error,
670  ": cannot have 1D data structures allocated.");
671 
// "sorted" defaults to true when params is null or lacks the parameter.
672  if(! params.is_null() && params->isParameter("sorted") &&
673  ! params->get<bool>("sorted")) {
674  indicesAreSorted_ = false;
675  }
676  else {
677  indicesAreSorted_ = true;
678  }
679 
// A null domain or range Map falls back to the row Map.
680  setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
681  rangeMap .is_null() ? rowMap_ : rangeMap);
682  Teuchos::Array<int> remotePIDs (0); // unused output argument
683  this->makeImportExport (remotePIDs, false);
684 
685  lclIndsPacked_wdv = local_inds_wdv_type(k_local_graph_.entries);
// The same row offsets are installed as both the unpacked and the
// packed offsets.
687  this->setRowPtrsUnpacked(k_local_graph_.row_map);
688  this->setRowPtrsPacked(k_local_graph_.row_map);
689 
690  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
691 
// Global constants are computed unless the caller opts out via params.
692  const bool callComputeGlobalConstants = params.get () == nullptr ||
693  params->get ("compute global constants", true);
694 
695  if (callComputeGlobalConstants) {
696  this->computeGlobalConstants ();
697  }
698  this->fillComplete_ = true;
699  this->checkInternalState ();
700  }
701 
// Constructor taking a local graph, all four Maps, and precomputed
// Import and Export objects.  On return the graph is marked fill
// complete.
//
// A null rangeMap or domainMap is replaced by rowMap; importer and
// exporter are stored as given.  Throws std::runtime_error if colMap is
// null.  The local graph's entries become both the packed and the
// unpacked local indices, and its row_map becomes both the packed and
// the unpacked row offsets.
// Indices are assumed sorted unless params is nonnull and contains a
// bool parameter "sorted" whose value is false.  computeGlobalConstants()
// is called when params is null, or when its "compute global constants"
// parameter (default true) is true.
//
// NOTE(review): the listing's embedded line numbers skip 703 in this
// definition, so a line appears to be missing after the template header
// -- confirm against the original file.
702  template <class LocalOrdinal, class GlobalOrdinal, class Node>
704  CrsGraph (const local_graph_device_type& lclGraph,
705  const Teuchos::RCP<const map_type>& rowMap,
706  const Teuchos::RCP<const map_type>& colMap,
707  const Teuchos::RCP<const map_type>& domainMap,
708  const Teuchos::RCP<const map_type>& rangeMap,
709  const Teuchos::RCP<const import_type>& importer,
710  const Teuchos::RCP<const export_type>& exporter,
711  const Teuchos::RCP<Teuchos::ParameterList>& params) :
712  DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
713  rowMap_ (rowMap),
714  colMap_ (colMap),
715  rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
716  domainMap_ (domainMap.is_null () ? rowMap : domainMap),
717  importer_ (importer),
718  exporter_ (exporter),
719  numAllocForAllRows_ (0),
720  storageStatus_ (Details::STORAGE_1D_PACKED),
721  indicesAreAllocated_ (true),
722  indicesAreLocal_ (true)
723  {
724  staticAssertions();
725  const char tfecfFuncName[] = "Tpetra::CrsGraph(local_graph_device_type,"
726  "Map,Map,Map,Map,Import,Export,params): ";
727 
728  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
729  (colMap.is_null (), std::runtime_error,
730  "The input column Map must be nonnull.");
731 
// Packed and unpacked storage refer to the same input arrays.
732  lclIndsPacked_wdv = local_inds_wdv_type(lclGraph.entries);
733  lclIndsUnpacked_wdv = lclIndsPacked_wdv;
734  setRowPtrsUnpacked(lclGraph.row_map);
735  setRowPtrsPacked(lclGraph.row_map);
736 
737  set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
738 
// "sorted" defaults to true when params is null or lacks the parameter.
739  if (! params.is_null() && params->isParameter("sorted") &&
740  ! params->get<bool>("sorted")) {
741  indicesAreSorted_ = false;
742  }
743  else {
744  indicesAreSorted_ = true;
745  }
746 
// Global constants are computed unless the caller opts out via params.
747  const bool callComputeGlobalConstants =
748  params.get () == nullptr ||
749  params->get ("compute global constants", true);
750  if (callComputeGlobalConstants) {
751  this->computeGlobalConstants ();
752  }
753  fillComplete_ = true;
754  checkInternalState ();
755  }
756 
// Build and return a new "Tpetra::CrsGraph" ParameterList of valid
// parameters.  It contains two entries, "Import" and "Export", both set
// from the same "Import" sublist that a temporary Distributor is
// constructed with.
//
// NOTE(review): the listing's embedded line numbers skip 759 in this
// definition, so a line appears to be missing after the return type --
// confirm against the original file.
// NOTE(review): rowMap_ is dereferenced here without a null check.
757  template <class LocalOrdinal, class GlobalOrdinal, class Node>
758  Teuchos::RCP<const Teuchos::ParameterList>
760  getValidParameters () const
761  {
762  using Teuchos::RCP;
763  using Teuchos::ParameterList;
764  using Teuchos::parameterList;
765 
766  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
767 
768  // Make a sublist for the Import.
769  RCP<ParameterList> importSublist = parameterList ("Import");
770 
771  // FIXME (mfh 02 Apr 2012) We should really have the Import and
772  // Export objects fill in these lists. However, we don't want to
773  // create an Import or Export unless we need them. For now, we
774  // know that the Import and Export just pass the list directly to
775  // their Distributor, so we can create a Distributor here
776  // (Distributor's constructor is a lightweight operation) and have
777  // it fill in the list.
778 
779  // Fill in Distributor default parameters by creating a
780  // Distributor and asking it to do the work.
781  Distributor distributor (rowMap_->getComm (), importSublist);
782  params->set ("Import", *importSublist, "How the Import performs communication.");
783 
784  // Make a sublist for the Export. For now, it's a clone of the
785  // Import sublist. It's not a shallow copy, though, since we
786  // might like the Import to do communication differently than the
787  // Export.
788  params->set ("Export", *importSublist, "How the Export performs communication.");
789 
790  return params;
791  }
792 
// Validate params against getValidParameters() (filling in defaults),
// then store it via setMyParamList.
//
// NOTE(review): params is dereferenced without a null check.
// NOTE(review): the listing's embedded line numbers skip 795 in this
// definition, so a line appears to be missing after the return type --
// confirm against the original file.
793  template <class LocalOrdinal, class GlobalOrdinal, class Node>
794  void
796  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
797  {
798  Teuchos::RCP<const Teuchos::ParameterList> validParams =
799  getValidParameters ();
800  params->validateParametersAndSetDefaults (*validParams);
801  this->setMyParamList (params);
802  }
803 
// Return the row Map's global number of elements.
//
// NOTE(review): rowMap_ is dereferenced without a null check.
// NOTE(review): the listing's embedded line numbers skip 805-806 (where
// the return type would be), so lines appear to be missing -- confirm
// against the original file.
804  template <class LocalOrdinal, class GlobalOrdinal, class Node>
807  getGlobalNumRows () const
808  {
809  return rowMap_->getGlobalNumElements ();
810  }
811 
// Return the domain Map's global number of elements.
//
// Throws std::runtime_error if the graph is not fill complete or the
// domain Map is null.
//
// NOTE(review): the listing's embedded line numbers skip 813-814 (where
// the return type would be), so lines appear to be missing -- confirm
// against the original file.
812  template <class LocalOrdinal, class GlobalOrdinal, class Node>
815  getGlobalNumCols () const
816  {
817  const char tfecfFuncName[] = "getGlobalNumCols: ";
818  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
819  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
820  "The graph does not have a domain Map. You may not call this method in "
821  "that case.");
822  return getDomainMap ()->getGlobalNumElements ();
823  }
824 
// Return the row Map's local number of elements, or zero if the row Map
// is null.
//
// NOTE(review): the listing's embedded line numbers skip 827 in this
// definition, so a line appears to be missing after the return type --
// confirm against the original file.
825  template <class LocalOrdinal, class GlobalOrdinal, class Node>
826  size_t
828  getNodeNumRows () const
829  {
830  return this->rowMap_.is_null () ?
831  static_cast<size_t> (0) :
832  this->rowMap_->getNodeNumElements ();
833  }
834 
// Return the column Map's local number of elements.
//
// Throws std::runtime_error if hasColMap() is false.  The null check in
// the return statement is then presumably redundant (defensive), since
// hasColMap() is documented by its own body as ! colMap_.is_null() --
// verify before removing.
//
// NOTE(review): the listing's embedded line numbers skip 837 in this
// definition, so a line appears to be missing after the return type --
// confirm against the original file.
835  template <class LocalOrdinal, class GlobalOrdinal, class Node>
836  size_t
838  getNodeNumCols () const
839  {
840  const char tfecfFuncName[] = "getNodeNumCols: ";
841  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
842  ! hasColMap (), std::runtime_error,
843  "The graph does not have a column Map. You may not call this method "
844  "unless the graph has a column Map. This requires either that a custom "
845  "column Map was given to the constructor, or that fillComplete() has "
846  "been called.");
847  return colMap_.is_null () ? static_cast<size_t> (0) :
848  colMap_->getNodeNumElements ();
849  }
850 
851 
852 
// Return the stored row Map (rowMap_), which may be null.
// NOTE(review): embedded line number 855 is skipped here; a line
// appears to be missing after the return type.
853  template <class LocalOrdinal, class GlobalOrdinal, class Node>
854  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
856  getRowMap () const
857  {
858  return rowMap_;
859  }
860 
// Return the stored column Map (colMap_), which may be null.
// NOTE(review): embedded line number 863 is skipped here; a line
// appears to be missing after the return type.
861  template <class LocalOrdinal, class GlobalOrdinal, class Node>
862  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
864  getColMap () const
865  {
866  return colMap_;
867  }
868 
// Return the stored domain Map (domainMap_), which may be null.
// NOTE(review): embedded line number 871 is skipped here; a line
// appears to be missing after the return type.
869  template <class LocalOrdinal, class GlobalOrdinal, class Node>
870  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
872  getDomainMap () const
873  {
874  return domainMap_;
875  }
876 
// Return the stored range Map (rangeMap_), which may be null.
// NOTE(review): embedded line number 879 is skipped here; a line
// appears to be missing after the return type.
877  template <class LocalOrdinal, class GlobalOrdinal, class Node>
878  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
880  getRangeMap () const
881  {
882  return rangeMap_;
883  }
884 
// Return the stored Import object (importer_), which may be null.
// NOTE(review): embedded line number 887 is skipped here; a line
// appears to be missing after the return type.
885  template <class LocalOrdinal, class GlobalOrdinal, class Node>
886  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
888  getImporter () const
889  {
890  return importer_;
891  }
892 
// Return the stored Export object (exporter_), which may be null.
// NOTE(review): embedded line number 895 is skipped here; a line
// appears to be missing after the return type.
893  template <class LocalOrdinal, class GlobalOrdinal, class Node>
894  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
896  getExporter () const
897  {
898  return exporter_;
899  }
900 
// Return true if and only if the column Map (colMap_) is nonnull.
// NOTE(review): embedded line number 903 is skipped here; a line
// appears to be missing after the return type.
901  template <class LocalOrdinal, class GlobalOrdinal, class Node>
902  bool
904  hasColMap () const
905  {
906  return ! colMap_.is_null ();
907  }
908 
// Return true if and only if all of the following hold: indices are
// allocated, k_numRowEntries_ has zero extent, and the graph has at
// least one local row.
//
// NOTE(review): the listing's embedded line numbers skip 911-912, where
// the class qualifier and the function's name would be; judging by the
// FIXME below this is presumably the "is storage optimized" query --
// confirm against the original file.
909  template <class LocalOrdinal, class GlobalOrdinal, class Node>
910  bool
913  {
914  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
915  // getNodeNumRows() is zero?
916 
917  const bool isOpt = indicesAreAllocated_ &&
918  k_numRowEntries_.extent (0) == 0 &&
919  getNodeNumRows () > 0;
920 
921  return isOpt;
922  }
923 
924  template <class LocalOrdinal, class GlobalOrdinal, class Node>
927  getProfileType () const
928  {
929  return StaticProfile;
930  }
931 
932  template <class LocalOrdinal, class GlobalOrdinal, class Node>
935  getGlobalNumEntries () const
936  {
937  const char tfecfFuncName[] = "getGlobalNumEntries: ";
938  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
939  (! this->haveGlobalConstants_, std::logic_error,
940  "The graph does not have global constants computed, "
941  "but the user has requested them.");
942 
943  return globalNumEntries_;
944  }
945 
946  template <class LocalOrdinal, class GlobalOrdinal, class Node>
947  size_t
949  getNodeNumEntries () const
950  {
951  typedef LocalOrdinal LO;
952 
953  if (this->indicesAreAllocated_) {
954  const LO lclNumRows = this->getNodeNumRows ();
955  if (lclNumRows == 0) {
956  return static_cast<size_t> (0);
957  }
958  else {
959  // Avoid the "*this capture" issue by creating a local Kokkos::View.
960  auto numEntPerRow = this->k_numRowEntries_;
961  const LO numNumEntPerRow = numEntPerRow.extent (0);
962  if (numNumEntPerRow == 0) {
963  if (static_cast<LO> (this->rowPtrsPacked_dev_.extent (0)) <
964  static_cast<LO> (lclNumRows + 1)) {
965  return static_cast<size_t> (0);
966  }
967  else {
968  return this->rowPtrsPacked_host_(lclNumRows);
969  }
970  }
971  else { // k_numRowEntries_ is populated
972  // k_numRowEntries_ is actually be a host View, so we run
973  // the sum in its native execution space. This also means
974  // that we can use explicit capture (which could perhaps
975  // improve build time) instead of KOKKOS_LAMBDA, and avoid
976  // any CUDA build issues with trying to run a __device__ -
977  // only function on host.
978  typedef typename num_row_entries_type::execution_space
979  host_exec_space;
980  typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
981 
982  const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
983  lclNumRows :
984  numNumEntPerRow;
985  size_t nodeNumEnt = 0;
986  Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
987  range_type (0, upperLoopBound),
988  [=] (const LO& k, size_t& lclSum) {
989  lclSum += numEntPerRow(k);
990  }, nodeNumEnt);
991  return nodeNumEnt;
992  }
993  }
994  }
995  else { // nothing allocated on this process, so no entries
996  return static_cast<size_t> (0);
997  }
998  }
999 
1000  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1004  {
1005  const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
1006  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1007  (! this->haveGlobalConstants_, std::logic_error,
1008  "The graph does not have global constants computed, "
1009  "but the user has requested them.");
1010 
1011  return globalMaxNumRowEntries_;
1012  }
1014  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1015  size_t
1017  getNodeMaxNumRowEntries () const
1018  {
1019  return nodeMaxNumRowEntries_;
1020  }
1021 
1022  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1023  bool
1025  isFillComplete () const
1026  {
1027  return fillComplete_;
1028  }
1029 
1030  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1031  bool
1033  isFillActive () const
1034  {
1035  return ! fillComplete_;
1036  }
1038 
1039  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1040  bool
1042  isLocallyIndexed () const
1043  {
1044  return indicesAreLocal_;
1045  }
1046 
1047  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1048  bool
1050  isGloballyIndexed () const
1051  {
1052  return indicesAreGlobal_;
1053  }
1054 
1055  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1056  size_t
1058  getNodeAllocationSize () const
1059  {
1060  typedef LocalOrdinal LO;
1061 
1062  if (this->indicesAreAllocated_) {
1063  const LO lclNumRows = this->getNodeNumRows ();
1064  if (lclNumRows == 0) {
1065  return static_cast<size_t> (0);
1066  }
1067  else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
1068  if (static_cast<LO> (this->rowPtrsPacked_dev_.extent (0)) <
1069  static_cast<LO> (lclNumRows + 1)) {
1070  return static_cast<size_t> (0);
1071  }
1072  else {
1073  return this->rowPtrsPacked_host_(lclNumRows);
1074  }
1075  }
1076  else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1077  if (rowPtrsUnpacked_host_.extent (0) == 0) {
1078  return static_cast<size_t> (0);
1079  }
1080  else {
1081  return rowPtrsUnpacked_host_(lclNumRows);
1082  }
1083  }
1084  else {
1085  return static_cast<size_t> (0);
1086  }
1087  }
1088  else {
1089  return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1090  }
1091  }
1092 
1093  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1094  Teuchos::RCP<const Teuchos::Comm<int> >
1096  getComm () const
1097  {
1098  return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1099  }
1100 
1101  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1102  GlobalOrdinal
1104  getIndexBase () const
1105  {
1106  return rowMap_->getIndexBase ();
1107  }
1108 
1109  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1110  bool
1112  indicesAreAllocated () const
1113  {
1114  return indicesAreAllocated_;
1115  }
1116 
1117  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1118  bool
1120  isSorted () const
1121  {
1122  return indicesAreSorted_;
1123  }
1124 
1125  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1126  bool
1128  isMerged () const
1129  {
1130  return noRedundancies_;
1131  }
1132 
1133  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1134  void
1137  {
1138  // FIXME (mfh 07 May 2013) How do we know that the change
1139  // introduced a redundancy, or even that it invalidated the sorted
1140  // order of indices? CrsGraph has always made this conservative
1141  // guess. It could be a bit costly to check at insertion time,
1142  // though.
1143  indicesAreSorted_ = false;
1144  noRedundancies_ = false;
1145 
1146  // We've modified the graph, so we'll have to recompute local
1147  // constants like the number of diagonal entries on this process.
1148  haveLocalConstants_ = false;
1149  }
1150 
1151  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1152  void
1154  allocateIndices (const ELocalGlobal lg, const bool verbose)
1155  {
1157  using Teuchos::arcp;
1158  using Teuchos::Array;
1159  using Teuchos::ArrayRCP;
1160  using std::endl;
1161  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1162  typedef typename local_graph_device_type::row_map_type::non_const_type
1163  non_const_row_map_type;
1164  const char tfecfFuncName[] = "allocateIndices: ";
1165  const char suffix[] =
1166  " Please report this bug to the Tpetra developers.";
1167  ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1169  std::unique_ptr<std::string> prefix;
1170  if (verbose) {
1171  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1172  std::ostringstream os;
1173  os << *prefix << "Start: lg="
1174  << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1175  << ", numRows: " << this->getNodeNumRows() << endl;
1176  std::cerr << os.str();
1177  }
1178 
1179  // This is a protected function, only callable by us. If it was
1180  // called incorrectly, it is our fault. That's why the tests
1181  // below throw std::logic_error instead of std::invalid_argument.
1182  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1183  (isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1184  ": The graph is locally indexed, but Tpetra code is calling "
1185  "this method with lg=GlobalIndices." << suffix);
1186  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1187  (isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1188  ": The graph is globally indexed, but Tpetra code is calling "
1189  "this method with lg=LocalIndices." << suffix);
1190  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1191  (indicesAreAllocated (), std::logic_error, ": The graph's "
1192  "indices are already allocated, but Tpetra is calling "
1193  "allocateIndices again." << suffix);
1194  const size_t numRows = this->getNodeNumRows ();
1195 
1196  //
1197  // STATIC ALLOCATION PROFILE
1198  //
1199  {
1200  if (verbose) {
1201  std::ostringstream os;
1202  os << *prefix << "Allocate k_rowPtrs: " << (numRows+1) << endl;
1203  std::cerr << os.str();
1204  }
1205  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1206 
1207  if (this->k_numAllocPerRow_.extent (0) != 0) {
1208  // It's OK to throw std::invalid_argument here, because we
1209  // haven't incurred any side effects yet. Throwing that
1210  // exception (and not, say, std::logic_error) implies that the
1211  // instance can recover.
1212  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1213  (this->k_numAllocPerRow_.extent (0) != numRows,
1214  std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1215  "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1216  << ", but its length != numRows = " << numRows << ".");
1217 
1218  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1219  // we want to compute here) lives on device. That's OK;
1220  // computeOffsetsFromCounts can handle this case.
1222 
1223  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1224  // doesn't attempt to check its input for "invalid" flag
1225  // values. For now, we omit that feature of the sequential
1226  // code disabled below.
1227  computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1228  }
1229  else {
1230  // It's OK to throw std::invalid_argument here, because we
1231  // haven't incurred any side effects yet. Throwing that
1232  // exception (and not, say, std::logic_error) implies that the
1233  // instance can recover.
1234  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1235  (this->numAllocForAllRows_ ==
1236  Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1237  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1238  "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1239  Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1240 
1242  computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1243  }
1244 
1245  // "Commit" the resulting row offsets.
1246  setRowPtrsUnpacked(k_rowPtrs);
1247  }
1248 
1249  const size_type numInds = rowPtrsUnpacked_host_(numRows);
1250  if (lg == LocalIndices) {
1251  if (verbose) {
1252  std::ostringstream os;
1253  os << *prefix << "Allocate local column indices "
1254  "lclIndsUnpacked_wdv: " << numInds << endl;
1255  std::cerr << os.str();
1256  }
1257  lclIndsUnpacked_wdv = local_inds_wdv_type (
1258  local_inds_dualv_type("Tpetra::CrsGraph::lclInd",numInds));
1259  }
1260  else {
1261  if (verbose) {
1262  std::ostringstream os;
1263  os << *prefix << "Allocate global column indices "
1264  "gblInds_wdv: " << numInds << endl;
1265  std::cerr << os.str();
1266  }
1267  gblInds_wdv = global_inds_wdv_type (
1268  global_inds_dualv_type("Tpetra::CrsGraph::gblInd",numInds));
1269  }
1270  storageStatus_ = Details::STORAGE_1D_UNPACKED;
1271 
1272  this->indicesAreLocal_ = (lg == LocalIndices);
1273  this->indicesAreGlobal_ = (lg == GlobalIndices);
1274 
1275  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1276  using Kokkos::ViewAllocateWithoutInitializing;
1277  typedef decltype (k_numRowEntries_) row_ent_type;
1278  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1279  if (verbose) {
1280  std::ostringstream os;
1281  os << *prefix << "Allocate k_numRowEntries_: " << numRows
1282  << endl;
1283  std::cerr << os.str();
1284  }
1285  row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1286  Kokkos::deep_copy (numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1287  this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1288  }
1289 
1290  // Once indices are allocated, CrsGraph needs to free this information.
1291  this->numAllocForAllRows_ = 0;
1292  this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1293  this->indicesAreAllocated_ = true;
1294 
1295  try {
1296  this->checkInternalState ();
1297  }
1298  catch (std::logic_error& e) {
1299  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1300  (true, std::logic_error, "At end of allocateIndices, "
1301  "checkInternalState threw std::logic_error: "
1302  << e.what ());
1303  }
1304  catch (std::exception& e) {
1305  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1306  (true, std::runtime_error, "At end of allocateIndices, "
1307  "checkInternalState threw std::exception: "
1308  << e.what ());
1309  }
1310  catch (...) {
1311  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1312  (true, std::runtime_error, "At end of allocateIndices, "
1313  "checkInternalState threw an exception "
1314  "not a subclass of std::exception.");
1315  }
1316 
1317  if (verbose) {
1318  std::ostringstream os;
1319  os << *prefix << "Done" << endl;
1320  std::cerr << os.str();
1321  }
1322  }
1323 
1324  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1325  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1326  local_inds_dualv_type::t_host::const_type
1328  getLocalIndsViewHost (const RowInfo& rowinfo) const
1329  {
1330  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1331  return typename local_inds_dualv_type::t_host::const_type ();
1332  else
1333  return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1334  rowinfo.allocSize,
1335  Access::ReadOnly);
1336  }
1337 
1338  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1340  local_inds_dualv_type::t_host
1342  getLocalIndsViewHostNonConst (const RowInfo& rowinfo)
1343  {
1344  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1345  return typename local_inds_dualv_type::t_host ();
1346  else
1347  return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1348  rowinfo.allocSize,
1349  Access::ReadWrite);
1350  }
1351 
1352  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1354  global_inds_dualv_type::t_host::const_type
1356  getGlobalIndsViewHost (const RowInfo& rowinfo) const
1357  {
1358  if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1359  return typename global_inds_dualv_type::t_host::const_type ();
1360  else
1361  return gblInds_wdv.getHostSubview(rowinfo.offset1D,
1362  rowinfo.allocSize,
1363  Access::ReadOnly);
1364  }
1365 
1366  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1368  local_inds_dualv_type::t_dev::const_type
1370  getLocalIndsViewDevice (const RowInfo& rowinfo) const
1371  {
1372  if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1373  return typename local_inds_dualv_type::t_dev::const_type ();
1374  else
1375  return lclIndsUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
1376  rowinfo.allocSize,
1377  Access::ReadOnly);
1378  }
1379 
1380  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1382  global_inds_dualv_type::t_dev::const_type
1384  getGlobalIndsViewDevice (const RowInfo& rowinfo) const
1385  {
1386  if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1387  return typename global_inds_dualv_type::t_dev::const_type ();
1388  else
1389  return gblInds_wdv.getDeviceSubview(rowinfo.offset1D,
1390  rowinfo.allocSize,
1391  Access::ReadOnly);
1392  }
1393 
1394 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1395  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1396  Teuchos::ArrayView<const LocalOrdinal>
1398  getLocalView (const RowInfo& rowinfo) const
1399  {
1400  using Kokkos::subview;
1401  typedef LocalOrdinal LO;
1402 
1403  if (rowinfo.allocSize == 0) {
1404  return Teuchos::ArrayView<const LO> ();
1405  }
1406  else {
1407  if (lclIndsUnpacked_wdv.extent (0) != 0) { // 1-D storage
1408  const size_t start = rowinfo.offset1D;
1409  const size_t len = rowinfo.allocSize;
1410  const std::pair<size_t, size_t> rng (start, start + len);
1411  // mfh 23 Nov 2015: Don't just create a subview of
1412  // lclIndsUnpacked_wdv directly, because that first creates a
1413  // _managed_ subview, then returns an unmanaged version of
1414  // that. That touches the reference count, which costs
1415  // performance in a measurable way.
1416  // KDDKDD Function is deprecated; we ignore the unmanaged bit above.
1417  // KDDKDD Breaks the reference counting paradigm; reference to
1418  // KDDKDD host view is lost.
1419  auto rowViewHost = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
1420  auto rowView = subview(rowViewHost, rng);
1421  const LO* const rowViewRaw = (len == 0) ? nullptr : rowView.data ();
1422  return Teuchos::ArrayView<const LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1423  }
1424  else {
1425  return Teuchos::ArrayView<const LO> (); // nothing in the row to view
1426  }
1427  }
1428  }
1430 
1431  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1432  Teuchos::ArrayView<const GlobalOrdinal>
1434  getGlobalView (const RowInfo& rowinfo) const
1435  {
1436  using GO = global_ordinal_type;
1437 
1438  Teuchos::ArrayView<const GO> view;
1439  if (rowinfo.allocSize > 0 && gblInds_wdv.extent (0) != 0) {
1440  const auto rng =
1441  std::make_pair (rowinfo.offset1D,
1442  rowinfo.offset1D + rowinfo.allocSize);
1443  // mfh 23 Nov 2015: Don't just create a subview of
1444  // gblInds_wdv directly, because that first creates a
1445  // _managed_ subview, then returns an unmanaged version of
1446  // that. That touches the reference count, which costs
1447  // performance in a measurable way.
1448  // KDDKDD This method is deprecated; we ignore the unmanaged bit above
1449  // KDDKDD Breaks the reference counting paradigm; unmanaged
1450  // KDDKDD memory does not do reference counting
1451  auto gblInds = gblInds_wdv.getHostView(Access::ReadOnly);
1452  using Kokkos::Compat::getConstArrayView;
1453  using Kokkos::subview;
1454  view = getConstArrayView (subview (gblInds, rng));
1455  }
1456  return view;
1457  }
1458 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1459 
1460  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1461  RowInfo
1463  getRowInfo (const LocalOrdinal myRow) const
1464  {
1465  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1466  RowInfo ret;
1467  if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1468  ret.localRow = STINV;
1469  ret.allocSize = 0;
1470  ret.numEntries = 0;
1471  ret.offset1D = STINV;
1472  return ret;
1473  }
1474 
1475  ret.localRow = static_cast<size_t> (myRow);
1476  if (this->indicesAreAllocated ()) {
1477  // Offsets tell us the allocation size in this case.
1478  if (rowPtrsUnpacked_host_.extent (0) == 0) {
1479  ret.offset1D = 0;
1480  ret.allocSize = 0;
1481  }
1482  else {
1483  ret.offset1D = rowPtrsUnpacked_host_(myRow);
1484  ret.allocSize = rowPtrsUnpacked_host_(myRow+1) - rowPtrsUnpacked_host_(myRow);
1485  }
1486 
1487  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1488  ret.allocSize :
1489  this->k_numRowEntries_(myRow);
1490  }
1491  else { // haven't performed allocation yet; probably won't hit this code
1492  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1493  // allocate, rather than doing lazy allocation at first insert.
1494  // This will make k_numAllocPerRow_ obsolete.
1495  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1496  this->k_numAllocPerRow_(myRow) : // this is a host View
1497  this->numAllocForAllRows_;
1498  ret.numEntries = 0;
1499  ret.offset1D = STINV;
1500  }
1501 
1502  return ret;
1503  }
1504 
1505 
1506  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1507  RowInfo
1509  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1510  {
1511  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1512  RowInfo ret;
1513  if (this->rowMap_.is_null ()) {
1514  ret.localRow = STINV;
1515  ret.allocSize = 0;
1516  ret.numEntries = 0;
1517  ret.offset1D = STINV;
1518  return ret;
1519  }
1520  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1521  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1522  ret.localRow = STINV;
1523  ret.allocSize = 0;
1524  ret.numEntries = 0;
1525  ret.offset1D = STINV;
1526  return ret;
1527  }
1528 
1529  ret.localRow = static_cast<size_t> (myRow);
1530  if (this->indicesAreAllocated ()) {
1531  // graph data structures have the info that we need
1532  //
1533  // if static graph, offsets tell us the allocation size
1534  if (rowPtrsUnpacked_host_.extent (0) == 0) {
1535  ret.offset1D = 0;
1536  ret.allocSize = 0;
1537  }
1538  else {
1539  ret.offset1D = rowPtrsUnpacked_host_(myRow);
1540  ret.allocSize = rowPtrsUnpacked_host_(myRow+1) - rowPtrsUnpacked_host_(myRow);
1541  }
1542 
1543  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1544  ret.allocSize :
1545  this->k_numRowEntries_(myRow);
1546  }
1547  else { // haven't performed allocation yet; probably won't hit this code
1548  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1549  // allocate, rather than doing lazy allocation at first insert.
1550  // This will make k_numAllocPerRow_ obsolete.
1551  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1552  this->k_numAllocPerRow_(myRow) : // this is a host View
1553  this->numAllocForAllRows_;
1554  ret.numEntries = 0;
1555  ret.offset1D = STINV;
1556  }
1557 
1558  return ret;
1559  }
1560 
1561 
1562  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1563  void
1564  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1565  staticAssertions () const
1566  {
1567  using Teuchos::OrdinalTraits;
1568  typedef LocalOrdinal LO;
1569  typedef GlobalOrdinal GO;
1570  typedef global_size_t GST;
1571 
1572  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1573  // This is so that we can store local indices in the memory
1574  // formerly occupied by global indices.
1575  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1576  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1577  // Assumption: max(size_t) >= max(LocalOrdinal)
1578  // This is so that we can represent any LocalOrdinal as a size_t.
1579  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1580  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1581  static_assert (sizeof(GST) >= sizeof(size_t),
1582  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1583 
1584  // FIXME (mfh 30 Sep 2015) We're not using
1585  // Teuchos::CompileTimeAssert any more. Can we do these checks
1586  // with static_assert?
1587 
1588  // can't call max() with CompileTimeAssert, because it isn't a
1589  // constant expression; will need to make this a runtime check
1590  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1591  "given template arguments: size assumptions are not valid.";
1592  TEUCHOS_TEST_FOR_EXCEPTION(
1593  static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1594  std::runtime_error, msg);
1595  TEUCHOS_TEST_FOR_EXCEPTION(
1596  static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1597  std::runtime_error, msg);
1598  TEUCHOS_TEST_FOR_EXCEPTION(
1599  static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1600  std::runtime_error, msg);
1601  TEUCHOS_TEST_FOR_EXCEPTION(
1602  Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1603  std::runtime_error, msg);
1604  }
1605 
1606 
1607  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1608  size_t
1610  insertIndices (RowInfo& rowinfo,
1611  const SLocalGlobalViews &newInds,
1612  const ELocalGlobal lg,
1613  const ELocalGlobal I)
1614  {
1615  using Teuchos::ArrayView;
1616  typedef LocalOrdinal LO;
1617  typedef GlobalOrdinal GO;
1618  const char tfecfFuncName[] = "insertIndices: ";
1619 
1620  size_t oldNumEnt = 0;
1621  if (debug_) {
1622  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1623  (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1624  "lg must be either GlobalIndices or LocalIndices.");
1625  oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
1626  }
1627 
1628  size_t numNewInds = 0;
1629  if (lg == GlobalIndices) { // input indices are global
1630  ArrayView<const GO> new_ginds = newInds.ginds;
1631  numNewInds = new_ginds.size();
1632  if (I == GlobalIndices) { // store global indices
1633  auto gind_view = gblInds_wdv.getHostView(Access::ReadWrite);
1634  if (debug_) {
1635  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1636  (static_cast<size_t> (gind_view.size ()) <
1637  rowinfo.numEntries + numNewInds, std::logic_error,
1638  "gind_view.size() = " << gind_view.size ()
1639  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1640  << ") + numNewInds (= " << numNewInds << ").");
1641  }
1642  GO* const gblColInds_out = gind_view.data () + rowinfo.offset1D
1643  + rowinfo.numEntries;
1644  for (size_t k = 0; k < numNewInds; ++k) {
1645  gblColInds_out[k] = new_ginds[k];
1646  }
1647  }
1648  else if (I == LocalIndices) { // store local indices
1649  auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1650  if (debug_) {
1651  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1652  (static_cast<size_t> (lind_view.size ()) <
1653  rowinfo.numEntries + numNewInds, std::logic_error,
1654  "lind_view.size() = " << lind_view.size ()
1655  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1656  << ") + numNewInds (= " << numNewInds << ").");
1657  }
1658  LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1659  + rowinfo.numEntries;
1660  for (size_t k = 0; k < numNewInds; ++k) {
1661  lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
1662  }
1663  }
1664  }
1665  else if (lg == LocalIndices) { // input indices are local
1666  ArrayView<const LO> new_linds = newInds.linds;
1667  numNewInds = new_linds.size();
1668  if (I == LocalIndices) { // store local indices
1669  auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1670  if (debug_) {
1671  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1672  (static_cast<size_t> (lind_view.size ()) <
1673  rowinfo.numEntries + numNewInds, std::logic_error,
1674  "lind_view.size() = " << lind_view.size ()
1675  << " < rowinfo.numEntries (= " << rowinfo.numEntries
1676  << ") + numNewInds (= " << numNewInds << ").");
1677  }
1678  LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1679  + rowinfo.numEntries;
1680  for (size_t k = 0; k < numNewInds; ++k) {
1681  lclColInds_out[k] = new_linds[k];
1682  }
1683  }
1684  else if (I == GlobalIndices) {
1685  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1686  (true, std::logic_error, "The case where the input indices are local "
1687  "and the indices to write are global (lg=LocalIndices, I="
1688  "GlobalIndices) is not implemented, because it does not make sense."
1689  << std::endl << "If you have correct local column indices, that "
1690  "means the graph has a column Map. In that case, you should be "
1691  "storing local indices.");
1692  }
1693  }
1694 
1695  rowinfo.numEntries += numNewInds;
1696  this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1697  this->setLocallyModified ();
1698 
1699  if (debug_) {
1700  const size_t chkNewNumEnt =
1701  this->getNumEntriesInLocalRow (rowinfo.localRow);
1702  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1703  (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1704  "chkNewNumEnt = " << chkNewNumEnt
1705  << " != oldNumEnt (= " << oldNumEnt
1706  << ") + numNewInds (= " << numNewInds << ").");
1707  }
1708 
1709  return numNewInds;
1710  }
1711 
1712  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1713  size_t
1715  insertGlobalIndicesImpl (const LocalOrdinal lclRow,
1716  const GlobalOrdinal inputGblColInds[],
1717  const size_t numInputInds)
1718  {
1719  return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
1720  inputGblColInds, numInputInds);
1721  }
1722 
1723  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1724  size_t
1726  insertGlobalIndicesImpl (const RowInfo& rowInfo,
1727  const GlobalOrdinal inputGblColInds[],
1728  const size_t numInputInds,
1729  std::function<void(const size_t, const size_t, const size_t)> fun)
1730  {
1732  using Kokkos::View;
1733  using Kokkos::subview;
1734  using Kokkos::MemoryUnmanaged;
1735  using Teuchos::ArrayView;
1736  using LO = LocalOrdinal;
1737  using GO = GlobalOrdinal;
1738  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1739  const LO lclRow = static_cast<LO> (rowInfo.localRow);
1740 
1741  auto numEntries = rowInfo.numEntries;
1742  using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1743  inp_view_type inputInds(inputGblColInds, numInputInds);
1744  size_t numInserted;
1745  {
1746  auto gblIndsHostView = this->gblInds_wdv.getHostView(Access::ReadWrite);
1747  numInserted = Details::insertCrsIndices(lclRow, this->rowPtrsUnpacked_host_,
1748  gblIndsHostView,
1749  numEntries, inputInds, fun);
1750  }
1751 
1752  const bool insertFailed =
1753  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1754  if(insertFailed) {
1755  constexpr size_t ONE (1);
1756  const int myRank = this->getComm()->getRank();
1757  std::ostringstream os;
1758 
1759  os << "Proc " << myRank << ": Not enough capacity to insert "
1760  << numInputInds
1761  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1762  << " into local row " << lclRow << ", which currently has "
1763  << rowInfo.numEntries
1764  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1765  << " and total allocation size " << rowInfo.allocSize
1766  << ". ";
1767  const size_t maxNumToPrint =
1769  ArrayView<const GO> inputGblColIndsView(inputGblColInds,
1770  numInputInds);
1771  verbosePrintArray(os, inputGblColIndsView, "Input global "
1772  "column indices", maxNumToPrint);
1773  os << ", ";
1774  auto curGblColInds = getGlobalIndsViewHost(rowInfo);
1775  ArrayView<const GO> curGblColIndsView(curGblColInds.data(),
1776  rowInfo.numEntries);
1777  verbosePrintArray(os, curGblColIndsView, "Current global "
1778  "column indices", maxNumToPrint);
1779  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1780  (true, std::runtime_error, os.str());
1781  }
1783  this->k_numRowEntries_(lclRow) += numInserted;
1784 
1785  this->setLocallyModified();
1786  return numInserted;
1787  }
1788 
1789 
1790  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1791  void
1793  insertLocalIndicesImpl (const LocalOrdinal myRow,
1794  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1795  std::function<void(const size_t, const size_t, const size_t)> fun)
1796  {
1797  using Kokkos::MemoryUnmanaged;
1798  using Kokkos::subview;
1799  using Kokkos::View;
1800  using LO = LocalOrdinal;
1801  const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
1802 
1803  const RowInfo rowInfo = this->getRowInfo(myRow);
1804 
1805  size_t numNewInds = 0;
1806  size_t newNumEntries = 0;
1807 
1808  auto numEntries = rowInfo.numEntries;
1809  // Note: Teuchos::ArrayViews are in HostSpace
1810  using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
1811  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1812  size_t numInserted = 0;
1813  {
1814  auto lclInds = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1815  numInserted = Details::insertCrsIndices(myRow, rowPtrsUnpacked_host_, lclInds,
1816  numEntries, inputInds, fun);
1817  }
1818 
1819  const bool insertFailed =
1820  numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1821  if(insertFailed) {
1822  constexpr size_t ONE (1);
1823  const size_t numInputInds(indices.size());
1824  const int myRank = this->getComm()->getRank();
1825  std::ostringstream os;
1826  os << "On MPI Process " << myRank << ": Not enough capacity to "
1827  "insert " << numInputInds
1828  << " ind" << (numInputInds != ONE ? "ices" : "ex")
1829  << " into local row " << myRow << ", which currently has "
1830  << rowInfo.numEntries
1831  << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1832  << " and total allocation size " << rowInfo.allocSize << ".";
1833  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1834  (true, std::runtime_error, os.str());
1835  }
1836  numNewInds = numInserted;
1837  newNumEntries = rowInfo.numEntries + numNewInds;
1838 
1839  this->k_numRowEntries_(myRow) += numNewInds;
1840  this->setLocallyModified ();
1841 
1842  if (debug_) {
1843  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
1844  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1845  (chkNewNumEntries != newNumEntries, std::logic_error,
1846  "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1847  << " != newNumEntries = " << newNumEntries
1848  << ". Please report this bug to the Tpetra developers.");
1849  }
1850  }
1851 
1852  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1853  size_t
1855  findGlobalIndices(const RowInfo& rowInfo,
1856  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1857  std::function<void(const size_t, const size_t, const size_t)> fun) const
1858  {
1859  using GO = GlobalOrdinal;
1860  using Kokkos::View;
1861  using Kokkos::MemoryUnmanaged;
1862  auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1863 
1864  using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1865  inp_view_type inputInds(indices.getRawPtr(), indices.size());
1866 
1867  size_t numFound = 0;
1868  LocalOrdinal lclRow = rowInfo.localRow;
1869  if (this->isLocallyIndexed())
1870  {
1871  if (this->colMap_.is_null())
1872  return invalidCount;
1873  const auto& colMap = *(this->colMap_);
1874  auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
1875  numFound = Details::findCrsIndices(lclRow, rowPtrsUnpacked_host_,
1876  rowInfo.numEntries,
1877  lclIndsUnpacked_wdv.getHostView(Access::ReadOnly), inputInds, map, fun);
1878  }
1879  else if (this->isGloballyIndexed())
1880  {
1881  numFound = Details::findCrsIndices(lclRow, rowPtrsUnpacked_host_,
1882  rowInfo.numEntries,
1883  gblInds_wdv.getHostView(Access::ReadOnly), inputInds, fun);
1884  }
1885  return numFound;
1886  }
1887 
1888 
1889  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1890  size_t
1892  sortAndMergeRowIndices (const RowInfo& rowInfo,
1893  const bool sorted,
1894  const bool merged)
1895  {
1896  const size_t origNumEnt = rowInfo.numEntries;
1897  if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
1898  origNumEnt != 0) {
1899  auto lclColInds = this->getLocalIndsViewHostNonConst (rowInfo);
1900 
1901  LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
1902  if (! sorted) {
1903  std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
1904  }
1905 
1906  if (! merged) {
1907  LocalOrdinal* const beg = lclColIndsRaw;
1908  LocalOrdinal* const end = beg + rowInfo.numEntries;
1909  LocalOrdinal* const newend = std::unique (beg, end);
1910  const size_t newNumEnt = newend - beg;
1911 
1912  // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
1913  this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
1914  return origNumEnt - newNumEnt; // the number of duplicates in the row
1915  }
1916  else {
1917  return static_cast<size_t> (0); // assume no duplicates
1918  }
1919  }
1920  else {
1921  return static_cast<size_t> (0); // no entries in the row
1922  }
1923  }
1925 
1926  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1927  void
1929  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
1930  const Teuchos::RCP<const map_type>& rangeMap)
1931  {
1932  // simple pointer comparison for equality
1933  if (domainMap_ != domainMap) {
1934  domainMap_ = domainMap;
1935  importer_ = Teuchos::null;
1936  }
1937  if (rangeMap_ != rangeMap) {
1938  rangeMap_ = rangeMap;
1939  exporter_ = Teuchos::null;
1940  }
1941  }
1942 
1943 
1944  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1945  void
1946  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1947  clearGlobalConstants ()
1948  {
1949  const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
1950 
1951  globalNumEntries_ = INV;
1952  globalMaxNumRowEntries_ = INV;
1953  haveGlobalConstants_ = false;
1954  }
1955 
1956 
1957  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1958  void
1960  checkInternalState () const
1961  {
1962  if (debug_) {
1963  using std::endl;
1964  const char tfecfFuncName[] = "checkInternalState: ";
1965  const char suffix[] = " Please report this bug to the Tpetra developers.";
1966 
1967  std::unique_ptr<std::string> prefix;
1968  if (verbose_) {
1969  prefix = this->createPrefix("CrsGraph", "checkInternalState");
1970  std::ostringstream os;
1971  os << *prefix << "Start" << endl;
1972  std::cerr << os.str();
1973  }
1974 
1975  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
1976  //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
1977  // check the internal state of this data structure
1978  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
1979  // always remains in a valid state
1980 
1981  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1982  (this->rowMap_.is_null (), std::logic_error,
1983  "Row Map is null." << suffix);
1984  // This may access the row Map, so we need to check first (above)
1985  // whether the row Map is null.
1986  const LocalOrdinal lclNumRows =
1987  static_cast<LocalOrdinal> (this->getNodeNumRows ());
1988 
1989  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1990  (this->isFillActive () == this->isFillComplete (), std::logic_error,
1991  "Graph cannot be both fill active and fill complete." << suffix);
1992  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1993  (this->isFillComplete () &&
1994  (this->colMap_.is_null () ||
1995  this->rangeMap_.is_null () ||
1996  this->domainMap_.is_null ()),
1997  std::logic_error,
1998  "Graph is full complete, but at least one of {column, range, domain} "
1999  "Map is null." << suffix);
2000  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2001  (this->isStorageOptimized () && ! this->indicesAreAllocated (),
2002  std::logic_error, "Storage is optimized, but indices are not "
2003  "allocated, not even trivially." << suffix);
2004 
2005  size_t nodeAllocSize = 0;
2006  try {
2007  nodeAllocSize = this->getNodeAllocationSize ();
2008  }
2009  catch (std::logic_error& e) {
2010  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2011  (true, std::runtime_error, "getNodeAllocationSize threw "
2012  "std::logic_error: " << e.what ());
2013  }
2014  catch (std::exception& e) {
2015  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2016  (true, std::runtime_error, "getNodeAllocationSize threw an "
2017  "std::exception: " << e.what ());
2018  }
2019  catch (...) {
2020  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2021  (true, std::runtime_error, "getNodeAllocationSize threw an exception "
2022  "not a subclass of std::exception.");
2023  }
2024 
2025  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2026  (this->isStorageOptimized () &&
2027  nodeAllocSize != this->getNodeNumEntries (),
2028  std::logic_error, "Storage is optimized, but "
2029  "this->getNodeAllocationSize() = " << nodeAllocSize
2030  << " != this->getNodeNumEntries() = " << this->getNodeNumEntries ()
2031  << "." << suffix);
2032  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2033  (! this->haveGlobalConstants_ &&
2034  (this->globalNumEntries_ != GSTI ||
2035  this->globalMaxNumRowEntries_ != GSTI),
2036  std::logic_error, "Graph claims not to have global constants, but "
2037  "some of the global constants are not marked as invalid." << suffix);
2038  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2039  (this->haveGlobalConstants_ &&
2040  (this->globalNumEntries_ == GSTI ||
2041  this->globalMaxNumRowEntries_ == GSTI),
2042  std::logic_error, "Graph claims to have global constants, but "
2043  "some of them are marked as invalid." << suffix);
2044  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2045  (this->haveGlobalConstants_ &&
2046  (this->globalNumEntries_ < this->getNodeNumEntries () ||
2047  this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
2048  std::logic_error, "Graph claims to have global constants, and "
2049  "all of the values of the global constants are valid, but "
2050  "some of the local constants are greater than "
2051  "their corresponding global constants." << suffix);
2052  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2053  (this->indicesAreAllocated () &&
2054  (this->numAllocForAllRows_ != 0 ||
2055  this->k_numAllocPerRow_.extent (0) != 0),
2056  std::logic_error, "The graph claims that its indices are allocated, but "
2057  "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
2058  "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
2059  "the graph is supposed to release its \"allocation specifications\" "
2060  "when it allocates its indices." << suffix);
2061  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2062  (rowPtrsUnpacked_host_.extent(0) != rowPtrsUnpacked_dev_.extent(0),
2063  std::logic_error, "The host and device views of k_rowPtrs_ have "
2064  "different sizes; rowPtrsUnpacked_host_ has size "
2065  << rowPtrsUnpacked_host_.extent(0)
2066  << ", but rowPtrsUnpacked_dev_ has size "
2067  << rowPtrsUnpacked_dev_.extent(0)
2068  << "." << suffix);
2069  if (isGloballyIndexed() && rowPtrsUnpacked_host_.extent(0) != 0) {
2070  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2071  (size_t(rowPtrsUnpacked_host_.extent(0)) != size_t(lclNumRows + 1),
2072  std::logic_error, "The graph is globally indexed and "
2073  "k_rowPtrs has nonzero size " << rowPtrsUnpacked_host_.extent(0)
2074  << ", but that size does not equal lclNumRows+1 = "
2075  << (lclNumRows+1) << "." << suffix);
2076  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2077  (rowPtrsUnpacked_host_(lclNumRows) != size_t(gblInds_wdv.extent(0)),
2078  std::logic_error, "The graph is globally indexed and "
2079  "k_rowPtrs_ has nonzero size " << rowPtrsUnpacked_host_.extent(0)
2080  << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
2081  << rowPtrsUnpacked_host_(lclNumRows)
2082  << " != gblInds_wdv.extent(0)="
2083  << gblInds_wdv.extent(0) << "." << suffix);
2084  }
2085  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2086  (this->isLocallyIndexed () &&
2087  this->rowPtrsUnpacked_host_.extent (0) != 0 &&
2088  (static_cast<size_t> (rowPtrsUnpacked_host_.extent (0)) !=
2089  static_cast<size_t> (lclNumRows + 1) ||
2090  this->rowPtrsUnpacked_host_(lclNumRows) !=
2091  static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0))),
2092  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2093  "the graph is locally indexed, then "
2094  "k_rowPtrs_ must have N+1 rows, and "
2095  "k_rowPtrs_(N) must equal lclIndsUnpacked_wdv.extent(0)." << suffix);
2096 
2097  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2098  (this->indicesAreAllocated () &&
2099  nodeAllocSize > 0 &&
2100  this->lclIndsUnpacked_wdv.extent (0) == 0 &&
2101  this->gblInds_wdv.extent (0) == 0,
2102  std::logic_error, "Graph is allocated nontrivially, but "
2103  "but 1-D allocations are not present." << suffix);
2104 
2105  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2106  (! this->indicesAreAllocated () &&
2107  ((this->rowPtrsUnpacked_host_.extent (0) != 0 ||
2108  this->k_numRowEntries_.extent (0) != 0) ||
2109  this->lclIndsUnpacked_wdv.extent (0) != 0 ||
2110  this->gblInds_wdv.extent (0) != 0),
2111  std::logic_error, "If indices are not allocated, "
2112  "then none of the buffers should be." << suffix);
2113  // indices may be local or global only if they are allocated
2114  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2115  // indicesAreGlobal_)
2116  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2117  ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2118  ! this->indicesAreAllocated_,
2119  std::logic_error, "Indices may be local or global only if they are "
2120  "allocated." << suffix);
2121  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2122  (this->indicesAreLocal_ && this->indicesAreGlobal_,
2123  std::logic_error, "Indices may not be both local and global." << suffix);
2124  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2125  (indicesAreLocal_ && gblInds_wdv.extent (0) != 0,
2126  std::logic_error, "Indices are local, but "
2127  "gblInds_wdv.extent(0) (= " << gblInds_wdv.extent (0)
2128  << ") != 0. In other words, if indices are local, then "
2129  "allocations of global indices should not be present."
2130  << suffix);
2131  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2132  (indicesAreGlobal_ && lclIndsUnpacked_wdv.extent (0) != 0,
2133  std::logic_error, "Indices are global, but "
2134  "lclIndsUnpacked_wdv.extent(0) (= " << lclIndsUnpacked_wdv.extent(0)
2135  << ") != 0. In other words, if indices are global, "
2136  "then allocations for local indices should not be present."
2137  << suffix);
2138  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2139  (indicesAreLocal_ && nodeAllocSize > 0 &&
2140  lclIndsUnpacked_wdv.extent (0) == 0 && getNodeNumRows () > 0,
2141  std::logic_error, "Indices are local and "
2142  "getNodeAllocationSize() = " << nodeAllocSize << " > 0, but "
2143  "lclIndsUnpacked_wdv.extent(0) = 0 and getNodeNumRows() = "
2144  << getNodeNumRows () << " > 0." << suffix);
2145  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2146  (indicesAreGlobal_ && nodeAllocSize > 0 &&
2147  gblInds_wdv.extent (0) == 0 && getNodeNumRows () > 0,
2148  std::logic_error, "Indices are global and "
2149  "getNodeAllocationSize() = " << nodeAllocSize << " > 0, but "
2150  "gblInds_wdv.extent(0) = 0 and getNodeNumRows() = "
2151  << getNodeNumRows () << " > 0." << suffix);
2152  // check the actual allocations
2153  if (this->indicesAreAllocated () &&
2154  this->rowPtrsUnpacked_host_.extent (0) != 0) {
2155  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2156  (static_cast<size_t> (this->rowPtrsUnpacked_host_.extent (0)) !=
2157  this->getNodeNumRows () + 1,
2158  std::logic_error, "Indices are allocated and "
2159  "k_rowPtrs_ has nonzero length, but rowPtrsUnpacked_host_.extent(0) = "
2160  << this->rowPtrsUnpacked_host_.extent (0) << " != getNodeNumRows()+1 = "
2161  << (this->getNodeNumRows () + 1) << "." << suffix);
2162  const size_t actualNumAllocated =
2163  this->rowPtrsUnpacked_host_(this->getNodeNumRows());
2164  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2165  (this->isLocallyIndexed () &&
2166  static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0)) != actualNumAllocated,
2167  std::logic_error, "Graph is locally indexed, indices are "
2168  "are allocated, and k_rowPtrs_ has nonzero length, but "
2169  "lclIndsUnpacked_wdv.extent(0) = " << this->lclIndsUnpacked_wdv.extent (0)
2170  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2171  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2172  (this->isGloballyIndexed () &&
2173  static_cast<size_t> (this->gblInds_wdv.extent (0)) != actualNumAllocated,
2174  std::logic_error, "Graph is globally indexed, indices "
2175  "are allocated, and k_rowPtrs_ has nonzero length, but "
2176  "gblInds_wdv.extent(0) = " << this->gblInds_wdv.extent (0)
2177  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2178  }
2179 
2180  if (verbose_) {
2181  std::ostringstream os;
2182  os << *prefix << "Done" << endl;
2183  std::cerr << os.str();
2184  }
2185  }
2186  }
2187 
2189  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2190  size_t
2192  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2193  {
2194  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2195  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2196  return Teuchos::OrdinalTraits<size_t>::invalid ();
2197  }
2198  else {
2199  return rowInfo.numEntries;
2200  }
2201  }
2202 
2203 
2204  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2205  size_t
2207  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2208  {
2209  const RowInfo rowInfo = this->getRowInfo (localRow);
2210  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2211  return Teuchos::OrdinalTraits<size_t>::invalid ();
2212  }
2213  else {
2214  return rowInfo.numEntries;
2215  }
2216  }
2217 
2218 
2219  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2220  size_t
2222  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2223  {
2224  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2225  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2226  return Teuchos::OrdinalTraits<size_t>::invalid ();
2227  }
2228  else {
2229  return rowInfo.allocSize;
2230  }
2231  }
2232 
2233 
2234  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2235  size_t
2237  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2238  {
2239  const RowInfo rowInfo = this->getRowInfo (localRow);
2240  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2241  return Teuchos::OrdinalTraits<size_t>::invalid ();
2242  }
2243  else {
2244  return rowInfo.allocSize;
2245  }
2246  }
2247 
2248 
2249  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2250  Teuchos::ArrayRCP<const size_t>
2252  getNodeRowPtrs () const
2253  {
2254  using Kokkos::ViewAllocateWithoutInitializing;
2255  using Teuchos::ArrayRCP;
2256  typedef typename local_graph_device_type::row_map_type row_map_type;
2257  typedef typename row_map_type::non_const_value_type row_offset_type;
2258  const char prefix[] = "Tpetra::CrsGraph::getNodeRowPtrs: ";
2259  const char suffix[] = " Please report this bug to the Tpetra developers.";
2260 
2261  const size_t size = rowPtrsUnpacked_host_.extent (0);
2262  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2263 
2264  if (size == 0) {
2265  return ArrayRCP<const size_t> ();
2266  }
2267 
2268  ArrayRCP<const row_offset_type> ptr_rot;
2269  ArrayRCP<const size_t> ptr_st;
2270  if (same) { // size_t == row_offset_type
2271  ptr_rot = Kokkos::Compat::persistingView (rowPtrsUnpacked_host_);
2272  }
2273  else { // size_t != row_offset_type
2274  typedef Kokkos::View<size_t*, device_type> ret_view_type;
2275  ret_view_type ptr_d (ViewAllocateWithoutInitializing ("ptr"), size);
2276 
2277  ::Tpetra::Details::copyOffsets (ptr_d, rowPtrsUnpacked_dev_);
2278 
2279  typename ret_view_type::HostMirror ptr_h =
2280  Kokkos::create_mirror_view (ptr_d);
2281  Kokkos::deep_copy(ptr_h, ptr_d);
2282  ptr_st = Kokkos::Compat::persistingView (ptr_h);
2283  }
2284  if (debug_) {
2285  TEUCHOS_TEST_FOR_EXCEPTION
2286  (same && size != 0 && ptr_rot.is_null (), std::logic_error,
2287  prefix << "size_t == row_offset_type and size = " << size
2288  << " != 0, but ptr_rot is null." << suffix);
2289  TEUCHOS_TEST_FOR_EXCEPTION
2290  (! same && size != 0 && ptr_st.is_null (), std::logic_error,
2291  prefix << "size_t != row_offset_type and size = " << size
2292  << " != 0, but ptr_st is null." << suffix);
2293  }
2294 
2295  // If size_t == row_offset_type, return a persisting host view of
2296  // k_rowPtrs_. Otherwise, return a size_t host copy of k_rowPtrs_.
2297  ArrayRCP<const size_t> retval =
2298  Kokkos::Impl::if_c<same,
2299  ArrayRCP<const row_offset_type>,
2300  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2301  if (debug_) {
2302  TEUCHOS_TEST_FOR_EXCEPTION
2303  (size != 0 && retval.is_null (), std::logic_error,
2304  prefix << "size = " << size << " != 0, but retval is null." << suffix);
2305  }
2306  return retval;
2307  }
2308 
2309 
2310  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2311  Teuchos::ArrayRCP<const LocalOrdinal>
2313  getNodePackedIndices () const
2314  {
2315  // KDDKDD UVM REMOVAL: 3/21
2316  // KDDKDD This function used to return k_lclInds1D_.
2317  // KDDKDD I retain its behavior by return lclIndsUnpacked_wdv.getHostView.
2318  // KDDKDD However, k_lclInds1D_ was not necessarily PACKED;
2319  // KDDKDD PACKED indices are in the static graph.
2320  // KDDKDD However, with OptimizeStorage, k_lclInds1D_ was PACKED.
2321  // return Kokkos::Compat::persistingView (k_lclInds1D_);
2322  return Kokkos::Compat::persistingView (
2323  lclIndsUnpacked_wdv.getHostView(Access::ReadOnly));
2324  }
2325 
2326 
2327  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2328  void
2330  getLocalRowCopy (LocalOrdinal localRow,
2331  nonconst_local_inds_host_view_type & indices,
2332  size_t& numEntries) const
2333  {
2334  using Teuchos::ArrayView;
2335  const char tfecfFuncName[] = "getLocalRowCopy: ";
2336 
2337  TEUCHOS_TEST_FOR_EXCEPTION(
2338  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2339  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2340  "does not have a column Map yet. That means we don't have local indices "
2341  "for columns yet, so it doesn't make sense to call this method. If the "
2342  "graph doesn't have a column Map yet, you should call fillComplete on "
2343  "it first.");
2344 
2345  // This does the right thing (reports an empty row) if the input
2346  // row is invalid.
2347  const RowInfo rowinfo = this->getRowInfo (localRow);
2348  // No side effects on error.
2349  const size_t theNumEntries = rowinfo.numEntries;
2350  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2351  (static_cast<size_t> (indices.size ()) < theNumEntries,std::runtime_error,
2352  "Specified storage (size==" << indices.size () << ") does not suffice "
2353  "to hold all " << theNumEntries << " entry/ies for this row.");
2354  numEntries = theNumEntries;
2355 
2356  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2357  if (isLocallyIndexed ()) {
2358  auto lclInds = getLocalIndsViewHost(rowinfo);
2359  for (size_t j = 0; j < theNumEntries; ++j) {
2360  indices[j] = lclInds(j);
2361  }
2362  }
2363  else if (isGloballyIndexed ()) {
2364  auto gblInds = getGlobalIndsViewHost(rowinfo);
2365  for (size_t j = 0; j < theNumEntries; ++j) {
2366  indices[j] = colMap_->getLocalElement (gblInds(j));
2367  }
2368  }
2369  }
2370  }
2371 
2372 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
2373  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2374  void
2376  getLocalRowCopy (LocalOrdinal localRow,
2377  const Teuchos::ArrayView<LocalOrdinal>&indices,
2378  size_t& numEntries) const
2379  {
2380  using Teuchos::ArrayView;
2381  const char tfecfFuncName[] = "getLocalRowCopy: ";
2382 
2383  TEUCHOS_TEST_FOR_EXCEPTION(
2384  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2385  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2386  "does not have a column Map yet. That means we don't have local indices "
2387  "for columns yet, so it doesn't make sense to call this method. If the "
2388  "graph doesn't have a column Map yet, you should call fillComplete on "
2389  "it first.");
2390 
2391  // This does the right thing (reports an empty row) if the input
2392  // row is invalid.
2393  const RowInfo rowinfo = this->getRowInfo (localRow);
2394  // No side effects on error.
2395  const size_t theNumEntries = rowinfo.numEntries;
2396  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2397  (static_cast<size_t> (indices.size ()) < theNumEntries,std::runtime_error,
2398  "Specified storage (size==" << indices.size () << ") does not suffice "
2399  "to hold all " << theNumEntries << " entry/ies for this row.");
2400  numEntries = theNumEntries;
2401 
2402  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2403  if (isLocallyIndexed ()) {
2404  auto lclInds = getLocalIndsViewHost(rowinfo);
2405  for (size_t j = 0; j < theNumEntries; ++j) {
2406  indices[j] = lclInds(j);
2407  }
2408  }
2409  else if (isGloballyIndexed ()) {
2410  auto gblInds = getGlobalIndsViewHost(rowinfo);
2411  for (size_t j = 0; j < theNumEntries; ++j) {
2412  indices[j] = colMap_->getLocalElement (gblInds(j));
2413  }
2414  }
2415  }
2416  }
2417 #endif
2418 
2419 
2420  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2421  void
2423  getGlobalRowCopy (GlobalOrdinal globalRow,
2424  nonconst_global_inds_host_view_type &indices,
2425  size_t& numEntries) const
2426  {
2427  using Teuchos::ArrayView;
2428  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2429 
2430  // This does the right thing (reports an empty row) if the input
2431  // row is invalid.
2432  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2433  const size_t theNumEntries = rowinfo.numEntries;
2434  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2435  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2436  "Specified storage (size==" << indices.size () << ") does not suffice "
2437  "to hold all " << theNumEntries << " entry/ies for this row.");
2438  numEntries = theNumEntries; // first side effect
2439 
2440  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2441  if (isLocallyIndexed ()) {
2442  auto lclInds = getLocalIndsViewHost(rowinfo);
2443  for (size_t j = 0; j < theNumEntries; ++j) {
2444  indices[j] = colMap_->getGlobalElement (lclInds(j));
2445  }
2446  }
2447  else if (isGloballyIndexed ()) {
2448  auto gblInds = getGlobalIndsViewHost(rowinfo);
2449  for (size_t j = 0; j < theNumEntries; ++j) {
2450  indices[j] = gblInds(j);
2451  }
2452  }
2453  }
2454  }
2455 
2456 
2457 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
2458  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2459  void
2461  getGlobalRowCopy (GlobalOrdinal globalRow,
2462  const Teuchos::ArrayView<GlobalOrdinal>& indices,
2463  size_t& numEntries) const
2464  {
2465  using Teuchos::ArrayView;
2466  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2467 
2468  // This does the right thing (reports an empty row) if the input
2469  // row is invalid.
2470  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2471  const size_t theNumEntries = rowinfo.numEntries;
2472  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2473  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2474  "Specified storage (size==" << indices.size () << ") does not suffice "
2475  "to hold all " << theNumEntries << " entry/ies for this row.");
2476  numEntries = theNumEntries; // first side effect
2477 
2478  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2479  if (isLocallyIndexed ()) {
2480  auto lclInds = getLocalIndsViewHost(rowinfo);
2481  for (size_t j = 0; j < theNumEntries; ++j) {
2482  indices[j] = colMap_->getGlobalElement (lclInds(j));
2483  }
2484  }
2485  else if (isGloballyIndexed ()) {
2486  auto gblInds = getGlobalIndsViewHost(rowinfo);
2487  for (size_t j = 0; j < theNumEntries; ++j) {
2488  indices[j] = gblInds(j);
2489  }
2490  }
2491  }
2492  }
2493 #endif
2494 
2495 
2496  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2497  void
2500  const LocalOrdinal localRow,
2501  local_inds_host_view_type &indices) const
2502  {
2503  const char tfecfFuncName[] = "getLocalRowView: ";
2504 
2505  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2506  (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2507  "currently stored as global indices, so we cannot return a view with "
2508  "local column indices, whether or not the graph has a column Map. If "
2509  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2510 
2511  const RowInfo rowInfo = getRowInfo (localRow);
2512  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2513  rowInfo.numEntries > 0) {
2514  indices = lclIndsUnpacked_wdv.getHostSubview(rowInfo.offset1D,
2515  rowInfo.numEntries,
2516  Access::ReadOnly);
2517  }
2518  else {
2519  // This does the right thing (reports an empty row) if the input
2520  // row is invalid.
2521  indices = local_inds_host_view_type();
2522  }
2523 
2524  if (debug_) {
2525  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2526  (static_cast<size_t> (indices.size ()) !=
2527  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2528  "= " << indices.extent(0) << " != getNumEntriesInLocalRow(localRow=" <<
2529  localRow << ") = " << getNumEntriesInLocalRow(localRow) <<
2530  ". Please report this bug to the Tpetra developers.");
2531  }
2532  }
2533 
2534 
2535  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2536  void
2539  const GlobalOrdinal globalRow,
2540  global_inds_host_view_type &indices) const
2541  {
2542  const char tfecfFuncName[] = "getGlobalRowView: ";
2543 
2544  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2545  (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2546  "currently stored as local indices, so we cannot return a view with "
2547  "global column indices. Use getGlobalRowCopy() instead.");
2548 
2549  // This does the right thing (reports an empty row) if the input
2550  // row is invalid.
2551  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2552  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2553  rowInfo.numEntries > 0) {
2554  indices = gblInds_wdv.getHostSubview(rowInfo.offset1D,
2555  rowInfo.numEntries,
2556  Access::ReadOnly);
2557  }
2558  else {
2559  indices = typename global_inds_dualv_type::t_host::const_type();
2560  }
2561  if (debug_) {
2562  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2563  (static_cast<size_t> (indices.size ()) !=
2564  getNumEntriesInGlobalRow (globalRow),
2565  std::logic_error, "indices.size() = " << indices.extent(0)
2566  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2567  << getNumEntriesInGlobalRow (globalRow)
2568  << ". Please report this bug to the Tpetra developers.");
2569  }
2570  }
2571 
2572 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
2573  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2574  void
2575 // TPETRA_DEPRECATED
2577  getLocalRowView (const LocalOrdinal localRow,
2578  Teuchos::ArrayView<const LocalOrdinal>& indices) const
2579  {
2580  const char tfecfFuncName[] = "getLocalRowView: ";
2581 
2582  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2583  (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2584  "currently stored as global indices, so we cannot return a view with "
2585  "local column indices, whether or not the graph has a column Map. If "
2586  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2587 
2588  // This does the right thing (reports an empty row) if the input
2589  // row is invalid.
2590  const RowInfo rowInfo = getRowInfo (localRow);
2591  indices = Teuchos::null;
2592  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2593  rowInfo.numEntries > 0) {
2594  indices = this->getLocalView (rowInfo);
2595  // getLocalView returns a view of the _entire_ row, including
2596  // any extra space at the end (which 1-D unpacked storage
2597  // might have, for example). That's why we have to take a
2598  // subview of the returned view.
2599  indices = indices (0, rowInfo.numEntries);
2600  }
2601 
2602  if (debug_) {
2603  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2604  (static_cast<size_t> (indices.size ()) !=
2605  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2606  "= " << indices.size () << " != getNumEntriesInLocalRow(localRow=" <<
2607  localRow << ") = " << getNumEntriesInLocalRow (localRow) <<
2608  ". Please report this bug to the Tpetra developers.");
2609  }
2610  }
2611 
2612 #endif
2613 
2614 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
2615  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2616  void
2617 // TPETRA_DEPRECATED
2619  getGlobalRowView (const GlobalOrdinal globalRow,
2620  Teuchos::ArrayView<const GlobalOrdinal>& indices) const
2621  {
2622  const char tfecfFuncName[] = "getGlobalRowView: ";
2623 
2624  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2625  (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2626  "currently stored as local indices, so we cannot return a view with "
2627  "global column indices. Use getGlobalRowCopy() instead.");
2628 
2629  // This does the right thing (reports an empty row) if the input
2630  // row is invalid.
2631  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2632  indices = Teuchos::null;
2633  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2634  rowInfo.numEntries > 0) {
2635  indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries);
2636  }
2637 
2638  if (debug_) {
2639  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2640  (static_cast<size_t> (indices.size ()) !=
2641  getNumEntriesInGlobalRow (globalRow),
2642  std::logic_error, "indices.size() = " << indices.size ()
2643  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2644  << getNumEntriesInGlobalRow (globalRow)
2645  << ". Please report this bug to the Tpetra developers.");
2646  }
2647  }
2648 #endif
2649 
2650 
2651  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2652  void
2654  insertLocalIndices (const LocalOrdinal localRow,
2655  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2656  {
2657  const char tfecfFuncName[] = "insertLocalIndices: ";
2658 
2659  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2660  (! isFillActive (), std::runtime_error, "Fill must be active.");
2661  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2662  (isGloballyIndexed (), std::runtime_error,
2663  "Graph indices are global; use insertGlobalIndices().");
2664  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2665  (! hasColMap (), std::runtime_error,
2666  "Cannot insert local indices without a column Map.");
2667  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2668  (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2669  "Local row index " << localRow << " is not in the row Map "
2670  "on the calling process.");
2671  if (! indicesAreAllocated ()) {
2672  allocateIndices (LocalIndices, verbose_);
2673  }
2674 
2675  if (debug_) {
2676  // In debug mode, if the graph has a column Map, test whether any
2677  // of the given column indices are not in the column Map. Keep
2678  // track of the invalid column indices so we can tell the user
2679  // about them.
2680  if (hasColMap ()) {
2681  using Teuchos::Array;
2682  using Teuchos::toString;
2683  using std::endl;
2684  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2685 
2686  const map_type& colMap = *colMap_;
2687  Array<LocalOrdinal> badColInds;
2688  bool allInColMap = true;
2689  for (size_type k = 0; k < indices.size (); ++k) {
2690  if (! colMap.isNodeLocalElement (indices[k])) {
2691  allInColMap = false;
2692  badColInds.push_back (indices[k]);
2693  }
2694  }
2695  if (! allInColMap) {
2696  std::ostringstream os;
2697  os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2698  "entries in owned row " << localRow << ", at the following column "
2699  "indices: " << toString (indices) << "." << endl;
2700  os << "Of those, the following indices are not in the column Map on "
2701  "this process: " << toString (badColInds) << "." << endl << "Since "
2702  "the graph has a column Map already, it is invalid to insert entries "
2703  "at those locations.";
2704  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2705  }
2706  }
2707  }
2708 
2709  insertLocalIndicesImpl (localRow, indices);
2710 
2711  if (debug_) {
2712  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2713  (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
2714  "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2715  "! isLocallyIndexed() is true. Please report this bug to the "
2716  "Tpetra developers.");
2717  }
2718  }
2719 
2720  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2721  void
2723  insertLocalIndices (const LocalOrdinal localRow,
2724  const LocalOrdinal numEnt,
2725  const LocalOrdinal inds[])
2726  {
2727  Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
2728  this->insertLocalIndices (localRow, indsT);
2729  }
2730 
2731 
2732  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2733  void
2735  insertGlobalIndices (const GlobalOrdinal gblRow,
2736  const LocalOrdinal numInputInds,
2737  const GlobalOrdinal inputGblColInds[])
2738  {
2739  typedef LocalOrdinal LO;
2740  const char tfecfFuncName[] = "insertGlobalIndices: ";
2741 
2742  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2743  (this->isLocallyIndexed (), std::runtime_error,
2744  "graph indices are local; use insertLocalIndices().");
2745  // This can't really be satisfied for now, because if we are
2746  // fillComplete(), then we are local. In the future, this may
2747  // change. However, the rule that modification require active
2748  // fill will not change.
2749  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2750  (! this->isFillActive (), std::runtime_error,
2751  "You are not allowed to call this method if fill is not active. "
2752  "If fillComplete has been called, you must first call resumeFill "
2753  "before you may insert indices.");
2754  if (! indicesAreAllocated ()) {
2755  allocateIndices (GlobalIndices, verbose_);
2756  }
2757  const LO lclRow = this->rowMap_->getLocalElement (gblRow);
2758  if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2759  if (debug_) {
2760  if (this->hasColMap ()) {
2761  using std::endl;
2762  const map_type& colMap = * (this->colMap_);
2763  // In a debug build, keep track of the nonowned ("bad") column
2764  // indices, so that we can display them in the exception
2765  // message. In a release build, just ditch the loop early if
2766  // we encounter a nonowned column index.
2767  std::vector<GlobalOrdinal> badColInds;
2768  bool allInColMap = true;
2769  for (LO k = 0; k < numInputInds; ++k) {
2770  if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
2771  allInColMap = false;
2772  badColInds.push_back (inputGblColInds[k]);
2773  }
2774  }
2775  if (! allInColMap) {
2776  std::ostringstream os;
2777  os << "You attempted to insert entries in owned row " << gblRow
2778  << ", at the following column indices: [";
2779  for (LO k = 0; k < numInputInds; ++k) {
2780  os << inputGblColInds[k];
2781  if (k + static_cast<LO> (1) < numInputInds) {
2782  os << ",";
2783  }
2784  }
2785  os << "]." << endl << "Of those, the following indices are not in "
2786  "the column Map on this process: [";
2787  for (size_t k = 0; k < badColInds.size (); ++k) {
2788  os << badColInds[k];
2789  if (k + size_t (1) < badColInds.size ()) {
2790  os << ",";
2791  }
2792  }
2793  os << "]." << endl << "Since the matrix has a column Map already, "
2794  "it is invalid to insert entries at those locations.";
2795  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2796  (true, std::invalid_argument, os.str ());
2797  }
2798  }
2799  } // debug_
2800  this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
2801  }
2802  else { // a nonlocal row
2803  this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
2804  numInputInds);
2805  }
2806  }
2807 
2808 
2809  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2810  void
2812  insertGlobalIndices (const GlobalOrdinal gblRow,
2813  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
2814  {
2815  this->insertGlobalIndices (gblRow, inputGblColInds.size (),
2816  inputGblColInds.getRawPtr ());
2817  }
2818 
2819 
2820  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2821  void
2823  insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
2824  const GlobalOrdinal gblColInds[],
2825  const LocalOrdinal numGblColInds)
2826  {
2827  typedef LocalOrdinal LO;
2828  typedef GlobalOrdinal GO;
2829  const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2830 
2831  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2832  (this->isLocallyIndexed (), std::runtime_error,
2833  "Graph indices are local; use insertLocalIndices().");
2834  // This can't really be satisfied for now, because if we are
2835  // fillComplete(), then we are local. In the future, this may
2836  // change. However, the rule that modification require active
2837  // fill will not change.
2838  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2839  (! this->isFillActive (), std::runtime_error,
2840  "You are not allowed to call this method if fill is not active. "
2841  "If fillComplete has been called, you must first call resumeFill "
2842  "before you may insert indices.");
2843  if (! indicesAreAllocated ()) {
2844  allocateIndices (GlobalIndices, verbose_);
2845  }
2846 
2847  Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
2848  // If we have a column Map, use it to filter the entries.
2849  if (! colMap_.is_null ()) {
2850  const map_type& colMap = * (this->colMap_);
2851 
2852  LO curOffset = 0;
2853  while (curOffset < numGblColInds) {
2854  // Find a sequence of input indices that are in the column Map
2855  // on the calling process. Doing a sequence at a time,
2856  // instead of one at a time, amortizes some overhead.
2857  LO endOffset = curOffset;
2858  for ( ; endOffset < numGblColInds; ++endOffset) {
2859  const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
2860  if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2861  break; // first entry, in current sequence, not in the column Map
2862  }
2863  }
2864  // curOffset, endOffset: half-exclusive range of indices in
2865  // the column Map on the calling process. If endOffset ==
2866  // curOffset, the range is empty.
2867  const LO numIndInSeq = (endOffset - curOffset);
2868  if (numIndInSeq != 0) {
2869  this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
2870  numIndInSeq);
2871  }
2872  // Invariant before this line: Either endOffset ==
2873  // numGblColInds, or gblColInds[endOffset] is not in the
2874  // column Map on the calling process.
2875  curOffset = endOffset + 1;
2876  }
2877  }
2878  else {
2879  this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
2880  gblColInds_av.size ());
2881  }
2882  }
2883 
2884  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2885  void
2887  insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
2888  const GlobalOrdinal gblColInds[],
2889  const LocalOrdinal numGblColInds)
2890  {
2891  // This creates the std::vector if it doesn't exist yet.
2892  // std::map's operator[] does a lookup each time, so it's better
2893  // to pull nonlocals_[grow] out of the loop.
2894  std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2895  for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2896  // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2897  // order to avoid duplicates. globalAssemble() sorts these
2898  // anyway.
2899  nonlocalRow.push_back (gblColInds[k]);
2900  }
2901  }
2902 
2903  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2904  void
2906  removeLocalIndices (LocalOrdinal lrow)
2907  {
2908  const char tfecfFuncName[] = "removeLocalIndices: ";
2909  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2910  ! isFillActive (), std::runtime_error, "requires that fill is active.");
2911  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2912  isStorageOptimized (), std::runtime_error,
2913  "cannot remove indices after optimizeStorage() has been called.");
2914  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2915  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
2916  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2917  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
2918  "Local row " << lrow << " is not in the row Map on the calling process.");
2919  if (! indicesAreAllocated ()) {
2920  allocateIndices (LocalIndices, verbose_);
2921  }
2922 
2923  // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on
2924  // all processes?
2925  clearGlobalConstants ();
2926 
2927  if (k_numRowEntries_.extent (0) != 0) {
2928  this->k_numRowEntries_(lrow) = 0;
2929  }
2930 
2931  if (debug_) {
2932  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2933  (getNumEntriesInLocalRow (lrow) != 0 ||
2934  ! indicesAreAllocated () ||
2935  ! isLocallyIndexed (), std::logic_error,
2936  "Violated stated post-conditions. Please contact Tpetra team.");
2937  }
2938  }
2939 
2940 
2941  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2942  void
2944  setAllIndices (const typename local_graph_device_type::row_map_type& rowPointers,
2945  const typename local_graph_device_type::entries_type::non_const_type& columnIndices)
2946  {
2947  const char tfecfFuncName[] = "setAllIndices: ";
2948  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2949  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
2950  "The graph must have a column Map before you may call this method.");
2951  LocalOrdinal numLocalRows = this->getNodeNumRows ();
2952  {
2953  LocalOrdinal rowPtrLen = rowPointers.size();
2954  if(numLocalRows == 0) {
2955  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2956  rowPtrLen != 0 && rowPtrLen != 1,
2957  std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2958  }
2959  else {
2960  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2961  rowPtrLen != numLocalRows + 1,
2962  std::runtime_error, "rowPointers.size() = " << rowPtrLen <<
2963  " != this->getNodeNumRows()+1 = " << (numLocalRows + 1) << ".");
2964  }
2965  }
2966 
2967  if (debug_ && this->isSorted()) {
2968  // Verify that the local indices are actually sorted
2969  int notSorted = 0;
2970  using exec_space = typename local_graph_device_type::execution_space;
2971  using size_type = typename local_graph_device_type::size_type;
2972  Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2973  KOKKOS_LAMBDA (const LocalOrdinal i, int& lNotSorted)
2974  {
2975  size_type rowBegin = rowPointers(i);
2976  size_type rowEnd = rowPointers(i + 1);
2977  for(size_type j = rowBegin + 1; j < rowEnd; j++)
2978  {
2979  if(columnIndices(j - 1) > columnIndices(j))
2980  {
2981  lNotSorted = 1;
2982  }
2983  }
2984  }, notSorted);
2985  //All-reduce notSorted to avoid rank divergence
2986  int globalNotSorted = 0;
2987  auto comm = this->getComm();
2988  Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, notSorted,
2989  Teuchos::outArg (globalNotSorted));
2990  if (globalNotSorted)
2991  {
2992  std::string message;
2993  if (notSorted)
2994  {
2995  //Only print message from ranks with the problem
2996  message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2997  }
2998  Details::gathervPrint(std::cout, message, *comm);
2999  throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
3000  }
3001  }
3002 
3003  // FIXME (mfh 07 Aug 2014) We need to relax this restriction,
3004  // since the future model will be allocation at construction, not
3005  // lazy allocation on first insert.
3006  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3007  ((this->lclIndsUnpacked_wdv.extent (0) != 0 || this->gblInds_wdv.extent (0) != 0),
3008  std::runtime_error, "You may not call this method if 1-D data "
3009  "structures are already allocated.");
3010 
3011  indicesAreAllocated_ = true;
3012  indicesAreLocal_ = true;
3013  indicesAreSorted_ = true;
3014  noRedundancies_ = true;
3015  lclIndsPacked_wdv= local_inds_wdv_type(columnIndices);
3016  lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3017  setRowPtrsUnpacked(rowPointers);
3018  setRowPtrsPacked(rowPointers);
3019 
3020  set_need_sync_host_uvm_access(); // columnIndices and rowPointers potentially still in a kernel
3021 
3022  // Storage MUST be packed, since the interface doesn't give any
3023  // way to indicate any extra space at the end of each row.
3024  storageStatus_ = Details::STORAGE_1D_PACKED;
3025 
3026  // These normally get cleared out at the end of allocateIndices.
3027  // It makes sense to clear them out here, because at the end of
3028  // this method, the graph is allocated on the calling process.
3029  numAllocForAllRows_ = 0;
3030  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3031 
3032  checkInternalState ();
3033  }
3034 
3035 
3036  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3037  void
3039  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
3040  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
3041  {
3042  using Kokkos::View;
3043  typedef typename local_graph_device_type::row_map_type row_map_type;
3044  typedef typename row_map_type::array_layout layout_type;
3045  typedef typename row_map_type::non_const_value_type row_offset_type;
3046  typedef View<size_t*, layout_type , Kokkos::HostSpace,
3047  Kokkos::MemoryUnmanaged> input_view_type;
3048  typedef typename row_map_type::non_const_type nc_row_map_type;
3049 
3050  const size_t size = static_cast<size_t> (rowPointers.size ());
3051  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
3052  input_view_type ptr_in (rowPointers.getRawPtr (), size);
3053 
3054  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
3055 
3056  if (same) { // size_t == row_offset_type
3057  // This compile-time logic ensures that the compiler never sees
3058  // an assignment of View<row_offset_type*, ...> to View<size_t*,
3059  // ...> unless size_t == row_offset_type.
3060  input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
3061  Kokkos::deep_copy (Kokkos::Impl::if_c<same,
3062  nc_row_map_type,
3063  input_view_type>::select (ptr_rot, ptr_decoy),
3064  ptr_in);
3065  }
3066  else { // size_t != row_offset_type
3067  // CudaUvmSpace != HostSpace, so this will be false in that case.
3068  constexpr bool inHostMemory =
3069  std::is_same<typename row_map_type::memory_space,
3070  Kokkos::HostSpace>::value;
3071  if (inHostMemory) {
3072  // Copy (with cast from size_t to row_offset_type, with bounds
3073  // checking if necessary) to ptr_rot.
3074  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
3075  }
3076  else { // Copy input row offsets to device first.
3077  //
3078  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
3079  // execution space would avoid the double copy.
3080  //
3081  View<size_t*, layout_type, device_type> ptr_st ("Tpetra::CrsGraph::ptr", size);
3082  Kokkos::deep_copy (ptr_st, ptr_in);
3083  // Copy on device (casting from size_t to row_offset_type,
3084  // with bounds checking if necessary) to ptr_rot. This
3085  // executes in the output View's execution space, which is the
3086  // same as execution_space.
3087  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
3088  }
3089  }
3090 
3091  Kokkos::View<LocalOrdinal*, layout_type, device_type> k_ind =
3092  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
3093  setAllIndices (ptr_rot, k_ind);
3094  }
3095 
3096 
3097  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3098  TPETRA_DEPRECATED
3099  void
3101  getNumEntriesPerLocalRowUpperBound (Teuchos::ArrayRCP<const size_t>& boundPerLocalRow,
3102  size_t& boundForAllLocalRows,
3103  bool& boundSameForAllLocalRows) const
3104  {
3105  const char tfecfFuncName[] = "getNumEntriesPerLocalRowUpperBound: ";
3106  const char suffix[] = " Please report this bug to the Tpetra developers.";
3107 
3108  // The three output arguments. We assign them to the actual
3109  // output arguments at the end, in order to implement
3110  // transactional semantics.
3111  Teuchos::ArrayRCP<const size_t> numEntriesPerRow;
3112  size_t numEntriesForAll = 0;
3113  bool allRowsSame = true;
3114 
3115  const ptrdiff_t numRows = static_cast<ptrdiff_t> (this->getNodeNumRows ());
3116 
3117  if (this->indicesAreAllocated ()) {
3118  if (this->isStorageOptimized ()) {
3119  // left with the case that we have optimized storage. in this
3120  // case, we have to construct a list of row sizes.
3121  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3122  (numRows != 0 && rowPtrsUnpacked_host_.extent (0) == 0, std::logic_error,
3123  "The graph has " << numRows << " (> 0) row"
3124  << (numRows != 1 ? "s" : "") << " on the calling process, "
3125  "but the k_rowPtrs_ array has zero entries." << suffix);
3126  Teuchos::ArrayRCP<size_t> numEnt;
3127  if (numRows != 0) {
3128  numEnt = Teuchos::arcp<size_t> (numRows);
3129  }
3130 
3131  // We have to iterate through the row offsets anyway, so we
3132  // might as well check whether all rows' bounds are the same.
3133  bool allRowsReallySame = false;
3134  for (ptrdiff_t i = 0; i < numRows; ++i) {
3135  numEnt[i] = rowPtrsUnpacked_host_(i+1) - rowPtrsUnpacked_host_(i);
3136  if (i != 0 && numEnt[i] != numEnt[i-1]) {
3137  allRowsReallySame = false;
3138  }
3139  }
3140  if (allRowsReallySame) {
3141  if (numRows == 0) {
3142  numEntriesForAll = 0;
3143  } else {
3144  numEntriesForAll = numEnt[1] - numEnt[0];
3145  }
3146  allRowsSame = true;
3147  }
3148  else {
3149  numEntriesPerRow = numEnt; // Teuchos::arcp_const_cast<const size_t> (numEnt);
3150  allRowsSame = false; // conservatively; we don't check the array
3151  }
3152  }
3153  else if (k_numRowEntries_.extent (0) != 0) {
3154  // This is a shallow copy; the ArrayRCP wraps the View in a
3155  // custom destructor, which ensures correct deallocation if
3156  // that is the only reference to the View. Furthermore, this
3157  // View is a host View, so this doesn't assume UVM.
3158  numEntriesPerRow = Kokkos::Compat::persistingView (k_numRowEntries_);
3159  allRowsSame = false; // conservatively; we don't check the array
3160  }
3161  else {
3162  numEntriesForAll = 0;
3163  allRowsSame = true;
3164  }
3165  }
3166  else { // indices not allocated
3167  if (k_numAllocPerRow_.extent (0) != 0) {
3168  // This is a shallow copy; the ArrayRCP wraps the View in a
3169  // custom destructor, which ensures correct deallocation if
3170  // that is the only reference to the View. Furthermore, this
3171  // View is a host View, so this doesn't assume UVM.
3172  numEntriesPerRow = Kokkos::Compat::persistingView (k_numAllocPerRow_);
3173  allRowsSame = false; // conservatively; we don't check the array
3174  }
3175  else {
3176  numEntriesForAll = numAllocForAllRows_;
3177  allRowsSame = true;
3178  }
3179  }
3180 
3181  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3182  (numEntriesForAll != 0 && numEntriesPerRow.size () != 0, std::logic_error,
3183  "numEntriesForAll and numEntriesPerRow are not consistent. The former "
3184  "is nonzero (" << numEntriesForAll << "), but the latter has nonzero "
3185  "size " << numEntriesPerRow.size () << "." << suffix);
3186  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3187  (numEntriesForAll != 0 && ! allRowsSame, std::logic_error,
3188  "numEntriesForAll and allRowsSame are not consistent. The former "
3189  "is nonzero (" << numEntriesForAll << "), but the latter is false."
3190  << suffix);
3191  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3192  (numEntriesPerRow.size () != 0 && allRowsSame, std::logic_error,
3193  "numEntriesPerRow and allRowsSame are not consistent. The former has "
3194  "nonzero length " << numEntriesForAll << ", but the latter is true."
3195  << suffix);
3196 
3197  boundPerLocalRow = numEntriesPerRow;
3198  boundForAllLocalRows = numEntriesForAll;
3199  boundSameForAllLocalRows = allRowsSame;
3200  }
3201 
3202 
3203  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3204  void
3206  globalAssemble ()
3207  {
3208  using Teuchos::Comm;
3209  using Teuchos::outArg;
3210  using Teuchos::RCP;
3211  using Teuchos::rcp;
3212  using Teuchos::REDUCE_MAX;
3213  using Teuchos::REDUCE_MIN;
3214  using Teuchos::reduceAll;
3215  using std::endl;
3216  using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
3217  using LO = local_ordinal_type;
3218  using GO = global_ordinal_type;
3219  using size_type = typename Teuchos::Array<GO>::size_type;
3220  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
3221 
3222  std::unique_ptr<std::string> prefix;
3223  if (verbose_) {
3224  prefix = this->createPrefix("CrsGraph", "globalAssemble");
3225  std::ostringstream os;
3226  os << *prefix << "Start" << endl;
3227  std::cerr << os.str();
3228  }
3229  RCP<const Comm<int> > comm = getComm ();
3230 
3231  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3232  (! isFillActive (), std::runtime_error, "Fill must be active before "
3233  "you may call this method.");
3234 
3235  const size_t myNumNonlocalRows = this->nonlocals_.size ();
3236 
3237  // If no processes have nonlocal rows, then we don't have to do
3238  // anything. Checking this is probably cheaper than constructing
3239  // the Map of nonlocal rows (see below) and noticing that it has
3240  // zero global entries.
3241  {
3242  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
3243  int someoneHasNonlocalRows = 0;
3244  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
3245  outArg (someoneHasNonlocalRows));
3246  if (someoneHasNonlocalRows == 0) {
3247  if (verbose_) {
3248  std::ostringstream os;
3249  os << *prefix << "Done: No nonlocal rows" << endl;
3250  std::cerr << os.str();
3251  }
3252  return;
3253  }
3254  else if (verbose_) {
3255  std::ostringstream os;
3256  os << *prefix << "At least 1 process has nonlocal rows"
3257  << endl;
3258  std::cerr << os.str();
3259  }
3260  }
3261 
3262  // 1. Create a list of the "nonlocal" rows on each process. this
3263  // requires iterating over nonlocals_, so while we do this,
3264  // deduplicate the entries and get a count for each nonlocal
3265  // row on this process.
3266  // 2. Construct a new row Map corresponding to those rows. This
3267  // Map is likely overlapping. We know that the Map is not
3268  // empty on all processes, because the above all-reduce and
3269  // return exclude that case.
3270 
3271  RCP<const map_type> nonlocalRowMap;
3272  // Keep this for CrsGraph's constructor.
3273  Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
3274  {
3275  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
3276  size_type curPos = 0;
3277  for (auto mapIter = this->nonlocals_.begin ();
3278  mapIter != this->nonlocals_.end ();
3279  ++mapIter, ++curPos) {
3280  myNonlocalGblRows[curPos] = mapIter->first;
3281  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
3282  std::sort (gblCols.begin (), gblCols.end ());
3283  auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
3284  gblCols.erase (vecLast, gblCols.end ());
3285  numEntPerNonlocalRow[curPos] = gblCols.size ();
3286  }
3287 
3288  // Currently, Map requires that its indexBase be the global min
3289  // of all its global indices. Map won't compute this for us, so
3290  // we must do it. If our process has no nonlocal rows, set the
3291  // "min" to the max possible GO value. This ensures that if
3292  // some process has at least one nonlocal row, then it will pick
3293  // that up as the min. We know that at least one process has a
3294  // nonlocal row, since the all-reduce and return at the top of
3295  // this method excluded that case.
3296  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
3297  {
3298  auto iter = std::min_element (myNonlocalGblRows.begin (),
3299  myNonlocalGblRows.end ());
3300  if (iter != myNonlocalGblRows.end ()) {
3301  myMinNonlocalGblRow = *iter;
3302  }
3303  }
3304  GO gblMinNonlocalGblRow = 0;
3305  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
3306  outArg (gblMinNonlocalGblRow));
3307  const GO indexBase = gblMinNonlocalGblRow;
3308  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3309  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3310  }
3311 
3312  if (verbose_) {
3313  std::ostringstream os;
3314  os << *prefix << "nonlocalRowMap->getIndexBase()="
3315  << nonlocalRowMap->getIndexBase() << endl;
3316  std::cerr << os.str();
3317  }
3318 
3319  // 3. Use the column indices for each nonlocal row, as stored in
3320  // nonlocals_, to construct a CrsGraph corresponding to
3321  // nonlocal rows. We need, but we have, exact counts of the
3322  // number of entries in each nonlocal row.
3323 
3324  RCP<crs_graph_type> nonlocalGraph =
3325  rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow(),
3326  StaticProfile));
3327  {
3328  size_type curPos = 0;
3329  for (auto mapIter = this->nonlocals_.begin ();
3330  mapIter != this->nonlocals_.end ();
3331  ++mapIter, ++curPos) {
3332  const GO gblRow = mapIter->first;
3333  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3334  const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3335  nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3336  }
3337  }
3338  if (verbose_) {
3339  std::ostringstream os;
3340  os << *prefix << "Built nonlocal graph" << endl;
3341  std::cerr << os.str();
3342  }
3343  // There's no need to fill-complete the nonlocals graph.
3344  // We just use it as a temporary container for the Export.
3345 
3346  // 4. If the original row Map is one to one, then we can Export
3347  // directly from nonlocalGraph into this. Otherwise, we have
3348  // to create a temporary graph with a one-to-one row Map,
3349  // Export into that, then Import from the temporary graph into
3350  // *this.
3351 
3352  auto origRowMap = this->getRowMap ();
3353  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3354 
3355  if (origRowMapIsOneToOne) {
3356  if (verbose_) {
3357  std::ostringstream os;
3358  os << *prefix << "Original row Map is 1-to-1" << endl;
3359  std::cerr << os.str();
3360  }
3361  export_type exportToOrig (nonlocalRowMap, origRowMap);
3362  this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3363  // We're done at this point!
3364  }
3365  else {
3366  if (verbose_) {
3367  std::ostringstream os;
3368  os << *prefix << "Original row Map is NOT 1-to-1" << endl;
3369  std::cerr << os.str();
3370  }
3371  // If you ask a Map whether it is one to one, it does some
3372  // communication and stashes intermediate results for later use
3373  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3374  // much more then the original cost of calling isOneToOne.
3375  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3376  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3377 
3378  // Create a temporary graph with the one-to-one row Map.
3379  //
3380  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3381  // row, to avoid reallocation during the Export operation.
3382  crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3383 
3384  // Export from graph of nonlocals into the temp one-to-one graph.
3385  if (verbose_) {
3386  std::ostringstream os;
3387  os << *prefix << "Export nonlocal graph" << endl;
3388  std::cerr << os.str();
3389  }
3390  oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3391 
3392  // We don't need the graph of nonlocals anymore, so get rid of
3393  // it, to keep the memory high-water mark down.
3394  nonlocalGraph = Teuchos::null;
3395 
3396  // Import from the one-to-one graph to the original graph.
3397  import_type importToOrig (oneToOneRowMap, origRowMap);
3398  if (verbose_) {
3399  std::ostringstream os;
3400  os << *prefix << "Import nonlocal graph" << endl;
3401  std::cerr << os.str();
3402  }
3403  this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3404  }
3405 
3406  // It's safe now to clear out nonlocals_, since we've already
3407  // committed side effects to *this. The standard idiom for
3408  // clearing a Container like std::map, is to swap it with an empty
3409  // Container and let the swapped Container fall out of scope.
3410  decltype (this->nonlocals_) newNonlocals;
3411  std::swap (this->nonlocals_, newNonlocals);
3412 
3413  checkInternalState ();
3414  if (verbose_) {
3415  std::ostringstream os;
3416  os << *prefix << "Done" << endl;
3417  std::cerr << os.str();
3418  }
3419  }
3420 
3421 
3422  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3423  void
3425  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3426  {
3427  clearGlobalConstants();
3428  if (params != Teuchos::null) this->setParameterList (params);
3429  // either still sorted/merged or initially sorted/merged
3430  indicesAreSorted_ = true;
3431  noRedundancies_ = true;
3432  fillComplete_ = false;
3433  }
3434 
3435 
3436  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3437  void
3439  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3440  {
3441  // If the graph already has domain and range Maps, don't clobber
3442  // them. If it doesn't, use the current row Map for both the
3443  // domain and range Maps.
3444  //
3445  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3446  // column Map, and column indices are inserted which are not in
3447  // the row Map on any process, this will cause troubles. However,
3448  // that is not a common case for most applications that we
3449  // encounter, and checking for it might require more
3450  // communication.
3451  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3452  if (domMap.is_null ()) {
3453  domMap = this->getRowMap ();
3454  }
3455  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3456  if (ranMap.is_null ()) {
3457  ranMap = this->getRowMap ();
3458  }
3459  this->fillComplete (domMap, ranMap, params);
3460  }
3461 
3462 
3463  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3464  void
3466  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3467  const Teuchos::RCP<const map_type>& rangeMap,
3468  const Teuchos::RCP<Teuchos::ParameterList>& params)
3469  {
3470  using std::endl;
3471  const char tfecfFuncName[] = "fillComplete: ";
3472  const bool verbose = verbose_;
3473 
3474  std::unique_ptr<std::string> prefix;
3475  if (verbose) {
3476  prefix = this->createPrefix("CrsGraph", "fillComplete");
3477  std::ostringstream os;
3478  os << *prefix << "Start" << endl;
3479  std::cerr << os.str();
3480  }
3481 
3482  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3483  (! isFillActive () || isFillComplete (), std::runtime_error,
3484  "Graph fill state must be active (isFillActive() "
3485  "must be true) before calling fillComplete().");
3486 
3487  const int numProcs = getComm ()->getSize ();
3488 
3489  //
3490  // Read and set parameters
3491  //
3492 
3493  // Does the caller want to sort remote GIDs (within those owned by
3494  // the same process) in makeColMap()?
3495  if (! params.is_null ()) {
3496  if (params->isParameter ("sort column map ghost gids")) {
3497  sortGhostsAssociatedWithEachProcessor_ =
3498  params->get<bool> ("sort column map ghost gids",
3499  sortGhostsAssociatedWithEachProcessor_);
3500  }
3501  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3502  sortGhostsAssociatedWithEachProcessor_ =
3503  params->get<bool> ("Sort column Map ghost GIDs",
3504  sortGhostsAssociatedWithEachProcessor_);
3505  }
3506  }
3507 
3508  // If true, the caller promises that no process did nonlocal
3509  // changes since the last call to fillComplete.
3510  bool assertNoNonlocalInserts = false;
3511  if (! params.is_null ()) {
3512  assertNoNonlocalInserts =
3513  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3514  }
3515 
3516  //
3517  // Allocate indices, if they haven't already been allocated
3518  //
3519  if (! indicesAreAllocated ()) {
3520  if (hasColMap ()) {
3521  // We have a column Map, so use local indices.
3522  allocateIndices (LocalIndices, verbose);
3523  } else {
3524  // We don't have a column Map, so use global indices.
3525  allocateIndices (GlobalIndices, verbose);
3526  }
3527  }
3528 
3529  //
3530  // Do global assembly, if requested and if the communicator
3531  // contains more than one process.
3532  //
3533  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3534  if (mayNeedGlobalAssemble) {
3535  // This first checks if we need to do global assembly.
3536  // The check costs a single all-reduce.
3537  globalAssemble ();
3538  }
3539  else {
3540  const size_t numNonlocals = nonlocals_.size();
3541  if (verbose) {
3542  std::ostringstream os;
3543  os << *prefix << "Do not need to call globalAssemble; "
3544  "assertNoNonlocalInserts="
3545  << (assertNoNonlocalInserts ? "true" : "false")
3546  << "numProcs=" << numProcs
3547  << ", nonlocals_.size()=" << numNonlocals << endl;
3548  std::cerr << os.str();
3549  }
3550  const int lclNeededGlobalAssemble =
3551  (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
3552  if (lclNeededGlobalAssemble != 0 && verbose) {
3553  std::ostringstream os;
3554  os << *prefix;
3555  Details::Impl::verbosePrintMap(
3556  os, nonlocals_.begin(), nonlocals_.end(),
3557  nonlocals_.size(), "nonlocals_");
3558  std::cerr << os.str() << endl;
3559  }
3560 
3561  if (debug_) {
3562  auto map = this->getMap();
3563  auto comm = map.is_null() ? Teuchos::null : map->getComm();
3564  int gblNeededGlobalAssemble = lclNeededGlobalAssemble;
3565  if (! comm.is_null()) {
3566  using Teuchos::REDUCE_MAX;
3567  using Teuchos::reduceAll;
3568  reduceAll(*comm, REDUCE_MAX, lclNeededGlobalAssemble,
3569  Teuchos::outArg(gblNeededGlobalAssemble));
3570  }
3571  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3572  (gblNeededGlobalAssemble != 0, std::runtime_error,
3573  "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3574  "least one process in the CrsGraph's communicator. This "
3575  "means either that you incorrectly set the "
3576  "\"No Nonlocal Changes\" fillComplete parameter to true, "
3577  "or that you inserted invalid entries. "
3578  "Rerun with the environment variable TPETRA_VERBOSE="
3579  "CrsGraph set to see the entries of nonlocals_ on every "
3580  "MPI process (WARNING: lots of output).");
3581  }
3582  else {
3583  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3584  (lclNeededGlobalAssemble != 0, std::runtime_error,
3585  "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3586  "calling process. This means either that you incorrectly "
3587  "set the \"No Nonlocal Changes\" fillComplete parameter "
3588  "to true, or that you inserted invalid entries. "
3589  "Rerun with the environment "
3590  "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3591  "of nonlocals_ on every MPI process (WARNING: lots of "
3592  "output).");
3593  }
3594  }
3595 
3596  // Set domain and range Map. This may clear the Import / Export
3597  // objects if the new Maps differ from any old ones.
3598  setDomainRangeMaps (domainMap, rangeMap);
3599 
3600  // If the graph does not already have a column Map (either from
3601  // the user constructor calling the version of the constructor
3602  // that takes a column Map, or from a previous fillComplete call),
3603  // then create it.
3604  Teuchos::Array<int> remotePIDs (0);
3605  const bool mustBuildColMap = ! this->hasColMap ();
3606  if (mustBuildColMap) {
3607  this->makeColMap (remotePIDs); // resized on output
3608  }
3609 
3610  // Make indices local, if they aren't already.
3611  // The method doesn't do any work if the indices are already local.
3612  const std::pair<size_t, std::string> makeIndicesLocalResult =
3613  this->makeIndicesLocal(verbose);
3614 
3615  if (debug_) {
3616  using Details::gathervPrint;
3617  using Teuchos::RCP;
3618  using Teuchos::REDUCE_MIN;
3619  using Teuchos::reduceAll;
3620  using Teuchos::outArg;
3621 
3622  RCP<const map_type> map = this->getMap ();
3623  RCP<const Teuchos::Comm<int> > comm;
3624  if (! map.is_null ()) {
3625  comm = map->getComm ();
3626  }
3627  if (comm.is_null ()) {
3628  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3629  (makeIndicesLocalResult.first != 0, std::runtime_error,
3630  makeIndicesLocalResult.second);
3631  }
3632  else {
3633  const int lclSuccess = (makeIndicesLocalResult.first == 0);
3634  int gblSuccess = 0; // output argument
3635  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3636  if (gblSuccess != 1) {
3637  std::ostringstream os;
3638  gathervPrint (os, makeIndicesLocalResult.second, *comm);
3639  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3640  (true, std::runtime_error, os.str ());
3641  }
3642  }
3643  }
3644  else {
3645  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3646  // the error state to makeImportExport or
3647  // computeGlobalConstants, which may do all-reduces and thus may
3648  // have the opportunity to communicate that error state.
3649  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3650  (makeIndicesLocalResult.first != 0, std::runtime_error,
3651  makeIndicesLocalResult.second);
3652  }
3653 
3654  // If this process has no indices, then CrsGraph considers it
3655  // already trivially sorted and merged. Thus, this method need
3656  // not be called on all processes in the row Map's communicator.
3657  this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3658 
3659  // Make Import and Export objects, if they haven't been made
3660  // already. If we made a column Map above, reuse information from
3661  // that process to avoid communiation in the Import setup.
3662  this->makeImportExport (remotePIDs, mustBuildColMap);
3663 
3664  // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
3665  this->fillLocalGraph (params);
3666 
3667  const bool callComputeGlobalConstants = params.get () == nullptr ||
3668  params->get ("compute global constants", true);
3669  if (callComputeGlobalConstants) {
3670  this->computeGlobalConstants ();
3671  }
3672  else {
3673  this->computeLocalConstants ();
3674  }
3675  this->fillComplete_ = true;
3676  this->checkInternalState ();
3677 
3678  if (verbose) {
3679  std::ostringstream os;
3680  os << *prefix << "Done" << endl;
3681  std::cerr << os.str();
3682  }
3683  }
3684 
3685 
3686  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3687  void
3689  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3690  const Teuchos::RCP<const map_type>& rangeMap,
3691  const Teuchos::RCP<const import_type>& importer,
3692  const Teuchos::RCP<const export_type>& exporter,
3693  const Teuchos::RCP<Teuchos::ParameterList>& params)
3694  {
3695  const char tfecfFuncName[] = "expertStaticFillComplete: ";
3696 #ifdef HAVE_TPETRA_MMM_TIMINGS
3697  std::string label;
3698  if(!params.is_null())
3699  label = params->get("Timer Label",label);
3700  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3701  using Teuchos::TimeMonitor;
3702  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3703 #endif
3704 
3705 
3706  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3707  domainMap.is_null () || rangeMap.is_null (),
3708  std::runtime_error, "The input domain Map and range Map must be nonnull.");
3709  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3710  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3711  "call this method unless the graph has a column Map.");
3712  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3713  getNodeNumRows () > 0 && rowPtrsUnpacked_host_.extent (0) == 0,
3714  std::runtime_error, "The calling process has getNodeNumRows() = "
3715  << getNodeNumRows () << " > 0 rows, but the row offsets array has not "
3716  "been set.");
3717  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3718  static_cast<size_t> (rowPtrsUnpacked_host_.extent (0)) != getNodeNumRows () + 1,
3719  std::runtime_error, "The row offsets array has length " <<
3720  rowPtrsUnpacked_host_.extent (0) << " != getNodeNumRows()+1 = " <<
3721  (getNodeNumRows () + 1) << ".");
3722 
3723  // Note: We don't need to do the following things which are normally done in fillComplete:
3724  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3725 
3726  // Constants from allocateIndices
3727  //
3728  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3729  // away once the graph is allocated. expertStaticFillComplete
3730  // either presumes that the graph is allocated, or "allocates" it.
3731  //
3732  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3733  // version of CrsGraph is to allocate in the constructor, not
3734  // lazily on first insert. That will make both
3735  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3736  numAllocForAllRows_ = 0;
3737  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3738  indicesAreAllocated_ = true;
3739 
3740  // Constants from makeIndicesLocal
3741  //
3742  // The graph has a column Map, so its indices had better be local.
3743  indicesAreLocal_ = true;
3744  indicesAreGlobal_ = false;
3745 
3746  // set domain/range map: may clear the import/export objects
3747 #ifdef HAVE_TPETRA_MMM_TIMINGS
3748  MM = Teuchos::null;
3749  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3750 #endif
3751  setDomainRangeMaps (domainMap, rangeMap);
3752 
3753  // Presume the user sorted and merged the arrays first
3754  indicesAreSorted_ = true;
3755  noRedundancies_ = true;
3756 
3757  // makeImportExport won't create a new importer/exporter if I set one here first.
3758 #ifdef HAVE_TPETRA_MMM_TIMINGS
3759  MM = Teuchos::null;
3760  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3761 #endif
3762 
3763  importer_ = Teuchos::null;
3764  exporter_ = Teuchos::null;
3765  if (importer != Teuchos::null) {
3766  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3767  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3768  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3769  std::invalid_argument,": importer does not match matrix maps.");
3770  importer_ = importer;
3771 
3772  }
3773 
3774 #ifdef HAVE_TPETRA_MMM_TIMINGS
3775  MM = Teuchos::null;
3776  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3777 #endif
3778 
3779  if (exporter != Teuchos::null) {
3780  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3781  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3782  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3783  std::invalid_argument,": exporter does not match matrix maps.");
3784  exporter_ = exporter;
3785  }
3786 
3787 #ifdef HAVE_TPETRA_MMM_TIMINGS
3788  MM = Teuchos::null;
3789  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3790 #endif
3791  Teuchos::Array<int> remotePIDs (0); // unused output argument
3792  this->makeImportExport (remotePIDs, false);
3793 
3794 #ifdef HAVE_TPETRA_MMM_TIMINGS
3795  MM = Teuchos::null;
3796  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3797 #endif
3798  this->fillLocalGraph (params);
3799 
3800  const bool callComputeGlobalConstants = params.get () == nullptr ||
3801  params->get ("compute global constants", true);
3802 
3803  if (callComputeGlobalConstants) {
3804 #ifdef HAVE_TPETRA_MMM_TIMINGS
3805  MM = Teuchos::null;
3806  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3807 #endif // HAVE_TPETRA_MMM_TIMINGS
3808  this->computeGlobalConstants ();
3809  }
3810  else {
3811 #ifdef HAVE_TPETRA_MMM_TIMINGS
3812  MM = Teuchos::null;
3813  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3814 #endif // HAVE_TPETRA_MMM_TIMINGS
3815  this->computeLocalConstants ();
3816  }
3817 
3818  fillComplete_ = true;
3819 
3820 #ifdef HAVE_TPETRA_MMM_TIMINGS
3821  MM = Teuchos::null;
3822  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3823 #endif
3824  checkInternalState ();
3825  }
3826 
3827 
3828  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3829  void
3831  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3832  {
3834  typedef decltype (k_numRowEntries_) row_entries_type;
3835  typedef typename local_graph_device_type::row_map_type row_map_type;
3836  typedef typename row_map_type::non_const_type non_const_row_map_type;
3837  typedef typename local_graph_device_type::entries_type::non_const_type lclinds_1d_type;
3838  const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
3839  "expertStaticFillComplete): ";
3840  const size_t lclNumRows = this->getNodeNumRows ();
3841 
3842  // This method's goal is to fill in the two arrays (compressed
3843  // sparse row format) that define the sparse graph's structure.
3844 
3845  bool requestOptimizedStorage = true;
3846  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3847  requestOptimizedStorage = false;
3848  }
3849 
3850  // The graph's column indices are currently stored in a 1-D
3851  // format, with row offsets in rowPtrsUnpacked_host_ and local column indices
3852  // in k_lclInds1D_.
3853 
3854  if (debug_) {
3855  // The graph's array of row offsets must already be allocated.
3856  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3857  (rowPtrsUnpacked_host_.extent (0) == 0, std::logic_error,
3858  "k_rowPtrs_ has size zero, but shouldn't");
3859  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3860  (rowPtrsUnpacked_host_.extent (0) != lclNumRows + 1, std::logic_error,
3861  "rowPtrsUnpacked_host_.extent(0) = "
3862  << rowPtrsUnpacked_host_.extent (0) << " != (lclNumRows + 1) = "
3863  << (lclNumRows + 1) << ".");
3864  const size_t numOffsets = rowPtrsUnpacked_host_.extent (0);
3865  const auto valToCheck = rowPtrsUnpacked_host_(numOffsets-1);
3866  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3867  (numOffsets != 0 &&
3868  lclIndsUnpacked_wdv.extent (0) != valToCheck,
3869  std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3870  " and lclIndsUnpacked_wdv.extent(0)=" << lclIndsUnpacked_wdv.extent(0)
3871  << " != k_rowPtrs_(" << numOffsets << ")=" << valToCheck
3872  << ".");
3873  }
3874 
3875  size_t allocSize = 0;
3876  try {
3877  allocSize = this->getNodeAllocationSize ();
3878  }
3879  catch (std::logic_error& e) {
3880  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3881  (true, std::logic_error, "getNodeAllocationSize threw "
3882  "std::logic_error: " << e.what ());
3883  }
3884  catch (std::runtime_error& e) {
3885  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3886  (true, std::runtime_error, "getNodeAllocationSize threw "
3887  "std::runtime_error: " << e.what ());
3888  }
3889  catch (std::exception& e) {
3890  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3891  (true, std::runtime_error, "getNodeAllocationSize threw "
3892  "std::exception: " << e.what ());
3893  }
3894  catch (...) {
3895  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3896  (true, std::runtime_error, "getNodeAllocationSize threw "
3897  "an exception not a subclass of std::exception.");
3898  }
3899 
3900  if (this->getNodeNumEntries () != allocSize) {
3901  // Use the nonconst version of row_map_type for ptr_d, because
3902  // the latter is const and we need to modify ptr_d here.
3903  non_const_row_map_type ptr_d;
3904  row_map_type ptr_d_const;
3905 
3906  // The graph's current 1-D storage is "unpacked." This means
3907  // the row offsets may differ from what the final row offsets
3908  // should be. This could happen, for example, if the user set
3909  // an upper bound on the number of entries in each row, but
3910  // didn't fill all those entries.
3911 
3912  if (debug_) {
3913  if (rowPtrsUnpacked_host_.extent (0) != 0) {
3914  const size_t numOffsets =
3915  static_cast<size_t> (rowPtrsUnpacked_host_.extent (0));
3916  const auto valToCheck = rowPtrsUnpacked_host_(numOffsets - 1);
3917  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3918  (valToCheck != size_t(lclIndsUnpacked_wdv.extent(0)),
3919  std::logic_error, "(Unpacked branch) Before allocating "
3920  "or packing, k_rowPtrs_(" << (numOffsets-1) << ")="
3921  << valToCheck << " != lclIndsUnpacked_wdv.extent(0)="
3922  << lclIndsUnpacked_wdv.extent (0) << ".");
3923  }
3924  }
3925 
3926  // Pack the row offsets into ptr_d, by doing a sum-scan of the
3927  // array of valid entry counts per row (k_numRowEntries_).
3928 
3929  // Total number of entries in the matrix on the calling
3930  // process. We will compute this in the loop below. It's
3931  // cheap to compute and useful as a sanity check.
3932  size_t lclTotalNumEntries = 0;
3933  {
3934  // Allocate the packed row offsets array.
3935  ptr_d =
3936  non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3937  ptr_d_const = ptr_d;
3938 
3939  // It's ok that k_numRowEntries_ is a host View; the
3940  // function can handle this.
3941  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3942  if (debug_) {
3943  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3944  (size_t(numRowEnt_h.extent (0)) != lclNumRows,
3945  std::logic_error, "(Unpacked branch) "
3946  "numRowEnt_h.extent(0)=" << numRowEnt_h.extent(0)
3947  << " != getNodeNumRows()=" << lclNumRows << "");
3948  }
3949 
3950  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3951 
3952  if (debug_) {
3953  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3954  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
3955  std::logic_error, "(Unpacked branch) After allocating "
3956  "ptr_d, ptr_d.extent(0) = " << ptr_d.extent(0)
3957  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
3958  const auto valToCheck =
3959  ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
3960  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3961  (valToCheck != lclTotalNumEntries, std::logic_error,
3962  "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3963  "after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3964  << ") = " << valToCheck << " != total number of entries "
3965  "on the calling process = " << lclTotalNumEntries
3966  << ".");
3967  }
3968  }
3969 
3970  // Allocate the array of packed column indices.
3971  lclinds_1d_type ind_d =
3972  lclinds_1d_type ("Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3973 
3974  // k_rowPtrs_ and lclIndsUnpacked_wdv are currently unpacked. Pack
3975  // them, using the packed row offsets array ptr_d that we
3976  // created above.
3977  //
3978  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3979  // CrsMatrix?), we need to keep around the unpacked row
3980  // offsets and column indices.
3981 
3982  // Pack the column indices from unpacked lclIndsUnpacked_wdv into
3983  // packed ind_d. We will replace lclIndsUnpacked_wdv below.
3984  typedef pack_functor<
3985  typename local_graph_device_type::entries_type::non_const_type,
3986  typename local_inds_dualv_type::t_dev::const_type,
3987  row_map_type,
3988  typename local_graph_device_type::row_map_type> inds_packer_type;
3989  inds_packer_type f (ind_d,
3990  lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3991  ptr_d, rowPtrsUnpacked_dev_);
3992  {
3993  typedef typename decltype (ind_d)::execution_space exec_space;
3994  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3995  Kokkos::parallel_for (range_type (0, lclNumRows), f);
3996  }
3997 
3998  if (debug_) {
3999  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4000  (ptr_d.extent (0) == 0, std::logic_error,
4001  "(\"Optimize Storage\"=true branch) After packing, "
4002  "ptr_d.extent(0)=0. This probably means k_rowPtrs_ was "
4003  "never allocated.");
4004  if (ptr_d.extent (0) != 0) {
4005  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
4006  const auto valToCheck =
4007  ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
4008  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4009  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
4010  std::logic_error, "(\"Optimize Storage\"=true branch) "
4011  "After packing, ptr_d(" << (numOffsets-1) << ")="
4012  << valToCheck << " != ind_d.extent(0)="
4013  << ind_d.extent(0) << ".");
4014  }
4015  }
4016  // Build the local graph.
4017  setRowPtrsPacked(ptr_d_const);
4018  lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
4019  }
4020  else { // We don't have to pack, so just set the pointers.
4021  setRowPtrsPacked(rowPtrsUnpacked_dev_);
4022  lclIndsPacked_wdv = lclIndsUnpacked_wdv;
4023 
4024  if (debug_) {
4025  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4026  (rowPtrsPacked_dev_.extent (0) == 0, std::logic_error,
4027  "(\"Optimize Storage\"=false branch) "
4028  "rowPtrsPacked_dev_.extent(0) = 0. "
4029  "This probably means that "
4030  "k_rowPtrs_ was never allocated.");
4031  if (rowPtrsPacked_dev_.extent (0) != 0) {
4032  const size_t numOffsets =
4033  static_cast<size_t> (rowPtrsPacked_dev_.extent (0));
4034  const size_t valToCheck =
4035  rowPtrsPacked_host_(numOffsets - 1);
4036  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4037  (valToCheck != size_t(lclIndsPacked_wdv.extent (0)),
4038  std::logic_error, "(\"Optimize Storage\"=false branch) "
4039  "rowPtrsPacked_dev_(" << (numOffsets-1) << ")="
4040  << valToCheck
4041  << " != lclIndsPacked_wdv.extent(0)="
4042  << lclIndsPacked_wdv.extent (0) << ".");
4043  }
4044  }
4045  }
4046 
4047  if (debug_) {
4048  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4049  (static_cast<size_t> (rowPtrsPacked_dev_.extent (0)) != lclNumRows + 1,
4050  std::logic_error, "After packing, rowPtrsPacked_dev_.extent(0) = " <<
4051  rowPtrsPacked_dev_.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
4052  << ".");
4053  if (rowPtrsPacked_dev_.extent (0) != 0) {
4054  const size_t numOffsets = static_cast<size_t> (rowPtrsPacked_dev_.extent (0));
4055  const auto valToCheck = rowPtrsPacked_host_(numOffsets - 1);
4056  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4057  (static_cast<size_t> (valToCheck) != lclIndsPacked_wdv.extent (0),
4058  std::logic_error, "After packing, rowPtrsPacked_dev_(" << (numOffsets-1)
4059  << ") = " << valToCheck << " != lclIndsPacked_wdv.extent(0) = "
4060  << lclIndsPacked_wdv.extent (0) << ".");
4061  }
4062  }
4063 
4064  if (requestOptimizedStorage) {
4065  // With optimized storage, we don't need to store
4066  // the array of row entry counts.
4067 
4068  // Free graph data structures that are only needed for
4069  // unpacked 1-D storage.
4070  k_numRowEntries_ = row_entries_type ();
4071 
4072  // Keep the new 1-D packed allocations.
4073  setRowPtrsUnpacked(rowPtrsPacked_dev_);
4074  lclIndsUnpacked_wdv = lclIndsPacked_wdv;
4075 
4076  storageStatus_ = Details::STORAGE_1D_PACKED;
4077  }
4078 
4079  set_need_sync_host_uvm_access(); // make sure kernel setup of indices is fenced before a host access
4080  }
4081 
4082  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4083  void
4085  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4086  {
4087  // NOTE: This safety check matches the code, but not the documentation of Crsgraph
4088  //
4089  // FIXME (mfh 18 Aug 2014) This will break if the calling process
4090  // has no entries, because in that case, currently it is neither
4091  // locally nor globally indexed. This will change once we get rid
4092  // of lazy allocation (so that the constructor allocates indices
4093  // and therefore commits to local vs. global).
4094  const char tfecfFuncName[] = "replaceColMap: ";
4095  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4096  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
4097  "Requires matching maps and non-static graph.");
4098  colMap_ = newColMap;
4099  }
4100 
4101  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4102  void
4104  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
4105  const Teuchos::RCP<const import_type>& newImport,
4106  const bool sortIndicesInEachRow)
4107  {
4108  using Teuchos::REDUCE_MIN;
4109  using Teuchos::reduceAll;
4110  using Teuchos::RCP;
4111  typedef GlobalOrdinal GO;
4112  typedef LocalOrdinal LO;
4113  typedef typename local_inds_dualv_type::t_host col_inds_type;
4114  const char tfecfFuncName[] = "reindexColumns: ";
4115 
4116  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4117  isFillComplete (), std::runtime_error, "The graph is fill complete "
4118  "(isFillComplete() returns true). You must call resumeFill() before "
4119  "you may call this method.");
4120 
4121  // mfh 19 Aug 2014: This method does NOT redistribute data; it
4122  // doesn't claim to do the work of an Import or Export. This
4123  // means that for all processes, the calling process MUST own all
4124  // column indices, in both the old column Map (if it exists) and
4125  // the new column Map. We check this via an all-reduce.
4126  //
4127  // Some processes may be globally indexed, others may be locally
4128  // indexed, and others (that have no graph entries) may be
4129  // neither. This method will NOT change the graph's current
4130  // state. If it's locally indexed, it will stay that way, and
4131  // vice versa. It would be easy to add an option to convert indices
4132  // from global to local, so as to save a global-to-local
4133  // conversion pass. However, we don't do this here. The intended
4134  // typical use case is that the graph already has a column Map and
4135  // is locally indexed, and this is the case for which we optimize.
4136 
4137  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4138 
4139  // Attempt to convert indices to the new column Map's version of
4140  // local. This will fail if on the calling process, the graph has
4141  // indices that are not on that process in the new column Map.
4142  // After the local conversion attempt, we will do an all-reduce to
4143  // see if any processes failed.
4144 
4145  // If this is false, then either the graph contains a column index
4146  // which is invalid in the CURRENT column Map, or the graph is
4147  // locally indexed but currently has no column Map. In either
4148  // case, there is no way to convert the current local indices into
4149  // global indices, so that we can convert them into the new column
4150  // Map's local indices. It's possible for this to be true on some
4151  // processes but not others, due to replaceColMap.
4152  bool allCurColIndsValid = true;
4153  // On the calling process, are all valid current column indices
4154  // also in the new column Map on the calling process? In other
4155  // words, does local reindexing suffice, or should the user have
4156  // done an Import or Export instead?
4157  bool localSuffices = true;
4158 
4159  // Final arrays for the local indices. We will allocate exactly
4160  // one of these ONLY if the graph is locally indexed on the
4161  // calling process, and ONLY if the graph has one or more entries
4162  // (is not empty) on the calling process. In that case, we
4163  // allocate the first (1-D storage) if the graph has a static
4164  // profile, else we allocate the second (2-D storage).
4165  col_inds_type newLclInds1D;
4166  auto oldLclInds1D = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4167 
4168  // If indices aren't allocated, that means the calling process
4169  // owns no entries in the graph. Thus, there is nothing to
4170  // convert, and it trivially succeeds locally.
4171  if (indicesAreAllocated ()) {
4172  if (isLocallyIndexed ()) {
4173  if (hasColMap ()) { // locally indexed, and currently has a column Map
4174  const map_type& oldColMap = * (getColMap ());
4175  // Allocate storage for the new local indices.
4176  const size_t allocSize = this->getNodeAllocationSize ();
4177  newLclInds1D = col_inds_type("Tpetra::CrsGraph::lclIndsReindexedHost",
4178  allocSize);
4179  // Attempt to convert the new indices locally.
4180  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4181  const RowInfo rowInfo = this->getRowInfo (lclRow);
4182  const size_t beg = rowInfo.offset1D;
4183  const size_t end = beg + rowInfo.numEntries;
4184  for (size_t k = beg; k < end; ++k) {
4185  const LO oldLclCol = oldLclInds1D(k);
4186  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4187  allCurColIndsValid = false;
4188  break; // Stop at the first invalid index
4189  }
4190  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4191 
4192  // The above conversion MUST succeed. Otherwise, the
4193  // current local index is invalid, which means that
4194  // the graph was constructed incorrectly.
4195  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4196  allCurColIndsValid = false;
4197  break; // Stop at the first invalid index
4198  }
4199  else {
4200  const LO newLclCol = newColMap->getLocalElement (gblCol);
4201  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4202  localSuffices = false;
4203  break; // Stop at the first invalid index
4204  }
4205  newLclInds1D(k) = newLclCol;
4206  }
4207  } // for each entry in the current row
4208  } // for each locally owned row
4209  }
4210  else { // locally indexed, but no column Map
4211  // This case is only possible if replaceColMap() was called
4212  // with a null argument on the calling process. It's
4213  // possible, but it means that this method can't possibly
4214  // succeed, since we have no way of knowing how to convert
4215  // the current local indices to global indices.
4216  allCurColIndsValid = false;
4217  }
4218  }
4219  else { // globally indexed
4220  // If the graph is globally indexed, we don't need to save
4221  // local indices, but we _do_ need to know whether the current
4222  // global indices are valid in the new column Map. We may
4223  // need to do a getRemoteIndexList call to find this out.
4224  //
4225  // In this case, it doesn't matter whether the graph currently
4226  // has a column Map. We don't need the old column Map to
4227  // convert from global indices to the _new_ column Map's local
4228  // indices. Furthermore, we can use the same code, whether
4229  // the graph is static or dynamic profile.
4230 
4231  // Test whether the current global indices are in the new
4232  // column Map on the calling process.
4233  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4234  const RowInfo rowInfo = this->getRowInfo (lclRow);
4235  auto oldGblRowView = this->getGlobalIndsViewHost (rowInfo);
4236  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4237  const GO gblCol = oldGblRowView(k);
4238  if (! newColMap->isNodeGlobalElement (gblCol)) {
4239  localSuffices = false;
4240  break; // Stop at the first invalid index
4241  }
4242  } // for each entry in the current row
4243  } // for each locally owned row
4244  } // locally or globally indexed
4245  } // whether indices are allocated
4246 
4247  // Do an all-reduce to check both possible error conditions.
4248  int lclSuccess[2];
4249  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
4250  lclSuccess[1] = localSuffices ? 1 : 0;
4251  int gblSuccess[2];
4252  gblSuccess[0] = 0;
4253  gblSuccess[1] = 0;
4254  RCP<const Teuchos::Comm<int> > comm =
4255  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
4256  if (! comm.is_null ()) {
4257  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
4258  }
4259 
4260  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4261  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
4262  " The most likely reason is that the graph is locally indexed, but the "
4263  "column Map is missing (null) on some processes, due to a previous call "
4264  "to replaceColMap().");
4265 
4266  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4267  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
4268  "contains column indices that are in the old column Map, but not in the "
4269  "new column Map (on that process). This method does NOT redistribute "
4270  "data; it does not claim to do the work of an Import or Export operation."
4271  " This means that for all processess, the calling process MUST own all "
4272  "column indices, in both the old column Map and the new column Map. In "
4273  "this case, you will need to do an Import or Export operation to "
4274  "redistribute data.");
4275 
4276  // Commit the results.
4277  if (isLocallyIndexed ()) {
4278  { // scope the device view; sortAndMergeAllIndices needs host
4279  typename local_inds_dualv_type::t_dev newLclInds1D_dev(
4280  Kokkos::view_alloc("Tpetra::CrsGraph::lclIndReindexed",
4281  Kokkos::WithoutInitializing),
4282  newLclInds1D.extent(0));
4283  Kokkos::deep_copy(newLclInds1D_dev, newLclInds1D);
4284  lclIndsUnpacked_wdv = local_inds_wdv_type(newLclInds1D_dev);
4285  }
4286 
4287  // We've reindexed, so we don't know if the indices are sorted.
4288  //
4289  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
4290  // since we're already going through all the indices above. We
4291  // could also sort each row in place; that way, we would only
4292  // have to make one pass over the rows.
4293  indicesAreSorted_ = false;
4294  if (sortIndicesInEachRow) {
4295  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
4296  // order to call this method.
4297  //
4298  // FIXME (mfh 17 Sep 2014) This violates the strong exception
4299  // guarantee. It would be better to sort the new index arrays
4300  // before committing them.
4301  const bool sorted = false; // need to resort
4302  const bool merged = true; // no need to merge, since no dups
4303  this->sortAndMergeAllIndices (sorted, merged);
4304  }
4305  }
4306  colMap_ = newColMap;
4307 
4308  if (newImport.is_null ()) {
4309  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
4310  // check whether the input Import is null on any process.
4311  //
4312  // If the domain Map hasn't been set yet, we can't compute a new
4313  // Import object. Leave it what it is; it should be null, but
4314  // it doesn't matter. If the domain Map _has_ been set, then
4315  // compute a new Import object if necessary.
4316  if (! domainMap_.is_null ()) {
4317  if (! domainMap_->isSameAs (* newColMap)) {
4318  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4319  } else {
4320  importer_ = Teuchos::null; // don't need an Import
4321  }
4322  }
4323  } else {
4324  // The caller gave us an Import object. Assume that it's valid.
4325  importer_ = newImport;
4326  }
4327  }
4328 
4329  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4330  void
4332  replaceDomainMap (const Teuchos::RCP<const map_type>& newDomainMap)
4333  {
4334  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMap: ";
4335  TEUCHOS_TEST_FOR_EXCEPTION(
4336  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4337  "this method unless the graph already has a column Map.");
4338  TEUCHOS_TEST_FOR_EXCEPTION(
4339  newDomainMap.is_null (), std::invalid_argument,
4340  prefix << "The new domain Map must be nonnull.");
4341 
4342  // Create a new importer, if needed
4343  Teuchos::RCP<const import_type> newImporter = Teuchos::null;
4344  if (newDomainMap != colMap_ && (! newDomainMap->isSameAs (*colMap_))) {
4345  newImporter = rcp(new import_type(newDomainMap, colMap_));
4346  }
4347  this->replaceDomainMapAndImporter(newDomainMap, newImporter);
4348  }
4349 
4350  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4351  void
4353  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4354  const Teuchos::RCP<const import_type>& newImporter)
4355  {
4356  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4357  TEUCHOS_TEST_FOR_EXCEPTION(
4358  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4359  "this method unless the graph already has a column Map.");
4360  TEUCHOS_TEST_FOR_EXCEPTION(
4361  newDomainMap.is_null (), std::invalid_argument,
4362  prefix << "The new domain Map must be nonnull.");
4363 
4364  if (debug_) {
4365  if (newImporter.is_null ()) {
4366  // It's not a good idea to put expensive operations in a macro
4367  // clause, even if they are side effect - free, because macros
4368  // don't promise that they won't evaluate their arguments more
4369  // than once. It's polite for them to do so, but not required.
4370  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4371  TEUCHOS_TEST_FOR_EXCEPTION
4372  (!colSameAsDom, std::invalid_argument, "If the new Import is null, "
4373  "then the new domain Map must be the same as the current column Map.");
4374  }
4375  else {
4376  const bool colSameAsTgt =
4377  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4378  const bool newDomSameAsSrc =
4379  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4380  TEUCHOS_TEST_FOR_EXCEPTION
4381  (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4382  "new Import is nonnull, then the current column Map must be the same "
4383  "as the new Import's target Map, and the new domain Map must be the "
4384  "same as the new Import's source Map.");
4385  }
4386  }
4387 
4388  domainMap_ = newDomainMap;
4389  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4390  }
4391 
4392  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4393  void
4395  replaceRangeMap (const Teuchos::RCP<const map_type>& newRangeMap)
4396  {
4397  const char prefix[] = "Tpetra::CrsGraph::replaceRangeMap: ";
4398  TEUCHOS_TEST_FOR_EXCEPTION(
4399  rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4400  "this method unless the graph already has a row Map.");
4401  TEUCHOS_TEST_FOR_EXCEPTION(
4402  newRangeMap.is_null (), std::invalid_argument,
4403  prefix << "The new range Map must be nonnull.");
4404 
4405  // Create a new exporter, if needed
4406  Teuchos::RCP<const export_type> newExporter = Teuchos::null;
4407  if (newRangeMap != rowMap_ && (! newRangeMap->isSameAs (*rowMap_))) {
4408  newExporter = rcp(new export_type(rowMap_, newRangeMap));
4409  }
4410  this->replaceRangeMapAndExporter(newRangeMap, newExporter);
4411  }
4412 
4413  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4414  void
4416  replaceRangeMapAndExporter (const Teuchos::RCP<const map_type>& newRangeMap,
4417  const Teuchos::RCP<const export_type>& newExporter)
4418  {
4419  const char prefix[] = "Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
4420  TEUCHOS_TEST_FOR_EXCEPTION(
4421  rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4422  "this method unless the graph already has a column Map.");
4423  TEUCHOS_TEST_FOR_EXCEPTION(
4424  newRangeMap.is_null (), std::invalid_argument,
4425  prefix << "The new domain Map must be nonnull.");
4426 
4427  if (debug_) {
4428  if (newExporter.is_null ()) {
4429  // It's not a good idea to put expensive operations in a macro
4430  // clause, even if they are side effect - free, because macros
4431  // don't promise that they won't evaluate their arguments more
4432  // than once. It's polite for them to do so, but not required.
4433  const bool rowSameAsRange = rowMap_->isSameAs (*newRangeMap);
4434  TEUCHOS_TEST_FOR_EXCEPTION
4435  (!rowSameAsRange, std::invalid_argument, "If the new Export is null, "
4436  "then the new range Map must be the same as the current row Map.");
4437  }
4438  else {
4439  const bool newRangeSameAsTgt =
4440  newRangeMap->isSameAs (* (newExporter->getTargetMap ()));
4441  const bool rowSameAsSrc =
4442  rowMap_->isSameAs (* (newExporter->getSourceMap ()));
4443  TEUCHOS_TEST_FOR_EXCEPTION
4444  (! rowSameAsSrc || ! newRangeSameAsTgt, std::invalid_argument, "If the "
4445  "new Export is nonnull, then the current row Map must be the same "
4446  "as the new Export's source Map, and the new range Map must be the "
4447  "same as the new Export's target Map.");
4448  }
4449  }
4450 
4451  rangeMap_ = newRangeMap;
4452  exporter_ = Teuchos::rcp_const_cast<export_type> (newExporter);
4453  }
4454 
#ifdef TPETRA_ENABLE_DEPRECATED_CODE
// Deprecated accessor, compiled only when
// TPETRA_ENABLE_DEPRECATED_CODE is defined.  It simply forwards to
// getLocalGraphDevice().
//
// NOTE(review): the return type is spelled here as the type that
// getLocalGraphDevice() constructs; confirm it matches the alias used
// in the class declaration.
template <class LocalOrdinal, class GlobalOrdinal, class Node>
typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::local_graph_device_type
CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
getLocalGraph () const
{
  return getLocalGraphDevice();
}
#endif
4464 
4465  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4468  getLocalGraphDevice () const
4469  {
4470  return local_graph_device_type(
4471  lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
4472  rowPtrsPacked_dev_);
4473  }
4474 
4475  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4478  getLocalGraphHost () const
4479  {
4480  return local_graph_host_type(
4481  lclIndsPacked_wdv.getHostView(Access::ReadWrite),
4482  rowPtrsPacked_host_);
4483  }
4484 
4485  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4486  void
4489  {
4490  using ::Tpetra::Details::ProfilingRegion;
4491  using Teuchos::ArrayView;
4492  using Teuchos::outArg;
4493  using Teuchos::reduceAll;
4494  typedef global_size_t GST;
4495 
4496  ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4497 
4498  this->computeLocalConstants ();
4499 
4500  // Compute global constants from local constants. Processes that
4501  // already have local constants still participate in the
4502  // all-reduces, using their previously computed values.
4503  if (! this->haveGlobalConstants_) {
4504  const Teuchos::Comm<int>& comm = * (this->getComm ());
4505  // Promote all the nodeNum* and nodeMaxNum* quantities from
4506  // size_t to global_size_t, when doing the all-reduces for
4507  // globalNum* / globalMaxNum* results.
4508  //
4509  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4510  // this in two all-reduces (one for the sum and the other for
4511  // the max), or use a custom MPI_Op that combines the sum and
4512  // the max. The latter might even be slower than two
4513  // all-reduces on modern network hardware. It would also be a
4514  // good idea to use nonblocking all-reduces (MPI 3), so that we
4515  // don't have to wait around for the first one to finish before
4516  // starting the second one.
4517  GST lcl, gbl;
4518  lcl = static_cast<GST> (this->getNodeNumEntries ());
4519 
4520  reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
4521  this->globalNumEntries_ = gbl;
4522 
4523  const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4524  reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4525  outArg (this->globalMaxNumRowEntries_));
4526  this->haveGlobalConstants_ = true;
4527  }
4528  }
4529 
4530 
4531  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4532  void
4535  {
4536  using ::Tpetra::Details::ProfilingRegion;
4537 
4538  ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4539  if (this->haveLocalConstants_) {
4540  return;
4541  }
4542 
4543  // Reset local properties
4544  this->nodeMaxNumRowEntries_ =
4545  Teuchos::OrdinalTraits<size_t>::invalid();
4546 
4547  using LO = local_ordinal_type;
4548 
4549  auto ptr = this->rowPtrsPacked_dev_;
4550  const LO lclNumRows = ptr.extent(0) == 0 ?
4551  static_cast<LO> (0) :
4552  (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
4553 
4554  const LO lclMaxNumRowEnt =
4555  ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
4556  ptr, lclNumRows);
4557  this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
4558  this->haveLocalConstants_ = true;
4559  }
4560 
4561 
4562  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4563  std::pair<size_t, std::string>
4565  makeIndicesLocal (const bool verbose)
4566  {
4568  using Teuchos::arcp;
4569  using Teuchos::Array;
4570  using std::endl;
4571  typedef LocalOrdinal LO;
4572  typedef GlobalOrdinal GO;
4573  typedef device_type DT;
4574  typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
4575  typedef decltype (k_numRowEntries_) row_entries_type;
4576  typedef typename row_entries_type::non_const_value_type num_ent_type;
4577  const char tfecfFuncName[] = "makeIndicesLocal: ";
4578  ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4579 
4580  std::unique_ptr<std::string> prefix;
4581  if (verbose) {
4582  prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
4583  std::ostringstream os;
4584  os << *prefix << "lclNumRows: " << getNodeNumRows() << endl;
4585  std::cerr << os.str();
4586  }
4587 
4588  // These are somewhat global properties, so it's safe to have
4589  // exception checks for them, rather than returning an error code.
4590  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4591  (! this->hasColMap (), std::logic_error, "The graph does not have a "
4592  "column Map yet. This method should never be called in that case. "
4593  "Please report this bug to the Tpetra developers.");
4594  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4595  (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4596  "that it has a column Map, because hasColMap() returns true. However, "
4597  "the result of getColMap() is null. This should never happen. Please "
4598  "report this bug to the Tpetra developers.");
4599 
4600  // Return value 1: The number of column indices (counting
4601  // duplicates) that could not be converted to local indices,
4602  // because they were not in the column Map on the calling process.
4603  size_t lclNumErrs = 0;
4604  std::ostringstream errStrm; // for return value 2 (error string)
4605 
4606  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4607  const map_type& colMap = * (this->getColMap ());
4608 
4609  if (this->isGloballyIndexed () && lclNumRows != 0) {
4610  // This is a host-accessible View.
4611  typename row_entries_type::const_type h_numRowEnt =
4612  this->k_numRowEntries_;
4613 
4614  // Allocate space for local indices.
4615  if (rowPtrsUnpacked_host_.extent (0) == 0) {
4616  errStrm << "k_rowPtrs_.extent(0) == 0. This should never "
4617  "happen here. Please report this bug to the Tpetra developers."
4618  << endl;
4619  // Need to return early.
4620  return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4621  errStrm.str ());
4622  }
4623  const auto numEnt = rowPtrsUnpacked_host_(lclNumRows);
4624 
4625  // mfh 17 Dec 2016: We don't need initial zero-fill of
4626  // lclIndsUnpacked_wdv, because we will fill it below anyway.
4627  // AllowPadding would only help for aligned access (e.g.,
4628  // for vectorization) if we also were to pad each row to the
4629  // same alignment, so we'll skip AllowPadding for now.
4630 
4631  // using Kokkos::AllowPadding;
4632  using Kokkos::view_alloc;
4633  using Kokkos::WithoutInitializing;
4634 
4635  // When giving the label as an argument to
4636  // Kokkos::view_alloc, the label must be a string and not a
4637  // char*, else the code won't compile. This is because
4638  // view_alloc also allows a raw pointer as its first
4639  // argument. See
4640  // https://github.com/kokkos/kokkos/issues/434. This is a
4641  // large allocation typically, so the overhead of creating
4642  // an std::string is minor.
4643  const std::string label ("Tpetra::CrsGraph::lclInd");
4644  if (verbose) {
4645  std::ostringstream os;
4646  os << *prefix << "(Re)allocate lclInd_wdv: old="
4647  << lclIndsUnpacked_wdv.extent(0) << ", new=" << numEnt << endl;
4648  std::cerr << os.str();
4649  }
4650 
4651  local_inds_dualv_type lclInds_dualv =
4652  local_inds_dualv_type(view_alloc(label, WithoutInitializing),
4653  numEnt);
4654  lclIndsUnpacked_wdv = local_inds_wdv_type(lclInds_dualv);
4655 
4656  auto lclColMap = colMap.getLocalMap ();
4657  // This is a "device mirror" of the host View h_numRowEnt.
4658  //
4659  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4660  // Device instance is to use its default constructor. See the
4661  // following Kokkos issue:
4662  //
4663  // https://github.com/kokkos/kokkos/issues/442
4664  if (verbose) {
4665  std::ostringstream os;
4666  os << *prefix << "Allocate device mirror k_numRowEnt: "
4667  << h_numRowEnt.extent(0) << endl;
4668  std::cerr << os.str();
4669  }
4670  auto k_numRowEnt =
4671  Kokkos::create_mirror_view_and_copy (device_type (), h_numRowEnt);
4672 
4674  lclNumErrs =
4675  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (
4676  lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4677  gblInds_wdv.getDeviceView(Access::ReadOnly),
4678  rowPtrsUnpacked_dev_,
4679  lclColMap,
4680  k_numRowEnt);
4681  if (lclNumErrs != 0) {
4682  const int myRank = [this] () {
4683  auto map = this->getMap ();
4684  if (map.is_null ()) {
4685  return 0;
4686  }
4687  else {
4688  auto comm = map->getComm ();
4689  return comm.is_null () ? 0 : comm->getRank ();
4690  }
4691  } ();
4692  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
4693  errStrm << "(Process " << myRank << ") When converting column "
4694  "indices from global to local, we encountered " << lclNumErrs
4695  << " ind" << (pluralNumErrs ? "ices" : "ex")
4696  << " that do" << (pluralNumErrs ? "es" : "")
4697  << " not live in the column Map on this process." << endl;
4698  }
4699 
4700  // We've converted column indices from global to local, so we
4701  // can deallocate the global column indices (which we know are
4702  // in 1-D storage, because the graph has static profile).
4703  if (verbose) {
4704  std::ostringstream os;
4705  os << *prefix << "Free gblInds_wdv: "
4706  << gblInds_wdv.extent(0) << endl;
4707  std::cerr << os.str();
4708  }
4709  gblInds_wdv = global_inds_wdv_type ();
4710  } // globallyIndexed() && lclNumRows > 0
4711 
4712  this->indicesAreLocal_ = true;
4713  this->indicesAreGlobal_ = false;
4714  this->checkInternalState ();
4715 
4716  return std::make_pair (lclNumErrs, errStrm.str ());
4717  }
4718 
4719  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4720  void
4722  makeColMap (Teuchos::Array<int>& remotePIDs)
4723  {
4725  using std::endl;
4726  const char tfecfFuncName[] = "makeColMap";
4727 
4728  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
4729  std::unique_ptr<std::string> prefix;
4730  if (verbose_) {
4731  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4732  std::ostringstream os;
4733  os << *prefix << "Start" << endl;
4734  std::cerr << os.str();
4735  }
4736 
4737  // this->colMap_ should be null at this point, but we accept the
4738  // future possibility that it might not be (esp. if we decide
4739  // later to support graph structure changes after first
4740  // fillComplete, which CrsGraph does not currently (as of 12 Feb
4741  // 2017) support).
4742  Teuchos::RCP<const map_type> colMap = this->colMap_;
4743  const bool sortEachProcsGids =
4744  this->sortGhostsAssociatedWithEachProcessor_;
4745 
4746  // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4747  // per-process error code. If an error does occur on a process,
4748  // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4749  // notice that error. This is the caller's responsibility. For
4750  // now, we only propagate (to all processes) and report the error
4751  // in debug mode. In the future, we need to add the local/global
4752  // error handling scheme used in BlockCrsMatrix to this class.
4753  if (debug_) {
4754  using Teuchos::outArg;
4755  using Teuchos::REDUCE_MIN;
4756  using Teuchos::reduceAll;
4757 
4758  std::ostringstream errStrm;
4759  const int lclErrCode =
4760  Details::makeColMap (colMap, remotePIDs,
4761  getDomainMap (), *this, sortEachProcsGids, &errStrm);
4762  auto comm = this->getComm ();
4763  if (! comm.is_null ()) {
4764  const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4765  int gblSuccess = 0; // output argument
4766  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
4767  outArg (gblSuccess));
4768  if (gblSuccess != 1) {
4769  std::ostringstream os;
4770  Details::gathervPrint (os, errStrm.str (), *comm);
4771  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4772  (true, std::runtime_error, ": An error happened on at "
4773  "least one process in the CrsGraph's communicator. "
4774  "Here are all processes' error messages:" << std::endl
4775  << os.str ());
4776  }
4777  }
4778  }
4779  else {
4780  (void) Details::makeColMap (colMap, remotePIDs,
4781  getDomainMap (), *this, sortEachProcsGids, nullptr);
4782  }
4783  // See above. We want to admit the possibility of makeColMap
4784  // actually revising an existing column Map, even though that
4785  // doesn't currently (as of 10 May 2017) happen.
4786  this->colMap_ = colMap;
4787 
4788  checkInternalState ();
4789  if (verbose_) {
4790  std::ostringstream os;
4791  os << *prefix << "Done" << endl;
4792  std::cerr << os.str();
4793  }
4794  }
4795 
4796 
4797  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4798  void
4800  sortAndMergeAllIndices (const bool sorted, const bool merged)
4801  {
4802  using std::endl;
4803  using LO = LocalOrdinal;
4804  using host_execution_space =
4805  typename Kokkos::View<LO*, device_type>::HostMirror::
4806  execution_space;
4807  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4808  const char tfecfFuncName[] = "sortAndMergeAllIndices";
4809  Details::ProfilingRegion regionSortAndMerge
4810  ("Tpetra::CrsGraph::sortAndMergeAllIndices");
4811 
4812  std::unique_ptr<std::string> prefix;
4813  if (verbose_) {
4814  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4815  std::ostringstream os;
4816  os << *prefix << "Start: "
4817  << "sorted=" << (sorted ? "true" : "false")
4818  << ", merged=" << (merged ? "true" : "false") << endl;
4819  std::cerr << os.str();
4820  }
4821  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4822  (this->isGloballyIndexed(), std::logic_error,
4823  "This method may only be called after makeIndicesLocal." );
4824  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4825  (! merged && this->isStorageOptimized(), std::logic_error,
4826  "The graph is already storage optimized, so we shouldn't be "
4827  "merging any indices. "
4828  "Please report this bug to the Tpetra developers.");
4829 
4830  if (! sorted || ! merged) {
4831  const LO lclNumRows(this->getNodeNumRows());
4832  auto range = range_type(0, lclNumRows);
4833 
4834  if (verbose_) {
4835  size_t totalNumDups = 0;
4836  Kokkos::parallel_reduce(range,
4837  [this, sorted, merged] (const LO lclRow, size_t& numDups)
4838  {
4839  const RowInfo rowInfo = this->getRowInfo(lclRow);
4840  numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4841  },
4842  totalNumDups);
4843  std::ostringstream os;
4844  os << *prefix << "totalNumDups=" << totalNumDups << endl;
4845  std::cerr << os.str();
4846  }
4847  else {
4848  Kokkos::parallel_for(range,
4849  [this, sorted, merged] (const LO lclRow)
4850  {
4851  const RowInfo rowInfo = this->getRowInfo(lclRow);
4852  this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4853  });
4854  }
4855  this->indicesAreSorted_ = true; // we just sorted every row
4856  this->noRedundancies_ = true; // we just merged every row
4857  }
4858 
4859  if (verbose_) {
4860  std::ostringstream os;
4861  os << *prefix << "Done" << endl;
4862  std::cerr << os.str();
4863  }
4864  }
4865 
4866  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4867  void
4869  makeImportExport (Teuchos::Array<int>& remotePIDs,
4870  const bool useRemotePIDs)
4871  {
4872  using ::Tpetra::Details::ProfilingRegion;
4873  using Teuchos::ParameterList;
4874  using Teuchos::RCP;
4875  using Teuchos::rcp;
4876  const char tfecfFuncName[] = "makeImportExport: ";
4877  ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
4878 
4879  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4880  (! this->hasColMap (), std::logic_error,
4881  "This method may not be called unless the graph has a column Map.");
4882  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
4883 
4884  // Don't do any checks to see if we need to create the Import, if
4885  // it exists already.
4886  //
4887  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4888  // change CrsGraph in the future to allow changing the column
4889  // Map after fillComplete. For now, the column Map is fixed
4890  // after the first fillComplete call.
4891  if (importer_.is_null ()) {
4892  // Create the Import instance if necessary.
4893  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
4894  if (params.is_null () || ! params->isSublist ("Import")) {
4895  if (useRemotePIDs) {
4896  importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
4897  }
4898  else {
4899  importer_ = rcp (new import_type (domainMap_, colMap_));
4900  }
4901  }
4902  else {
4903  RCP<ParameterList> importSublist = sublist (params, "Import", true);
4904  if (useRemotePIDs) {
4905  RCP<import_type> newImp =
4906  rcp (new import_type (domainMap_, colMap_, remotePIDs,
4907  importSublist));
4908  importer_ = newImp;
4909  }
4910  else {
4911  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
4912  }
4913  }
4914  }
4915  }
4916 
4917  // Don't do any checks to see if we need to create the Export, if
4918  // it exists already.
4919  if (exporter_.is_null ()) {
4920  // Create the Export instance if necessary.
4921  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
4922  if (params.is_null () || ! params->isSublist ("Export")) {
4923  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
4924  }
4925  else {
4926  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
4927  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
4928  }
4929  }
4930  }
4931  }
4932 
4933 
4934  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4935  std::string
4937  description () const
4938  {
4939  std::ostringstream oss;
4940  oss << dist_object_type::description ();
4941  if (isFillComplete ()) {
4942  oss << "{status = fill complete"
4943  << ", global rows = " << getGlobalNumRows()
4944  << ", global cols = " << getGlobalNumCols()
4945  << ", global num entries = " << getGlobalNumEntries()
4946  << "}";
4947  }
4948  else {
4949  oss << "{status = fill not complete"
4950  << ", global rows = " << getGlobalNumRows()
4951  << "}";
4952  }
4953  return oss.str();
4954  }
4955 
4956 
4957  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4958  void
4960  describe (Teuchos::FancyOStream &out,
4961  const Teuchos::EVerbosityLevel verbLevel) const
4962  {
4963  using Teuchos::ArrayView;
4964  using Teuchos::Comm;
4965  using Teuchos::RCP;
4966  using Teuchos::VERB_DEFAULT;
4967  using Teuchos::VERB_NONE;
4968  using Teuchos::VERB_LOW;
4969  using Teuchos::VERB_MEDIUM;
4970  using Teuchos::VERB_HIGH;
4971  using Teuchos::VERB_EXTREME;
4972  using std::endl;
4973  using std::setw;
4974 
4975  Teuchos::EVerbosityLevel vl = verbLevel;
4976  if (vl == VERB_DEFAULT) vl = VERB_LOW;
4977  RCP<const Comm<int> > comm = this->getComm();
4978  const int myImageID = comm->getRank(),
4979  numImages = comm->getSize();
4980  size_t width = 1;
4981  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
4982  ++width;
4983  }
4984  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
4985  Teuchos::OSTab tab (out);
4986  // none: print nothing
4987  // low: print O(1) info from node 0
4988  // medium: print O(P) info, num entries per node
4989  // high: print O(N) info, num entries per row
4990  // extreme: print O(NNZ) info: print graph indices
4991  //
4992  // for medium and higher, print constituent objects at specified verbLevel
4993  if (vl != VERB_NONE) {
4994  if (myImageID == 0) out << this->description() << std::endl;
4995  // O(1) globals, minus what was already printed by description()
4996  if (isFillComplete() && myImageID == 0) {
4997  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4998  }
4999  // constituent objects
5000  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5001  if (myImageID == 0) out << "\nRow map: " << std::endl;
5002  rowMap_->describe(out,vl);
5003  if (colMap_ != Teuchos::null) {
5004  if (myImageID == 0) out << "\nColumn map: " << std::endl;
5005  colMap_->describe(out,vl);
5006  }
5007  if (domainMap_ != Teuchos::null) {
5008  if (myImageID == 0) out << "\nDomain map: " << std::endl;
5009  domainMap_->describe(out,vl);
5010  }
5011  if (rangeMap_ != Teuchos::null) {
5012  if (myImageID == 0) out << "\nRange map: " << std::endl;
5013  rangeMap_->describe(out,vl);
5014  }
5015  }
5016  // O(P) data
5017  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5018  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5019  if (myImageID == imageCtr) {
5020  out << "Node ID = " << imageCtr << std::endl
5021  << "Node number of entries = " << this->getNodeNumEntries () << std::endl
5022  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
5023  if (! indicesAreAllocated ()) {
5024  out << "Indices are not allocated." << std::endl;
5025  }
5026  }
5027  comm->barrier();
5028  comm->barrier();
5029  comm->barrier();
5030  }
5031  }
5032  // O(N) and O(NNZ) data
5033  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
5034  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5035  if (myImageID == imageCtr) {
5036  out << std::setw(width) << "Node ID"
5037  << std::setw(width) << "Global Row"
5038  << std::setw(width) << "Num Entries";
5039  if (vl == VERB_EXTREME) {
5040  out << " Entries";
5041  }
5042  out << std::endl;
5043  const LocalOrdinal lclNumRows =
5044  static_cast<LocalOrdinal> (this->getNodeNumRows ());
5045  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
5046  const RowInfo rowinfo = this->getRowInfo (r);
5047  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
5048  out << std::setw(width) << myImageID
5049  << std::setw(width) << gid
5050  << std::setw(width) << rowinfo.numEntries;
5051  if (vl == VERB_EXTREME) {
5052  out << " ";
5053  if (isGloballyIndexed()) {
5054  auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
5055  for (size_t j=0; j < rowinfo.numEntries; ++j){
5056  GlobalOrdinal colgid = rowview[j] + rowinfo.offset1D;
5057  out << colgid << " ";
5058  }
5059  }
5060  else if (isLocallyIndexed()) {
5061  auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
5062  for (size_t j=0; j < rowinfo.numEntries; ++j) {
5063  LocalOrdinal collid = rowview[j] + rowinfo.offset1D;
5064  out << colMap_->getGlobalElement(collid) << " ";
5065  }
5066  }
5067  }
5068  out << std::endl;
5069  }
5070  }
5071  comm->barrier();
5072  comm->barrier();
5073  comm->barrier();
5074  }
5075  }
5076  }
5077  }
5078 
5079 
5080  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5081  bool
5083  checkSizes (const SrcDistObject& /* source */)
5084  {
5085  // It's not clear what kind of compatibility checks on sizes can
5086  // be performed here. Epetra_CrsGraph doesn't check any sizes for
5087  // compatibility.
5088  return true;
5089  }
5090 
5091  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5092  void
5095  (const SrcDistObject& source,
5096  const size_t numSameIDs,
5097  const Kokkos::DualView<const local_ordinal_type*,
5098  buffer_device_type>& permuteToLIDs,
5099  const Kokkos::DualView<const local_ordinal_type*,
5100  buffer_device_type>& permuteFromLIDs,
5101  const CombineMode /*CM*/)
5102  {
5103  using std::endl;
5104  using LO = local_ordinal_type;
5105  using GO = global_ordinal_type;
5106  using this_type = CrsGraph<LO, GO, node_type>;
5107  const char tfecfFuncName[] = "copyAndPermute: ";
5108  const bool verbose = verbose_;
5109 
5110  std::unique_ptr<std::string> prefix;
5111  if (verbose) {
5112  prefix = this->createPrefix("CrsGraph", "copyAndPermute");
5113  std::ostringstream os;
5114  os << *prefix << endl;
5115  std::cerr << os.str ();
5116  }
5117 
5118  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5119  (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
5120  std::runtime_error, "permuteToLIDs.extent(0) = "
5121  << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
5122  << permuteFromLIDs.extent (0) << ".");
5123 
5124  // We know from checkSizes that the source object is a
5125  // row_graph_type, so we don't need to check again.
5126  const row_graph_type& srcRowGraph =
5127  dynamic_cast<const row_graph_type&> (source);
5128 
5129  if (verbose) {
5130  std::ostringstream os;
5131  os << *prefix << "Compute padding" << endl;
5132  std::cerr << os.str ();
5133  }
5134  auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
5135  permuteToLIDs, permuteFromLIDs, verbose);
5136  applyCrsPadding(*padding, verbose);
5137 
5138  // If the source object is actually a CrsGraph, we can use view
5139  // mode instead of copy mode to access the entries in each row,
5140  // if the graph is not fill complete.
5141  const this_type* srcCrsGraph =
5142  dynamic_cast<const this_type*> (&source);
5143 
5144  const map_type& srcRowMap = *(srcRowGraph.getRowMap());
5145  const map_type& tgtRowMap = *(getRowMap());
5146  const bool src_filled = srcRowGraph.isFillComplete();
5147  nonconst_global_inds_host_view_type row_copy;
5148  LO myid = 0;
5149 
5150  //
5151  // "Copy" part of "copy and permute."
5152  //
5153  if (src_filled || srcCrsGraph == nullptr) {
5154  if (verbose) {
5155  std::ostringstream os;
5156  os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
5157  std::cerr << os.str ();
5158  }
5159  // If the source graph is fill complete, we can't use view mode,
5160  // because the data might be stored in a different format not
5161  // compatible with the expectations of view mode. Also, if the
5162  // source graph is not a CrsGraph, we can't use view mode,
5163  // because RowGraph only provides copy mode access to the data.
5164  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5165  const GO gid = srcRowMap.getGlobalElement (myid);
5166  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
5167  Kokkos::resize(row_copy,row_length);
5168  size_t check_row_length = 0;
5169  srcRowGraph.getGlobalRowCopy (gid, row_copy, check_row_length);
5170  this->insertGlobalIndices (gid, row_length, row_copy.data());
5171  }
5172  } else {
5173  if (verbose) {
5174  std::ostringstream os;
5175  os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
5176  std::cerr << os.str ();
5177  }
5178  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5179  const GO gid = srcRowMap.getGlobalElement (myid);
5180  global_inds_host_view_type row;
5181  srcCrsGraph->getGlobalRowView (gid, row);
5182  this->insertGlobalIndices (gid, row.extent(0), row.data());
5183  }
5184  }
5185 
5186  //
5187  // "Permute" part of "copy and permute."
5188  //
5189  auto permuteToLIDs_h = permuteToLIDs.view_host ();
5190  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
5191 
5192  if (src_filled || srcCrsGraph == nullptr) {
5193  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5194  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5195  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5196  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
5197  Kokkos::resize(row_copy,row_length);
5198  size_t check_row_length = 0;
5199  srcRowGraph.getGlobalRowCopy (srcgid, row_copy, check_row_length);
5200  this->insertGlobalIndices (mygid, row_length, row_copy.data());
5201  }
5202  } else {
5203  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5204  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5205  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5206  global_inds_host_view_type row;
5207  srcCrsGraph->getGlobalRowView (srcgid, row);
5208  this->insertGlobalIndices (mygid, row.extent(0), row.data());
5209  }
5210  }
5211 
5212  if (verbose) {
5213  std::ostringstream os;
5214  os << *prefix << "Done" << endl;
5215  std::cerr << os.str ();
5216  }
5217  }
5218 
5219  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5220  void
5221  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5222  applyCrsPadding(const padding_type& padding,
5223  const bool verbose)
5224  {
5225  using Details::ProfilingRegion;
5226  using Details::padCrsArrays;
5227  using std::endl;
5228  using LO = local_ordinal_type;
5229  using row_ptrs_type =
5230  typename local_graph_device_type::row_map_type::non_const_type;
5231  using range_policy =
5232  Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5233  const char tfecfFuncName[] = "applyCrsPadding";
5234  ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
5235 
5236  std::unique_ptr<std::string> prefix;
5237  if (verbose) {
5238  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5239  std::ostringstream os;
5240  os << *prefix << "padding: ";
5241  padding.print(os);
5242  os << endl;
5243  std::cerr << os.str();
5244  }
5245  const int myRank = ! verbose ? -1 : [&] () {
5246  auto map = this->getMap();
5247  if (map.is_null()) {
5248  return -1;
5249  }
5250  auto comm = map->getComm();
5251  if (comm.is_null()) {
5252  return -1;
5253  }
5254  return comm->getRank();
5255  } ();
5256 
5257  // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
5258  // row_ptrs_beg or allocate row_ptrs_end unless the allocation
5259  // size needs to increase. That should be the job of
5260  // padCrsArrays.
5261 
5262  // Assume global indexing we don't have any indices yet
5263  if (! indicesAreAllocated()) {
5264  if (verbose) {
5265  std::ostringstream os;
5266  os << *prefix << "Call allocateIndices" << endl;
5267  std::cerr << os.str();
5268  }
5269  allocateIndices(GlobalIndices, verbose);
5270  }
5271  TEUCHOS_ASSERT( indicesAreAllocated() );
5272 
5273  // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
5274  // would use it directly.
5275 
5276  if (verbose) {
5277  std::ostringstream os;
5278  os << *prefix << "Allocate row_ptrs_beg: "
5279  << rowPtrsUnpacked_dev_.extent(0) << endl;
5280  std::cerr << os.str();
5281  }
5282  using Kokkos::view_alloc;
5283  using Kokkos::WithoutInitializing;
5284  row_ptrs_type row_ptrs_beg(
5285  view_alloc("row_ptrs_beg", WithoutInitializing),
5286  rowPtrsUnpacked_dev_.extent(0));
5287  Kokkos::deep_copy(row_ptrs_beg, rowPtrsUnpacked_dev_);
5288 
5289  const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) :
5290  size_t(row_ptrs_beg.extent(0) - 1);
5291  if (verbose) {
5292  std::ostringstream os;
5293  os << *prefix << "Allocate row_ptrs_end: " << N << endl;
5294  std::cerr << os.str();
5295  }
5296  row_ptrs_type row_ptrs_end(
5297  view_alloc("row_ptrs_end", WithoutInitializing), N);
5298  row_ptrs_type num_row_entries;
5299 
5300  const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
5301  if (refill_num_row_entries) { // Case 1: Unpacked storage
5302  // We can't assume correct *this capture until C++17, and it's
5303  // likely more efficient just to capture what we need anyway.
5304  num_row_entries =
5305  row_ptrs_type(view_alloc("num_row_entries", WithoutInitializing), N);
5306  Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
5307  Kokkos::parallel_for
5308  ("Fill end row pointers", range_policy(0, N),
5309  KOKKOS_LAMBDA (const size_t i) {
5310  row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
5311  });
5312  }
5313  else {
5314  // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
5315  // storage, we don't need row_ptr_end to be separate allocation;
5316  // could just have it alias row_ptr_beg+1.
5317  Kokkos::parallel_for
5318  ("Fill end row pointers", range_policy(0, N),
5319  KOKKOS_LAMBDA (const size_t i) {
5320  row_ptrs_end(i) = row_ptrs_beg(i+1);
5321  });
5322  }
5323 
5324  if (isGloballyIndexed()) {
5325  padCrsArrays(row_ptrs_beg, row_ptrs_end, gblInds_wdv,
5326  padding, myRank, verbose);
5327  }
5328  else {
5329  padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
5330  padding, myRank, verbose);
5331  }
5332 
5333  if (refill_num_row_entries) {
5334  Kokkos::parallel_for
5335  ("Fill num entries", range_policy(0, N),
5336  KOKKOS_LAMBDA (const size_t i) {
5337  num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
5338  });
5339  Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
5340  }
5341  if (verbose) {
5342  std::ostringstream os;
5343  os << *prefix << "Reassign k_rowPtrs_; old size: "
5344  << rowPtrsUnpacked_dev_.extent(0) << ", new size: "
5345  << row_ptrs_beg.extent(0) << endl;
5346  std::cerr << os.str();
5347  TEUCHOS_ASSERT( rowPtrsUnpacked_dev_.extent(0) == row_ptrs_beg.extent(0) );
5348  }
5349 
5350  setRowPtrsUnpacked(row_ptrs_beg);
5351 
5352  set_need_sync_host_uvm_access(); // need fence before host UVM access of k_rowPtrs_
5353  }
5354 
5355  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5356  std::unique_ptr<
5357  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5358  >
5359  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5360  computeCrsPadding(
5361  const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5362  const size_t numSameIDs,
5363  const Kokkos::DualView<const local_ordinal_type*,
5364  buffer_device_type>& permuteToLIDs,
5365  const Kokkos::DualView<const local_ordinal_type*,
5366  buffer_device_type>& permuteFromLIDs,
5367  const bool verbose) const
5368  {
5369  using LO = local_ordinal_type;
5370  using std::endl;
5371 
5372  std::unique_ptr<std::string> prefix;
5373  if (verbose) {
5374  prefix = this->createPrefix("CrsGraph",
5375  "computeCrsPadding(same & permute)");
5376  std::ostringstream os;
5377  os << *prefix << "{numSameIDs: " << numSameIDs
5378  << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
5379  << endl;
5380  std::cerr << os.str();
5381  }
5382 
5383  const int myRank = [&] () {
5384  auto comm = rowMap_.is_null() ? Teuchos::null :
5385  rowMap_->getComm();
5386  return comm.is_null() ? -1 : comm->getRank();
5387  } ();
5388  std::unique_ptr<padding_type> padding(
5389  new padding_type(myRank, numSameIDs,
5390  permuteFromLIDs.extent(0)));
5391 
5392  computeCrsPaddingForSameIDs(*padding, source,
5393  static_cast<LO>(numSameIDs));
5394  computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
5395  permuteFromLIDs);
5396  return padding;
5397  }
5398 
5399  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5400  void
5401  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5402  computeCrsPaddingForSameIDs(
5403  padding_type& padding,
5404  const RowGraph<local_ordinal_type, global_ordinal_type,
5405  node_type>& source,
5406  const local_ordinal_type numSameIDs) const
5407  {
5408  using LO = local_ordinal_type;
5409  using GO = global_ordinal_type;
5410  using Details::Impl::getRowGraphGlobalRow;
5411  using std::endl;
5412  const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
5413 
5414  std::unique_ptr<std::string> prefix;
5415  const bool verbose = verbose_;
5416  if (verbose) {
5417  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5418  std::ostringstream os;
5419  os << *prefix << "numSameIDs: " << numSameIDs << endl;
5420  std::cerr << os.str();
5421  }
5422 
5423  if (numSameIDs == 0) {
5424  return;
5425  }
5426 
5427  const map_type& srcRowMap = *(source.getRowMap());
5428  const map_type& tgtRowMap = *rowMap_;
5429  using this_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5430  const this_type* srcCrs = dynamic_cast<const this_type*>(&source);
5431  const bool src_is_unique =
5432  srcCrs == nullptr ? false : srcCrs->isMerged();
5433  const bool tgt_is_unique = this->isMerged();
5434 
5435  std::vector<GO> srcGblColIndsScratch;
5436  std::vector<GO> tgtGblColIndsScratch;
5437 
5438  execute_sync_host_uvm_access(); // protect host UVM access
5439  for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
5440  const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
5441  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
5442  auto srcGblColInds = getRowGraphGlobalRow(
5443  srcGblColIndsScratch, source, srcGblRowInd);
5444  auto tgtGblColInds = getRowGraphGlobalRow(
5445  tgtGblColIndsScratch, *this, tgtGblRowInd);
5446  padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
5447  tgtGblColInds.size(), tgt_is_unique,
5448  srcGblColInds.getRawPtr(),
5449  srcGblColInds.size(), src_is_unique);
5450  }
5451  if (verbose) {
5452  std::ostringstream os;
5453  os << *prefix << "Done" << endl;
5454  std::cerr << os.str();
5455  }
5456  }
5457 
5458  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5459  void
5460  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5461  computeCrsPaddingForPermutedIDs(
5462  padding_type& padding,
5463  const RowGraph<local_ordinal_type, global_ordinal_type,
5464  node_type>& source,
5465  const Kokkos::DualView<const local_ordinal_type*,
5466  buffer_device_type>& permuteToLIDs,
5467  const Kokkos::DualView<const local_ordinal_type*,
5468  buffer_device_type>& permuteFromLIDs) const
5469  {
5470  using LO = local_ordinal_type;
5471  using GO = global_ordinal_type;
5472  using Details::Impl::getRowGraphGlobalRow;
5473  using std::endl;
5474  const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
5475 
5476  std::unique_ptr<std::string> prefix;
5477  const bool verbose = verbose_;
5478  if (verbose) {
5479  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5480  std::ostringstream os;
5481  os << *prefix << "permuteToLIDs.extent(0): "
5482  << permuteToLIDs.extent(0)
5483  << ", permuteFromLIDs.extent(0): "
5484  << permuteFromLIDs.extent(0) << endl;
5485  std::cerr << os.str();
5486  }
5487 
5488  if (permuteToLIDs.extent(0) == 0) {
5489  return;
5490  }
5491 
5492  const map_type& srcRowMap = *(source.getRowMap());
5493  const map_type& tgtRowMap = *rowMap_;
5494  using this_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5495  const this_type* srcCrs = dynamic_cast<const this_type*>(&source);
5496  const bool src_is_unique =
5497  srcCrs == nullptr ? false : srcCrs->isMerged();
5498  const bool tgt_is_unique = this->isMerged();
5499 
5500  TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() );
5501  auto permuteToLIDs_h = permuteToLIDs.view_host();
5502  TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host() );
5503  auto permuteFromLIDs_h = permuteFromLIDs.view_host();
5504 
5505  std::vector<GO> srcGblColIndsScratch;
5506  std::vector<GO> tgtGblColIndsScratch;
5507  const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
5508 
5509  execute_sync_host_uvm_access(); // protect host UVM access
5510  for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
5511  const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
5512  const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
5513  auto srcGblColInds = getRowGraphGlobalRow(
5514  srcGblColIndsScratch, source, srcGblRowInd);
5515  const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
5516  const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
5517  auto tgtGblColInds = getRowGraphGlobalRow(
5518  tgtGblColIndsScratch, *this, tgtGblRowInd);
5519  padding.update_permute(whichPermute, tgtLclRowInd,
5520  tgtGblColInds.getRawPtr(),
5521  tgtGblColInds.size(), tgt_is_unique,
5522  srcGblColInds.getRawPtr(),
5523  srcGblColInds.size(), src_is_unique);
5524  }
5525 
5526  if (verbose) {
5527  std::ostringstream os;
5528  os << *prefix << "Done" << endl;
5529  std::cerr << os.str();
5530  }
5531  }
5532 
5533  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5534  std::unique_ptr<
5535  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5536  >
5537  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5538  computeCrsPaddingForImports(
5539  const Kokkos::DualView<const local_ordinal_type*,
5540  buffer_device_type>& importLIDs,
5541  Kokkos::DualView<packet_type*, buffer_device_type> imports,
5542  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5543  const bool verbose) const
5544  {
5545  using Details::Impl::getRowGraphGlobalRow;
5546  using std::endl;
5547  using LO = local_ordinal_type;
5548  using GO = global_ordinal_type;
5549  const char tfecfFuncName[] = "computeCrsPaddingForImports";
5550 
5551  std::unique_ptr<std::string> prefix;
5552  if (verbose) {
5553  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5554  std::ostringstream os;
5555  os << *prefix << "importLIDs.extent(0): "
5556  << importLIDs.extent(0)
5557  << ", imports.extent(0): "
5558  << imports.extent(0)
5559  << ", numPacketsPerLID.extent(0): "
5560  << numPacketsPerLID.extent(0) << endl;
5561  std::cerr << os.str();
5562  }
5563 
5564  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5565  const int myRank = [&] () {
5566  auto comm = rowMap_.is_null() ? Teuchos::null :
5567  rowMap_->getComm();
5568  return comm.is_null() ? -1 : comm->getRank();
5569  } ();
5570  std::unique_ptr<padding_type> padding(
5571  new padding_type(myRank, numImports));
5572 
5573  if (imports.need_sync_host()) {
5574  imports.sync_host();
5575  }
5576  auto imports_h = imports.view_host();
5577  if (numPacketsPerLID.need_sync_host ()) {
5578  numPacketsPerLID.sync_host();
5579  }
5580  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5581 
5582  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5583  auto importLIDs_h = importLIDs.view_host();
5584 
5585  const map_type& tgtRowMap = *rowMap_;
5586  // Always merge source column indices, since isMerged() is
5587  // per-process state, and we don't know its value on other
5588  // processes that sent us data.
5589  constexpr bool src_is_unique = false;
5590  const bool tgt_is_unique = isMerged();
5591 
5592  std::vector<GO> tgtGblColIndsScratch;
5593  size_t offset = 0;
5594  execute_sync_host_uvm_access(); // protect host UVM access
5595  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5596  // CrsGraph packs just global column indices, while CrsMatrix
5597  // packs bytes (first the number of entries in the row, then the
5598  // global column indices, then other stuff like the matrix
5599  // values in that row).
5600  const LO origSrcNumEnt =
5601  static_cast<LO>(numPacketsPerLID_h[whichImport]);
5602  GO* const srcGblColInds = imports_h.data() + offset;
5603 
5604  const LO tgtLclRowInd = importLIDs_h[whichImport];
5605  const GO tgtGblRowInd =
5606  tgtRowMap.getGlobalElement(tgtLclRowInd);
5607  auto tgtGblColInds = getRowGraphGlobalRow(
5608  tgtGblColIndsScratch, *this, tgtGblRowInd);
5609  const size_t origTgtNumEnt(tgtGblColInds.size());
5610 
5611  padding->update_import(whichImport, tgtLclRowInd,
5612  tgtGblColInds.getRawPtr(),
5613  origTgtNumEnt, tgt_is_unique,
5614  srcGblColInds,
5615  origSrcNumEnt, src_is_unique);
5616  offset += origSrcNumEnt;
5617  }
5618 
5619  if (verbose) {
5620  std::ostringstream os;
5621  os << *prefix << "Done" << endl;
5622  std::cerr << os.str();
5623  }
5624  return padding;
5625  }
5626 
5627  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5628  std::unique_ptr<
5629  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5630  >
5631  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5632  computePaddingForCrsMatrixUnpack(
5633  const Kokkos::DualView<const local_ordinal_type*,
5634  buffer_device_type>& importLIDs,
5635  Kokkos::DualView<char*, buffer_device_type> imports,
5636  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5637  const bool verbose) const
5638  {
5639  using Details::Impl::getRowGraphGlobalRow;
5640  using Details::PackTraits;
5641  using std::endl;
5642  using LO = local_ordinal_type;
5643  using GO = global_ordinal_type;
5644  const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
5645 
5646  std::unique_ptr<std::string> prefix;
5647  if (verbose) {
5648  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5649  std::ostringstream os;
5650  os << *prefix << "importLIDs.extent(0): "
5651  << importLIDs.extent(0)
5652  << ", imports.extent(0): "
5653  << imports.extent(0)
5654  << ", numPacketsPerLID.extent(0): "
5655  << numPacketsPerLID.extent(0) << endl;
5656  std::cerr << os.str();
5657  }
5658  const bool extraVerbose =
5659  verbose && Details::Behavior::verbose("CrsPadding");
5660 
5661  const LO numImports = static_cast<LO>(importLIDs.extent(0));
5662  TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports );
5663  const int myRank = [&] () {
5664  auto comm = rowMap_.is_null() ? Teuchos::null :
5665  rowMap_->getComm();
5666  return comm.is_null() ? -1 : comm->getRank();
5667  } ();
5668  std::unique_ptr<padding_type> padding(
5669  new padding_type(myRank, numImports));
5670 
5671  if (imports.need_sync_host()) {
5672  imports.sync_host();
5673  }
5674  auto imports_h = imports.view_host();
5675  if (numPacketsPerLID.need_sync_host ()) {
5676  numPacketsPerLID.sync_host();
5677  }
5678  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5679 
5680  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5681  auto importLIDs_h = importLIDs.view_host();
5682 
5683  const map_type& tgtRowMap = *rowMap_;
5684  // Always merge source column indices, since isMerged() is
5685  // per-process state, and we don't know its value on other
5686  // processes that sent us data.
5687  constexpr bool src_is_unique = false;
5688  const bool tgt_is_unique = isMerged();
5689 
5690  std::vector<GO> srcGblColIndsScratch;
5691  std::vector<GO> tgtGblColIndsScratch;
5692  size_t offset = 0;
5693  execute_sync_host_uvm_access(); // protect host UVM access
5694  for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5695  // CrsGraph packs just global column indices, while CrsMatrix
5696  // packs bytes (first the number of entries in the row, then the
5697  // global column indices, then other stuff like the matrix
5698  // values in that row).
5699  const size_t numBytes = numPacketsPerLID_h[whichImport];
5700  if (extraVerbose) {
5701  std::ostringstream os;
5702  os << *prefix << "whichImport=" << whichImport
5703  << ", numImports=" << numImports
5704  << ", numBytes=" << numBytes << endl;
5705  std::cerr << os.str();
5706  }
5707  if (numBytes == 0) {
5708  continue; // special case: no entries to unpack for this row
5709  }
5710  LO origSrcNumEnt = 0;
5711  const size_t numEntBeg = offset;
5712  const size_t numEntLen =
5713  PackTraits<LO>::packValueCount(origSrcNumEnt);
5714  TEUCHOS_ASSERT( numBytes >= numEntLen );
5715  TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen );
5716  PackTraits<LO>::unpackValue(origSrcNumEnt,
5717  imports_h.data() + numEntBeg);
5718  if (extraVerbose) {
5719  std::ostringstream os;
5720  os << *prefix << "whichImport=" << whichImport
5721  << ", numImports=" << numImports
5722  << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5723  std::cerr << os.str();
5724  }
5725  TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) );
5726  TEUCHOS_ASSERT( numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)) );
5727  const size_t gidsBeg = numEntBeg + numEntLen;
5728  if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5729  srcGblColIndsScratch.resize(origSrcNumEnt);
5730  }
5731  GO* const srcGblColInds = srcGblColIndsScratch.data();
5732  PackTraits<GO>::unpackArray(srcGblColInds,
5733  imports_h.data() + gidsBeg,
5734  origSrcNumEnt);
5735  const LO tgtLclRowInd = importLIDs_h[whichImport];
5736  const GO tgtGblRowInd =
5737  tgtRowMap.getGlobalElement(tgtLclRowInd);
5738  auto tgtGblColInds = getRowGraphGlobalRow(
5739  tgtGblColIndsScratch, *this, tgtGblRowInd);
5740  const size_t origNumTgtEnt(tgtGblColInds.size());
5741 
5742  if (extraVerbose) {
5743  std::ostringstream os;
5744  os << *prefix << "whichImport=" << whichImport
5745  << ", numImports=" << numImports
5746  << ": Call padding->update_import" << endl;
5747  std::cerr << os.str();
5748  }
5749  padding->update_import(whichImport, tgtLclRowInd,
5750  tgtGblColInds.getRawPtr(),
5751  origNumTgtEnt, tgt_is_unique,
5752  srcGblColInds,
5753  origSrcNumEnt, src_is_unique);
5754  offset += numBytes;
5755  }
5756 
5757  if (verbose) {
5758  std::ostringstream os;
5759  os << *prefix << "Done" << endl;
5760  std::cerr << os.str();
5761  }
5762  return padding;
5763  }
5764 
5765  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5766  void
5767  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5768  packAndPrepare
5769  (const SrcDistObject& source,
5770  const Kokkos::DualView<const local_ordinal_type*,
5771  buffer_device_type>& exportLIDs,
5772  Kokkos::DualView<packet_type*,
5773  buffer_device_type>& exports,
5774  Kokkos::DualView<size_t*,
5775  buffer_device_type> numPacketsPerLID,
5776  size_t& constantNumPackets)
5777  {
5779  using GO = global_ordinal_type;
5780  using std::endl;
5781  using crs_graph_type =
5782  CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5783  const char tfecfFuncName[] = "packAndPrepare: ";
5784  ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
5785 
5786  const bool verbose = verbose_;
5787  std::unique_ptr<std::string> prefix;
5788  if (verbose) {
5789  prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5790  std::ostringstream os;
5791  os << *prefix << "Start" << endl;
5792  std::cerr << os.str();
5793  }
5794 
5795  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5796  (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5797  std::runtime_error,
5798  "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5799  << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
5800  << ".");
5801  const row_graph_type* srcRowGraphPtr =
5802  dynamic_cast<const row_graph_type*> (&source);
5803  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5804  (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
5805  "or Import operation to a CrsGraph must be a RowGraph with the same "
5806  "template parameters.");
5807  // We don't check whether src_graph has had fillComplete called,
5808  // because it doesn't matter whether the *source* graph has been
5809  // fillComplete'd. The target graph can not be fillComplete'd yet.
5810  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5811  (this->isFillComplete (), std::runtime_error,
5812  "The target graph of an Import or Export must not be fill complete.");
5813 
5814  const crs_graph_type* srcCrsGraphPtr =
5815  dynamic_cast<const crs_graph_type*> (&source);
5816 
5817  if (srcCrsGraphPtr == nullptr) {
5818  using Teuchos::ArrayView;
5819  using LO = local_ordinal_type;
5820 
5821  if (verbose) {
5822  std::ostringstream os;
5823  os << *prefix << "Source is a RowGraph but not a CrsGraph"
5824  << endl;
5825  std::cerr << os.str();
5826  }
5827  // RowGraph::pack serves the "old" DistObject interface. It
5828  // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5829  // entails deep-copying the exports buffer on output. RowGraph
5830  // is a convenience interface when not a CrsGraph, so we accept
5831  // the performance hit.
5832  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5833  auto exportLIDs_h = exportLIDs.view_host ();
5834  ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
5835  exportLIDs_h.extent (0));
5836  Teuchos::Array<GO> exports_a;
5837 
5838  numPacketsPerLID.clear_sync_state ();
5839  numPacketsPerLID.modify_host ();
5840  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5841  ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
5842  numPacketsPerLID_h.extent (0));
5843  srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
5844  constantNumPackets);
5845  const size_t newSize = static_cast<size_t> (exports_a.size ());
5846  if (static_cast<size_t> (exports.extent (0)) != newSize) {
5847  using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5848  exports = exports_dv_type ("exports", newSize);
5849  }
5850  Kokkos::View<const packet_type*, Kokkos::HostSpace,
5851  Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
5852  exports.clear_sync_state ();
5853  exports.modify_host ();
5854  Kokkos::deep_copy (exports.view_host (), exports_a_h);
5855  }
5856  // packCrsGraphNew requires k_rowPtrsPacked_ to be set
5857  else if (! getColMap ().is_null () &&
5858  (rowPtrsPacked_dev_.extent (0) != 0 ||
5859  getRowMap ()->getNodeNumElements () == 0)) {
5860  if (verbose) {
5861  std::ostringstream os;
5862  os << *prefix << "packCrsGraphNew path" << endl;
5863  std::cerr << os.str();
5864  }
5865  using export_pids_type =
5866  Kokkos::DualView<const int*, buffer_device_type>;
5867  export_pids_type exportPIDs; // not filling it; needed for syntax
5868  using LO = local_ordinal_type;
5869  using NT = node_type;
5871  packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
5872  exports, numPacketsPerLID,
5873  constantNumPackets, false);
5874  }
5875  else {
5876  srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
5877  constantNumPackets);
5878  }
5879 
5880  if (verbose) {
5881  std::ostringstream os;
5882  os << *prefix << "Done" << endl;
5883  std::cerr << os.str();
5884  }
5885  }
5886 
5887  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5888  void
5890  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5891  Teuchos::Array<GlobalOrdinal>& exports,
5892  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5893  size_t& constantNumPackets) const
5894  {
5895  auto col_map = this->getColMap();
5896  // packCrsGraph requires k_rowPtrsPacked to be set
5897  if( !col_map.is_null() && (rowPtrsPacked_dev_.extent(0) != 0 || getRowMap()->getNodeNumElements() ==0)) {
5899  packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
5900  exportLIDs, constantNumPackets);
5901  }
5902  else {
5903  this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5904  constantNumPackets);
5905  }
5906  }
5907 
5908  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5909  void
5911  packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5912  Teuchos::Array<GlobalOrdinal>& exports,
5913  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5914  size_t& constantNumPackets) const
5915  {
5916  using std::endl;
5917  using LO = LocalOrdinal;
5918  using GO = GlobalOrdinal;
5919  using host_execution_space =
5920  typename Kokkos::View<size_t*, device_type>::
5921  HostMirror::execution_space;
5922  const char tfecfFuncName[] = "packFillActive: ";
5923  const bool verbose = verbose_;
5924 
5925  const auto numExportLIDs = exportLIDs.size ();
5926  std::unique_ptr<std::string> prefix;
5927  if (verbose) {
5928  prefix = this->createPrefix("CrsGraph", "allocateIndices");
5929  std::ostringstream os;
5930  os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5931  std::cerr << os.str();
5932  }
5933  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5934  (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5935  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5936  " = " << numPacketsPerLID.size () << ".");
5937 
5938  const map_type& rowMap = * (this->getRowMap ());
5939  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5940  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5941  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5942  "This graph claims to be locally indexed, but its column Map is nullptr. "
5943  "This should never happen. Please report this bug to the Tpetra "
5944  "developers.");
5945 
5946  // We may pack different amounts of data for different rows.
5947  constantNumPackets = 0;
5948 
5949  // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5950  // it might be now, but we might as well be safe).
5951  size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5952  const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
5953 
5954  // Count the total number of packets (column indices, in the case
5955  // of a CrsGraph) to pack. While doing so, set
5956  // numPacketsPerLID[i] to the number of entries owned by the
5957  // calling process in (local) row exportLIDs[i] of the graph, that
5958  // the caller wants us to send out.
5959  Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5960  size_t totalNumPackets = 0;
5961  size_t errCount = 0;
5962  // lambdas turn what they capture const, so we can't
5963  // atomic_add(&errCount,1). Instead, we need a View to modify.
5964  typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5965  host_device_type;
5966  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5967  constexpr size_t ONE = 1;
5968 
5969  execute_sync_host_uvm_access(); // protect host UVM access
5970  Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
5971  inputRange,
5972  [=] (const LO& i, size_t& curTotalNumPackets) {
5973  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5974  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5975  Kokkos::atomic_add (&errCountView(), ONE);
5976  numPacketsPerLID_raw[i] = 0;
5977  }
5978  else {
5979  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5980  numPacketsPerLID_raw[i] = numEnt;
5981  curTotalNumPackets += numEnt;
5982  }
5983  },
5984  totalNumPackets);
5985 
5986  if (verbose) {
5987  std::ostringstream os;
5988  os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5989  std::cerr << os.str();
5990  }
5991  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5992  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5993  "one or more errors! errCount = " << errCount
5994  << ", totalNumPackets = " << totalNumPackets << ".");
5995  errCount = 0;
5996 
5997  // Allocate space for all the column indices to pack.
5998  exports.resize (totalNumPackets);
5999 
6000  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6001  (! this->supportsRowViews (), std::logic_error,
6002  "this->supportsRowViews() returns false; this should never happen. "
6003  "Please report this bug to the Tpetra developers.");
6004 
6005  // Loop again over the rows to export, and pack rows of indices
6006  // into the output buffer.
6007 
6008  if (verbose) {
6009  std::ostringstream os;
6010  os << *prefix << "Pack into exports" << endl;
6011  std::cerr << os.str();
6012  }
6013 
6014  // Teuchos::ArrayView may not be thread safe, or may not be
6015  // efficiently thread safe. Better to use the raw pointer.
6016  GO* const exports_raw = exports.getRawPtr ();
6017  errCount = 0;
6018  Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
6019  inputRange, [=, &prefix]
6020  (const LO i, size_t& exportsOffset, const bool final) {
6021  const size_t curOffset = exportsOffset;
6022  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
6023  const RowInfo rowInfo =
6024  this->getRowInfoFromGlobalRowIndex (gblRow);
6025 
6026  using TDO = Tpetra::Details::OrdinalTraits<size_t>;
6027  if (rowInfo.localRow == TDO::invalid ()) {
6028  if (verbose) {
6029  std::ostringstream os;
6030  os << *prefix << ": INVALID rowInfo: i=" << i
6031  << ", lclRow=" << exportLIDs_raw[i] << endl;
6032  std::cerr << os.str();
6033  }
6034  Kokkos::atomic_add (&errCountView(), ONE);
6035  }
6036  else if (curOffset + rowInfo.numEntries > totalNumPackets) {
6037  if (verbose) {
6038  std::ostringstream os;
6039  os << *prefix << ": UH OH! For i=" << i << ", lclRow="
6040  << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
6041  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
6042  << ") > totalNumPackets (= " << totalNumPackets << ")."
6043  << endl;
6044  std::cerr << os.str();
6045  }
6046  Kokkos::atomic_add (&errCountView(), ONE);
6047  }
6048  else {
6049  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
6050  if (this->isLocallyIndexed ()) {
6051  auto lclColInds = getLocalIndsViewHost (rowInfo);
6052  if (final) {
6053  for (LO k = 0; k < numEnt; ++k) {
6054  const LO lclColInd = lclColInds(k);
6055  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6056  // Pack it, even if it's wrong. Let the receiving
6057  // process deal with it. Otherwise, we'll miss out
6058  // on any correct data.
6059  exports_raw[curOffset + k] = gblColInd;
6060  } // for each entry in the row
6061  } // final pass?
6062  exportsOffset = curOffset + numEnt;
6063  }
6064  else if (this->isGloballyIndexed ()) {
6065  auto gblColInds = getGlobalIndsViewHost (rowInfo);
6066  if (final) {
6067  for (LO k = 0; k < numEnt; ++k) {
6068  const GO gblColInd = gblColInds(k);
6069  // Pack it, even if it's wrong. Let the receiving
6070  // process deal with it. Otherwise, we'll miss out
6071  // on any correct data.
6072  exports_raw[curOffset + k] = gblColInd;
6073  } // for each entry in the row
6074  } // final pass?
6075  exportsOffset = curOffset + numEnt;
6076  }
6077  // If neither globally nor locally indexed, then the graph
6078  // has no entries in this row (or indeed, in any row on this
6079  // process) to pack.
6080  }
6081  });
6082 
6083  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6084  (errCount != 0, std::logic_error, "Packing encountered "
6085  "one or more errors! errCount = " << errCount
6086  << ", totalNumPackets = " << totalNumPackets << ".");
6087 
6088  if (verbose) {
6089  std::ostringstream os;
6090  os << *prefix << "Done" << endl;
6091  std::cerr << os.str();
6092  }
6093  }
6094 
6095  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6096  void
6097  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6098  packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
6099  buffer_device_type>& exportLIDs,
6100  Kokkos::DualView<packet_type*,
6101  buffer_device_type>& exports,
6102  Kokkos::DualView<size_t*,
6103  buffer_device_type> numPacketsPerLID,
6104  size_t& constantNumPackets) const
6105  {
6106  using std::endl;
6107  using LO = local_ordinal_type;
6108  using GO = global_ordinal_type;
6109  using host_execution_space = typename Kokkos::View<size_t*,
6110  device_type>::HostMirror::execution_space;
6111  using host_device_type =
6112  Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
6113  using exports_dv_type =
6114  Kokkos::DualView<packet_type*, buffer_device_type>;
6115  const char tfecfFuncName[] = "packFillActiveNew: ";
6116  const bool verbose = verbose_;
6117 
6118  const auto numExportLIDs = exportLIDs.extent (0);
6119  std::unique_ptr<std::string> prefix;
6120  if (verbose) {
6121  prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
6122  std::ostringstream os;
6123  os << *prefix << "numExportLIDs: " << numExportLIDs
6124  << ", numPacketsPerLID.extent(0): "
6125  << numPacketsPerLID.extent(0) << endl;
6126  std::cerr << os.str();
6127  }
6128  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6129  (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
6130  "exportLIDs.extent(0) = " << numExportLIDs
6131  << " != numPacketsPerLID.extent(0) = "
6132  << numPacketsPerLID.extent (0) << ".");
6133  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6134  auto exportLIDs_h = exportLIDs.view_host ();
6135 
6136  const map_type& rowMap = * (this->getRowMap ());
6137  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
6138  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6139  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
6140  "This graph claims to be locally indexed, but its column Map is nullptr. "
6141  "This should never happen. Please report this bug to the Tpetra "
6142  "developers.");
6143 
6144  // We may pack different amounts of data for different rows.
6145  constantNumPackets = 0;
6146 
6147  numPacketsPerLID.clear_sync_state ();
6148  numPacketsPerLID.modify_host ();
6149  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6150 
6151  // Count the total number of packets (column indices, in the case
6152  // of a CrsGraph) to pack. While doing so, set
6153  // numPacketsPerLID[i] to the number of entries owned by the
6154  // calling process in (local) row exportLIDs[i] of the graph, that
6155  // the caller wants us to send out.
6156  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
6157  range_type inputRange (0, numExportLIDs);
6158  size_t totalNumPackets = 0;
6159  size_t errCount = 0;
6160  // lambdas turn what they capture const, so we can't
6161  // atomic_add(&errCount,1). Instead, we need a View to modify.
6162  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
6163  constexpr size_t ONE = 1;
6164 
6165  if (verbose) {
6166  std::ostringstream os;
6167  os << *prefix << "Compute totalNumPackets" << endl;
6168  std::cerr << os.str ();
6169  }
6170 
6171  execute_sync_host_uvm_access(); // protect host UVM access
6172  Kokkos::parallel_reduce
6173  ("Tpetra::CrsGraph::pack: totalNumPackets",
6174  inputRange,
6175  [=, &prefix] (const LO i, size_t& curTotalNumPackets) {
6176  const LO lclRow = exportLIDs_h[i];
6177  const GO gblRow = rowMap.getGlobalElement (lclRow);
6178  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6179  if (verbose) {
6180  std::ostringstream os;
6181  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6182  << " not in row Map on this process" << endl;
6183  std::cerr << os.str();
6184  }
6185  Kokkos::atomic_add (&errCountView(), ONE);
6186  numPacketsPerLID_h(i) = 0;
6187  }
6188  else {
6189  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
6190  numPacketsPerLID_h(i) = numEnt;
6191  curTotalNumPackets += numEnt;
6192  }
6193  },
6194  totalNumPackets);
6195 
6196  if (verbose) {
6197  std::ostringstream os;
6198  os << *prefix << "totalNumPackets: " << totalNumPackets
6199  << ", errCount: " << errCount << endl;
6200  std::cerr << os.str ();
6201  }
6202  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6203  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
6204  "one or more errors! totalNumPackets: " << totalNumPackets
6205  << ", errCount: " << errCount << ".");
6206 
6207  // Allocate space for all the column indices to pack.
6208  if (size_t(exports.extent (0)) < totalNumPackets) {
6209  // FIXME (mfh 09 Apr 2019) Create without initializing.
6210  exports = exports_dv_type ("exports", totalNumPackets);
6211  }
6212 
6213  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6214  (! this->supportsRowViews (), std::logic_error,
6215  "this->supportsRowViews() returns false; this should never happen. "
6216  "Please report this bug to the Tpetra developers.");
6217 
6218  // Loop again over the rows to export, and pack rows of indices
6219  // into the output buffer.
6220 
6221  if (verbose) {
6222  std::ostringstream os;
6223  os << *prefix << "Pack into exports buffer" << endl;
6224  std::cerr << os.str();
6225  }
6226 
6227  exports.clear_sync_state ();
6228  exports.modify_host ();
6229  auto exports_h = exports.view_host ();
6230 
6231  errCount = 0;
6232  Kokkos::parallel_scan
6233  ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
6234  inputRange, [=, &prefix]
6235  (const LO i, size_t& exportsOffset, const bool final) {
6236  const size_t curOffset = exportsOffset;
6237  const LO lclRow = exportLIDs_h(i);
6238  const GO gblRow = rowMap.getGlobalElement (lclRow);
6239  if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
6240  if (verbose) {
6241  std::ostringstream os;
6242  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6243  << " not in row Map on this process" << endl;
6244  std::cerr << os.str();
6245  }
6246  Kokkos::atomic_add (&errCountView(), ONE);
6247  return;
6248  }
6249 
6250  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
6251  if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
6252  if (verbose) {
6253  std::ostringstream os;
6254  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6255  << ", gblRow=" << gblRow << ": invalid rowInfo"
6256  << endl;
6257  std::cerr << os.str();
6258  }
6259  Kokkos::atomic_add (&errCountView(), ONE);
6260  return;
6261  }
6262 
6263  if (curOffset + rowInfo.numEntries > totalNumPackets) {
6264  if (verbose) {
6265  std::ostringstream os;
6266  os << *prefix << "For i=" << i << ", lclRow=" << lclRow
6267  << ", gblRow=" << gblRow << ", curOffset (= "
6268  << curOffset << ") + numEnt (= " << rowInfo.numEntries
6269  << ") > totalNumPackets (= " << totalNumPackets
6270  << ")." << endl;
6271  std::cerr << os.str();
6272  }
6273  Kokkos::atomic_add (&errCountView(), ONE);
6274  return;
6275  }
6276 
6277  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
6278  if (this->isLocallyIndexed ()) {
6279  auto lclColInds = getLocalIndsViewHost(rowInfo);
6280  if (final) {
6281  for (LO k = 0; k < numEnt; ++k) {
6282  const LO lclColInd = lclColInds(k);
6283  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6284  // Pack it, even if it's wrong. Let the receiving
6285  // process deal with it. Otherwise, we'll miss out
6286  // on any correct data.
6287  exports_h(curOffset + k) = gblColInd;
6288  } // for each entry in the row
6289  } // final pass?
6290  exportsOffset = curOffset + numEnt;
6291  }
6292  else if (this->isGloballyIndexed ()) {
6293  auto gblColInds = getGlobalIndsViewHost(rowInfo);
6294  if (final) {
6295  for (LO k = 0; k < numEnt; ++k) {
6296  const GO gblColInd = gblColInds(k);
6297  // Pack it, even if it's wrong. Let the receiving
6298  // process deal with it. Otherwise, we'll miss out
6299  // on any correct data.
6300  exports_h(curOffset + k) = gblColInd;
6301  } // for each entry in the row
6302  } // final pass?
6303  exportsOffset = curOffset + numEnt;
6304  }
6305  // If neither globally nor locally indexed, then the graph
6306  // has no entries in this row (or indeed, in any row on this
6307  // process) to pack.
6308  });
6309 
6310  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6311  // (errCount != 0, std::logic_error, "Packing encountered "
6312  // "one or more errors! errCount = " << errCount
6313  // << ", totalNumPackets = " << totalNumPackets << ".");
6314 
6315  if (verbose) {
6316  std::ostringstream os;
6317  os << *prefix << "errCount=" << errCount << "; Done" << endl;
6318  std::cerr << os.str();
6319  }
6320  }
6321 
6322  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6323  void
6324  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6325  unpackAndCombine
6326  (const Kokkos::DualView<const local_ordinal_type*,
6327  buffer_device_type>& importLIDs,
6328  Kokkos::DualView<packet_type*,
6329  buffer_device_type> imports,
6330  Kokkos::DualView<size_t*,
6331  buffer_device_type> numPacketsPerLID,
6332  const size_t /* constantNumPackets */,
6333  const CombineMode /* combineMode */ )
6334  {
6335  using Details::ProfilingRegion;
6336  using std::endl;
6337  using LO = local_ordinal_type;
6338  using GO = global_ordinal_type;
6339  const char tfecfFuncName[] = "unpackAndCombine";
6340 
6341  ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
6342  const bool verbose = verbose_;
6343 
6344  std::unique_ptr<std::string> prefix;
6345  if (verbose) {
6346  prefix = this->createPrefix("CrsGraph", tfecfFuncName);
6347  std::ostringstream os;
6348  os << *prefix << "Start" << endl;
6349  std::cerr << os.str ();
6350  }
6351  {
6352  auto padding = computeCrsPaddingForImports(
6353  importLIDs, imports, numPacketsPerLID, verbose);
6354  applyCrsPadding(*padding, verbose);
6355  if (verbose) {
6356  std::ostringstream os;
6357  os << *prefix << "Done computing & applying padding" << endl;
6358  std::cerr << os.str ();
6359  }
6360  }
6361 
6362  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6363  // reasonable meaning, whether or not the matrix is fill complete.
6364  // It's just more work to implement.
6365 
6366  // We are not checking the value of the CombineMode input
6367  // argument. For CrsGraph, we only support import/export
6368  // operations if fillComplete has not yet been called. Any
6369  // incoming column-indices are inserted into the target graph. In
6370  // this context, CombineMode values of ADD vs INSERT are
6371  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6372  // duplicate column-index is inserted, it will be compressed out
6373  // when fillComplete is called.
6374  //
6375  // Note: I think REPLACE means that an existing row is replaced by
6376  // the imported row, i.e., the existing indices are cleared. CGB,
6377  // 6/17/2010
6378 
6379  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6380  (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6381  std::runtime_error, ": importLIDs.extent(0) = "
6382  << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6383  << numPacketsPerLID.extent (0) << ".");
6384  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6385  (isFillComplete (), std::runtime_error,
6386  ": Import or Export operations are not allowed on a target "
6387  "CrsGraph that is fillComplete.");
6388 
6389  const size_t numImportLIDs(importLIDs.extent(0));
6390  if (numPacketsPerLID.need_sync_host()) {
6391  numPacketsPerLID.sync_host();
6392  }
6393  auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6394  if (imports.need_sync_host()) {
6395  imports.sync_host();
6396  }
6397  auto imports_h = imports.view_host();
6398  TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
6399  auto importLIDs_h = importLIDs.view_host();
6400 
6401  // If we're inserting in local indices, let's pre-allocate
6402  Teuchos::Array<LO> lclColInds;
6403  if (isLocallyIndexed()) {
6404  if (verbose) {
6405  std::ostringstream os;
6406  os << *prefix << "Preallocate local indices scratch" << endl;
6407  std::cerr << os.str();
6408  }
6409  size_t maxNumInserts = 0;
6410  for (size_t i = 0; i < numImportLIDs; ++i) {
6411  maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6412  }
6413  if (verbose) {
6414  std::ostringstream os;
6415  os << *prefix << "Local indices scratch size: "
6416  << maxNumInserts << endl;
6417  std::cerr << os.str();
6418  }
6419  lclColInds.resize (maxNumInserts);
6420  }
6421  else {
6422  if (verbose) {
6423  std::ostringstream os;
6424  os << *prefix;
6425  if (isGloballyIndexed()) {
6426  os << "Graph is globally indexed";
6427  }
6428  else {
6429  os << "Graph is neither locally nor globally indexed";
6430  }
6431  os << endl;
6432  std::cerr << os.str();
6433  }
6434  }
6435 
6436  TEUCHOS_ASSERT( ! rowMap_.is_null() );
6437  const map_type& rowMap = *rowMap_;
6438 
6439  try {
6440  size_t importsOffset = 0;
6441  for (size_t i = 0; i < numImportLIDs; ++i) {
6442  if (verbose) {
6443  std::ostringstream os;
6444  os << *prefix << "i=" << i << ", numImportLIDs="
6445  << numImportLIDs << endl;
6446  std::cerr << os.str();
6447  }
6448  // We can only unpack into owned rows, since we only have
6449  // local row indices.
6450  const LO lclRow = importLIDs_h[i];
6451  const GO gblRow = rowMap.getGlobalElement(lclRow);
6452  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6453  (gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
6454  std::logic_error, "importLIDs[i=" << i << "]="
6455  << lclRow << " is not in the row Map on the calling "
6456  "process.");
6457  const LO numEnt = numPacketsPerLID_h[i];
6458  const GO* const gblColInds = (numEnt == 0) ? nullptr :
6459  imports_h.data() + importsOffset;
6460  if (! isLocallyIndexed()) {
6461  insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
6462  }
6463  else {
6464  // FIXME (mfh 09 Feb 2020) Now would be a good time to do
6465  // column Map filtering.
6466  for (LO j = 0; j < numEnt; j++) {
6467  lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
6468  }
6469  insertLocalIndices(lclRow, numEnt, lclColInds.data());
6470  }
6471  importsOffset += numEnt;
6472  }
6473  }
6474  catch (std::exception& e) {
6475  TEUCHOS_TEST_FOR_EXCEPTION
6476  (true, std::runtime_error,
6477  "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
6478  "exception: " << endl << e.what());
6479  }
6480 
6481  if (verbose) {
6482  std::ostringstream os;
6483  os << *prefix << "Done" << endl;
6484  std::cerr << os.str();
6485  }
6486  }
6487 
6488  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6489  void
6491  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6492  {
6493  using Teuchos::Comm;
6494  using Teuchos::null;
6495  using Teuchos::ParameterList;
6496  using Teuchos::RCP;
6497 
6498  // We'll set all the state "transactionally," so that this method
6499  // satisfies the strong exception guarantee. This object's state
6500  // won't be modified until the end of this method.
6501  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6502  RCP<import_type> importer;
6503  RCP<export_type> exporter;
6504 
6505  rowMap = newMap;
6506  RCP<const Comm<int> > newComm =
6507  (newMap.is_null ()) ? null : newMap->getComm ();
6508 
6509  if (! domainMap_.is_null ()) {
6510  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6511  // Common case: original domain and row Maps are identical.
6512  // In that case, we need only replace the original domain Map
6513  // with the new Map. This ensures that the new domain and row
6514  // Maps _stay_ identical.
6515  domainMap = newMap;
6516  } else {
6517  domainMap = domainMap_->replaceCommWithSubset (newComm);
6518  }
6519  }
6520  if (! rangeMap_.is_null ()) {
6521  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6522  // Common case: original range and row Maps are identical. In
6523  // that case, we need only replace the original range Map with
6524  // the new Map. This ensures that the new range and row Maps
6525  // _stay_ identical.
6526  rangeMap = newMap;
6527  } else {
6528  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6529  }
6530  }
6531  if (! colMap_.is_null ()) {
6532  colMap = colMap_->replaceCommWithSubset (newComm);
6533  }
6534 
6535  // (Re)create the Export and / or Import if necessary.
6536  if (! newComm.is_null ()) {
6537  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6538  //
6539  // The operations below are collective on the new communicator.
6540  //
6541  // (Re)create the Export object if necessary. If I haven't
6542  // called fillComplete yet, I don't have a rangeMap, so I must
6543  // first check if the _original_ rangeMap is not null. Ditto
6544  // for the Import object and the domain Map.
6545  if (! rangeMap_.is_null () &&
6546  rangeMap != rowMap &&
6547  ! rangeMap->isSameAs (*rowMap)) {
6548  if (params.is_null () || ! params->isSublist ("Export")) {
6549  exporter = rcp (new export_type (rowMap, rangeMap));
6550  }
6551  else {
6552  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6553  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6554  }
6555  }
6556  // (Re)create the Import object if necessary.
6557  if (! domainMap_.is_null () &&
6558  domainMap != colMap &&
6559  ! domainMap->isSameAs (*colMap)) {
6560  if (params.is_null () || ! params->isSublist ("Import")) {
6561  importer = rcp (new import_type (domainMap, colMap));
6562  } else {
6563  RCP<ParameterList> importSublist = sublist (params, "Import", true);
6564  importer = rcp (new import_type (domainMap, colMap, importSublist));
6565  }
6566  }
6567  } // if newComm is not null
6568 
6569  // Defer side effects until the end. If no destructors throw
6570  // exceptions (they shouldn't anyway), then this method satisfies
6571  // the strong exception guarantee.
6572  exporter_ = exporter;
6573  importer_ = importer;
6574  rowMap_ = rowMap;
6575  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6576  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6577  // the same object. We might want to get rid of this redundant
6578  // pointer sometime, but for now, we'll leave it alone and just
6579  // set map_ to the same object.
6580  this->map_ = rowMap;
6581  domainMap_ = domainMap;
6582  rangeMap_ = rangeMap;
6583  colMap_ = colMap;
6584  }
6585 
6586  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6587  void
6589  getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6590  {
6591  using std::endl;
6592  using LO = LocalOrdinal;
6593  using GO = GlobalOrdinal;
6594  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6595  const bool verbose = verbose_;
6596 
6597  std::unique_ptr<std::string> prefix;
6598  if (verbose) {
6599  prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
6600  std::ostringstream os;
6601  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6602  << endl;
6603  std::cerr << os.str();
6604  }
6605 
6606  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6607  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6608  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
6609  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6610  (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6611  std::invalid_argument, "offsets.extent(0) = " <<
6612  offsets.extent (0) << " < getNodeNumRows() = " << lclNumRows << ".");
6613 
6614  const map_type& rowMap = * (this->getRowMap ());
6615  const map_type& colMap = * (this->getColMap ());
6616 
6617  // We only use these in debug mode, but since debug mode is a
6618  // run-time option, they need to exist here. That's why we create
6619  // the vector with explicit size zero, to avoid overhead if debug
6620  // mode is off.
6621  bool allRowMapDiagEntriesInColMap = true;
6622  bool allDiagEntriesFound = true;
6623  bool allOffsetsCorrect = true;
6624  bool noOtherWeirdness = true;
6625  using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6626  wrong_offsets_type wrongOffsets(0);
6627 
6628  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6629  // the subset of Map functionality that we need below.
6630  auto lclRowMap = rowMap.getLocalMap ();
6631  auto lclColMap = colMap.getLocalMap ();
6632 
6633  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6634  // setup, at least on the host. For CUDA, we have to use LocalMap
6635  // (that comes from each of the two Maps).
6636 
6637  const bool sorted = this->isSorted ();
6638  if (isFillComplete ()) {
6639  auto lclGraph = this->getLocalGraphDevice ();
6640  ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6641  lclGraph.row_map,
6642  lclGraph.entries, sorted);
6643  }
6644  else {
6645  // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6646  // since the graph is not fill complete. The previous version
6647  // of this code assumed UVM; this version does not.
6648  auto offsets_h = Kokkos::create_mirror_view (offsets);
6649 
6650  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6651  // Find the diagonal entry. Since the row Map and column Map
6652  // may differ, we have to compare global row and column
6653  // indices, not local.
6654  const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6655  const GO gblColInd = gblRowInd;
6656  const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6657 
6658  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6659  allRowMapDiagEntriesInColMap = false;
6660  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6661  }
6662  else {
6663  const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6664  if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6665  rowInfo.numEntries > 0) {
6666 
6667  auto colInds = this->getLocalIndsViewHost (rowInfo);
6668  const size_t hint = 0; // not needed for this algorithm
6669  const size_t offset =
6670  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6671  lclColInd, hint, sorted);
6672  offsets_h(lclRowInd) = offset;
6673 
6674  if (debug_) {
6675  // Now that we have what we think is an offset, make sure
6676  // that it really does point to the diagonal entry. Offsets
6677  // are _relative_ to each row, not absolute (for the whole
6678  // (local) graph).
6679  typename local_inds_dualv_type::t_host::const_type lclColInds;
6680  try {
6681  lclColInds = this->getLocalIndsViewHost (rowInfo);
6682  }
6683  catch (...) {
6684  noOtherWeirdness = false;
6685  }
6686  // Don't continue with error checking if the above failed.
6687  if (noOtherWeirdness) {
6688  const size_t numEnt = lclColInds.extent (0);
6689  if (offset >= numEnt) {
6690  // Offsets are relative to each row, so this means that
6691  // the offset is out of bounds.
6692  allOffsetsCorrect = false;
6693  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6694  } else {
6695  const LO actualLclColInd = lclColInds(offset);
6696  const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6697  if (actualGblColInd != gblColInd) {
6698  allOffsetsCorrect = false;
6699  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6700  }
6701  }
6702  }
6703  } // debug_
6704  }
6705  else { // either row is empty, or something went wrong w/ getRowInfo()
6706  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6707  allDiagEntriesFound = false;
6708  }
6709  } // whether lclColInd is a valid local column index
6710  } // for each local row
6711 
6712  Kokkos::deep_copy (offsets, offsets_h);
6713  } // whether the graph is fill complete
6714 
6715  if (verbose && wrongOffsets.size () != 0) {
6716  std::ostringstream os;
6717  os << *prefix << "Wrong offsets: [";
6718  for (size_t k = 0; k < wrongOffsets.size (); ++k) {
6719  os << "(" << wrongOffsets[k].first << ","
6720  << wrongOffsets[k].second << ")";
6721  if (k + 1 < wrongOffsets.size ()) {
6722  os << ", ";
6723  }
6724  }
6725  os << "]" << endl;
6726  std::cerr << os.str();
6727  }
6728 
6729  if (debug_) {
6730  using Teuchos::reduceAll;
6731  using std::endl;
6732  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6733  const bool localSuccess =
6734  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6735  const int numResults = 5;
6736  int lclResults[5];
6737  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6738  lclResults[1] = allDiagEntriesFound ? 1 : 0;
6739  lclResults[2] = allOffsetsCorrect ? 1 : 0;
6740  lclResults[3] = noOtherWeirdness ? 1 : 0;
6741  // min-all-reduce will compute least rank of all the processes
6742  // that didn't succeed.
6743  lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
6744 
6745  int gblResults[5];
6746  gblResults[0] = 0;
6747  gblResults[1] = 0;
6748  gblResults[2] = 0;
6749  gblResults[3] = 0;
6750  gblResults[4] = 0;
6751  reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
6752  numResults, lclResults, gblResults);
6753 
6754  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
6755  || gblResults[3] != 1) {
6756  std::ostringstream os; // build error message
6757  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6758  "possibly among others): " << endl;
6759  if (gblResults[0] == 0) {
6760  os << " - The column Map does not contain at least one diagonal entry "
6761  "of the graph." << endl;
6762  }
6763  if (gblResults[1] == 0) {
6764  os << " - On one or more processes, some row does not contain a "
6765  "diagonal entry." << endl;
6766  }
6767  if (gblResults[2] == 0) {
6768  os << " - On one or more processes, some offsets are incorrect."
6769  << endl;
6770  }
6771  if (gblResults[3] == 0) {
6772  os << " - One or more processes had some other error."
6773  << endl;
6774  }
6775  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6776  }
6777  } // debug_
6778  }
6779 
6780  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6781  void
6783  getLocalOffRankOffsets (offset_device_view_type& offsets) const
6784  {
6785  using std::endl;
6786  const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
6787  const bool verbose = verbose_;
6788 
6789  std::unique_ptr<std::string> prefix;
6790  if (verbose) {
6791  prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
6792  std::ostringstream os;
6793  os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6794  << endl;
6795  std::cerr << os.str();
6796  }
6797 
6798  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6799  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6800  // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.
6801 
6802  const size_t lclNumRows = this->getNodeNumRows ();
6803 
6804  if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows+1) {
6805  offsets = k_offRankOffsets_;
6806  return;
6807  }
6808  haveLocalOffRankOffsets_ = false;
6809  k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows+1);
6810  offsets = k_offRankOffsets_;
6811 
6812  const map_type& colMap = * (this->getColMap ());
6813  const map_type& domMap = * (this->getDomainMap ());
6814 
6815  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6816  // the subset of Map functionality that we need below.
6817  auto lclColMap = colMap.getLocalMap ();
6818  auto lclDomMap = domMap.getLocalMap ();
6819 
6820  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6821  // setup, at least on the host. For CUDA, we have to use LocalMap
6822  // (that comes from each of the two Maps).
6823 
6824  TEUCHOS_ASSERT(this->isSorted ());
6825  if (isFillComplete ()) {
6826  auto lclGraph = this->getLocalGraphDevice ();
6827  ::Tpetra::Details::getGraphOffRankOffsets (k_offRankOffsets_,
6828  lclColMap, lclDomMap,
6829  lclGraph);
6830  haveLocalOffRankOffsets_ = true;
6831  }
6832  }
6833 
6834  namespace { // (anonymous)
6835 
6836  // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6837  // below). The point is to avoid the deep copy between the input
6838  // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6839  // can't use UVM to avoid the deep copy with CUDA, because the
6840  // ArrayRCP is a host pointer, while the input to the graph's
6841  // getLocalDiagOffsets method is a device pointer. Assigning a
6842  // host pointer to a device pointer is incorrect unless the host
6843  // pointer points to host pinned memory. The goal is to get rid
6844  // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6845  // copy for backwards compatibility.
6846  //
6847  // We have to use template magic because
6848  // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6849  // if device_type::memory_space is not Kokkos::HostSpace (as is
6850  // the case with CUDA).
6851 
6852  template<class DeviceType,
6853  const bool memSpaceIsHostSpace =
6854  std::is_same<typename DeviceType::memory_space,
6855  Kokkos::HostSpace>::value>
6856  struct HelpGetLocalDiagOffsets {};
6857 
6858  template<class DeviceType>
6859  struct HelpGetLocalDiagOffsets<DeviceType, true> {
6860  typedef DeviceType device_type;
6861  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6862  Kokkos::MemoryUnmanaged> device_offsets_type;
6863  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6864  Kokkos::MemoryUnmanaged> host_offsets_type;
6865 
6866  static device_offsets_type
6867  getDeviceOffsets (const host_offsets_type& hostOffsets)
6868  {
6869  // Host and device are the same; no need to allocate a
6870  // temporary device View.
6871  return hostOffsets;
6872  }
6873 
6874  static void
6875  copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
6876  const device_offsets_type& /* deviceOffsets */)
6877  { /* copy back not needed; host and device are the same */ }
6878  };
6879 
6880  template<class DeviceType>
6881  struct HelpGetLocalDiagOffsets<DeviceType, false> {
6882  typedef DeviceType device_type;
6883  // We have to do a deep copy, since host memory space != device
6884  // memory space. Thus, the device View is managed (we need to
6885  // allocate a temporary device View).
6886  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6887  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6888  Kokkos::MemoryUnmanaged> host_offsets_type;
6889 
6890  static device_offsets_type
6891  getDeviceOffsets (const host_offsets_type& hostOffsets)
6892  {
6893  // Host memory space != device memory space, so we must
6894  // allocate a temporary device View for the graph.
6895  return device_offsets_type ("offsets", hostOffsets.extent (0));
6896  }
6897 
6898  static void
6899  copyBackIfNeeded (const host_offsets_type& hostOffsets,
6900  const device_offsets_type& deviceOffsets)
6901  {
6902  Kokkos::deep_copy (hostOffsets, deviceOffsets);
6903  }
6904  };
6905  } // namespace (anonymous)
6906 
6907 
6908  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6909  void
6911  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
6912  {
6913  typedef LocalOrdinal LO;
6914  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6915  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6916  (! this->hasColMap (), std::runtime_error,
6917  "The graph does not yet have a column Map.");
6918  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
6919  if (static_cast<LO> (offsets.size ()) != myNumRows) {
6920  // NOTE (mfh 21 Jan 2016) This means that the method does not
6921  // satisfy the strong exception guarantee (no side effects
6922  // unless successful).
6923  offsets.resize (myNumRows);
6924  }
6925 
6926  // mfh 21 Jan 2016: This method unfortunately takes a
6927  // Teuchos::ArrayRCP, which is host memory. The graph wants a
6928  // device pointer. We can't access host memory from the device;
6929  // that's the wrong direction for UVM. (It's the right direction
6930  // for inefficient host pinned memory, but we don't want to use
6931  // that here.) Thus, if device memory space != host memory space,
6932  // we allocate and use a temporary device View to get the offsets.
6933  // If the two spaces are equal, the template magic makes the deep
6934  // copy go away.
6935  typedef HelpGetLocalDiagOffsets<device_type> helper_type;
6936  typedef typename helper_type::host_offsets_type host_offsets_type;
6937  // Unmanaged host View that views the output array.
6938  host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
6939  // Allocate temp device View if host != device, else reuse host array.
6940  auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
6941  // NOT recursion; this calls the overload that takes a device View.
6942  this->getLocalDiagOffsets (deviceOffsets);
6943  helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
6944  }
6945 
6946  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6947  bool
6949  supportsRowViews () const {
6950  return true;
6951  }
6952 
6953  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6954  void
6957  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6958  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
6959  const Teuchos::RCP<const map_type>& domainMap,
6960  const Teuchos::RCP<const map_type>& rangeMap,
6961  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6962  {
6967  using Teuchos::ArrayRCP;
6968  using Teuchos::ArrayView;
6969  using Teuchos::Comm;
6970  using Teuchos::ParameterList;
6971  using Teuchos::rcp;
6972  using Teuchos::RCP;
6973 #ifdef HAVE_TPETRA_MMM_TIMINGS
6974  using std::string;
6975  using Teuchos::TimeMonitor;
6976 #endif
6977 
6978  using LO = LocalOrdinal;
6979  using GO = GlobalOrdinal;
6980  using NT = node_type;
6981  using this_type = CrsGraph<LO, GO, NT>;
6982  using ivector_type = Vector<int, LO, GO, NT>;
6983 
6984  const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6985 
6986 #ifdef HAVE_TPETRA_MMM_TIMINGS
6987  string label;
6988  if(!params.is_null()) label = params->get("Timer Label", label);
6989  string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
6990  RCP<TimeMonitor> MM =
6991  rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
6992 #endif
6993 
6994  // Make sure that the input argument rowTransfer is either an
6995  // Import or an Export. Import and Export are the only two
6996  // subclasses of Transfer that we defined, but users might
6997  // (unwisely, for now at least) decide to implement their own
6998  // subclasses. Exclude this possibility.
6999  const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
7000  const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
7001  TEUCHOS_TEST_FOR_EXCEPTION(
7002  xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
7003  prefix << "The 'rowTransfer' input argument must be either an Import or "
7004  "an Export, and its template parameters must match the corresponding "
7005  "template parameters of the CrsGraph.");
7006 
7007  // Make sure that the input argument domainTransfer is either an
7008  // Import or an Export. Import and Export are the only two
7009  // subclasses of Transfer that we defined, but users might
7010  // (unwisely, for now at least) decide to implement their own
7011  // subclasses. Exclude this possibility.
7012  Teuchos::RCP<const import_type> xferDomainAsImport =
7013  Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
7014  Teuchos::RCP<const export_type> xferDomainAsExport =
7015  Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
7016 
7017  if(! domainTransfer.is_null()) {
7018 
7019  TEUCHOS_TEST_FOR_EXCEPTION(
7020  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7021  prefix << "The 'domainTransfer' input argument must be either an "
7022  "Import or an Export, and its template parameters must match the "
7023  "corresponding template parameters of the CrsGraph.");
7024 
7025  TEUCHOS_TEST_FOR_EXCEPTION(
7026  ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
7027  (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
7028  ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7029  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
7030  "must be of the same type (either Import or Export).");
7031 
7032  TEUCHOS_TEST_FOR_EXCEPTION(
7033  ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
7034  (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
7035  ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7036  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
7037  "must be of the same type (either Import or Export).");
7038 
7039  } // domainTransfer != null
7040 
7041 
7042  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
7043  // if the source Map is not distributed but the target Map is?
7044  const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
7045 
7046  //
7047  // Get the caller's parameters
7048  //
7049 
7050  bool reverseMode = false; // Are we in reverse mode?
7051  bool restrictComm = false; // Do we need to restrict the communicator?
7052  RCP<ParameterList> graphparams; // parameters for the destination graph
7053  if (! params.is_null()) {
7054  reverseMode = params->get("Reverse Mode", reverseMode);
7055  restrictComm = params->get("Restrict Communicator", restrictComm);
7056  graphparams = sublist(params, "CrsGraph");
7057  }
7058 
7059  // Get the new domain and range Maps. We need some of them for error
7060  // checking, now that we have the reverseMode parameter.
7061  RCP<const map_type> MyRowMap = reverseMode ?
7062  rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
7063  RCP<const map_type> MyColMap; // create this below
7064  RCP<const map_type> MyDomainMap = ! domainMap.is_null() ? domainMap : getDomainMap();
7065  RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
7066  RCP<const map_type> BaseRowMap = MyRowMap;
7067  RCP<const map_type> BaseDomainMap = MyDomainMap;
7068 
7069  // If the user gave us a nonnull destGraph, then check whether it's
7070  // "pristine." That means that it has no entries.
7071  //
7072  // FIXME (mfh 15 May 2014) If this is not true on all processes,
7073  // then this exception test may hang. It would be better to
7074  // forward an error flag to the next communication phase.
7075  if (! destGraph.is_null()) {
7076  // FIXME (mfh 15 May 2014): The Epetra idiom for checking
7077  // whether a graph or matrix has no entries on the calling
7078  // process, is that it is neither locally nor globally indexed.
7079  // This may change eventually with the Kokkos refactor version
7080  // of Tpetra, so it would be better just to check the quantity
7081  // of interest directly. Note that with the Kokkos refactor
7082  // version of Tpetra, asking for the total number of entries in
7083  // a graph or matrix that is not fill complete might require
7084  // computation (kernel launch), since it is not thread scalable
7085  // to update a count every time an entry is inserted.
7086  const bool NewFlag =
7087  ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
7088  TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
7089  prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
7090  "if its graph is empty (neither locally nor globally indexed).");
7091 
7092  // FIXME (mfh 15 May 2014) At some point, we want to change
7093  // graphs and matrices so that their DistObject Map
7094  // (this->getMap()) may differ from their row Map. This will
7095  // make redistribution for 2-D distributions more efficient. I
7096  // hesitate to change this check, because I'm not sure how much
7097  // the code here depends on getMap() and getRowMap() being the
7098  // same.
7099  TEUCHOS_TEST_FOR_EXCEPTION(
7100  ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
7101  prefix << "The (row) Map of the input argument 'destGraph' is not the "
7102  "same as the (row) Map specified by the input argument 'rowTransfer'.");
7103 
7104  TEUCHOS_TEST_FOR_EXCEPTION(
7105  ! destGraph->checkSizes(*this), std::invalid_argument,
7106  prefix << "You provided a nonnull destination graph, but checkSizes() "
7107  "indicates that it is not a legal legal target for redistribution from "
7108  "the source graph (*this). This may mean that they do not have the "
7109  "same dimensions.");
7110  }
7111 
7112  // If forward mode (the default), then *this's (row) Map must be
7113  // the same as the source Map of the Transfer. If reverse mode,
7114  // then *this's (row) Map must be the same as the target Map of
7115  // the Transfer.
7116  //
7117  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
7118  // and matrices so that their DistObject Map (this->getMap()) may
7119  // differ from their row Map. This will make redistribution for
7120  // 2-D distributions more efficient. I hesitate to change this
7121  // check, because I'm not sure how much the code here depends on
7122  // getMap() and getRowMap() being the same.
7123  TEUCHOS_TEST_FOR_EXCEPTION(
7124  ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
7125  std::invalid_argument, prefix <<
7126  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7127 
7128  TEUCHOS_TEST_FOR_EXCEPTION(
7129  ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
7130  std::invalid_argument, prefix <<
7131  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7132 
7133  // checks for domainTransfer
7134  TEUCHOS_TEST_FOR_EXCEPTION(
7135  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7136  std::invalid_argument,
7137  prefix << "The target map of the 'domainTransfer' input argument must be "
7138  "the same as the rebalanced domain map 'domainMap'");
7139 
7140  TEUCHOS_TEST_FOR_EXCEPTION(
7141  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7142  std::invalid_argument,
7143  prefix << "The source map of the 'domainTransfer' input argument must be "
7144  "the same as the rebalanced domain map 'domainMap'");
7145 
7146  // The basic algorithm here is:
7147  //
7148  // 1. Call the moral equivalent of "Distor.do" to handle the import.
7149  // 2. Copy all the Imported and Copy/Permuted data into the raw
7150  // CrsGraph pointers, still using GIDs.
7151  // 3. Call an optimized version of MakeColMap that avoids the
7152  // Directory lookups (since the importer knows who owns all the
7153  // GIDs) AND reindexes to LIDs.
7154  // 4. Call expertStaticFillComplete()
7155 
7156  // Get information from the Importer
7157  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7158  ArrayView<const LO> ExportLIDs = reverseMode ?
7159  rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
7160  ArrayView<const LO> RemoteLIDs = reverseMode ?
7161  rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
7162  ArrayView<const LO> PermuteToLIDs = reverseMode ?
7163  rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
7164  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
7165  rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
7166  Distributor& Distor = rowTransfer.getDistributor();
7167 
7168  // Owning PIDs
7169  Teuchos::Array<int> SourcePids;
7170  Teuchos::Array<int> TargetPids;
7171  int MyPID = getComm()->getRank();
7172 
7173  // Temp variables for sub-communicators
7174  RCP<const map_type> ReducedRowMap, ReducedColMap,
7175  ReducedDomainMap, ReducedRangeMap;
7176  RCP<const Comm<int> > ReducedComm;
7177 
7178  // If the user gave us a null destGraph, then construct the new
7179  // destination graph. We will replace its column Map later.
7180  if (destGraph.is_null()) {
7181  destGraph = rcp(new this_type(MyRowMap, 0, StaticProfile, graphparams));
7182  }
7183 
7184  /***************************************************/
7185  /***** 1) First communicator restriction phase ****/
7186  /***************************************************/
7187  if (restrictComm) {
7188  ReducedRowMap = MyRowMap->removeEmptyProcesses();
7189  ReducedComm = ReducedRowMap.is_null() ?
7190  Teuchos::null :
7191  ReducedRowMap->getComm();
7192  destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
7193 
7194  ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
7195  ReducedRowMap :
7196  MyDomainMap->replaceCommWithSubset(ReducedComm);
7197  ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
7198  ReducedRowMap :
7199  MyRangeMap->replaceCommWithSubset(ReducedComm);
7200 
7201  // Reset the "my" maps
7202  MyRowMap = ReducedRowMap;
7203  MyDomainMap = ReducedDomainMap;
7204  MyRangeMap = ReducedRangeMap;
7205 
7206  // Update my PID, if we've restricted the communicator
7207  if (! ReducedComm.is_null()) {
7208  MyPID = ReducedComm->getRank();
7209  }
7210  else {
7211  MyPID = -2; // For debugging
7212  }
7213  }
7214  else {
7215  ReducedComm = MyRowMap->getComm();
7216  }
7217 
7218  /***************************************************/
7219  /***** 2) From Tpetra::DistObject::doTransfer() ****/
7220  /***************************************************/
7221 #ifdef HAVE_TPETRA_MMM_TIMINGS
7222  MM = Teuchos::null;
7223  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ImportSetup"))));
7224 #endif
7225  // Get the owning PIDs
7226  RCP<const import_type> MyImporter = getImporter();
7227 
7228  // check whether domain maps of source graph and base domain map is the same
7229  bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
7230 
7231  if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
7232  // Same domain map as source graph
7233  //
7234  // NOTE: This won't work for restrictComm (because the Import
7235  // doesn't know the restricted PIDs), though writing an
7236  // optimized version for that case would be easy (Import an
7237  // IntVector of the new PIDs). Might want to add this later.
7238  Import_Util::getPids(*MyImporter, SourcePids, false);
7239  }
7240  else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
7241  // Same domain map as source graph (restricted communicator)
7242  // We need one import from the domain to the column map
7243  ivector_type SourceDomain_pids(getDomainMap(),true);
7244  ivector_type SourceCol_pids(getColMap());
7245  // SourceDomain_pids contains the restricted pids
7246  SourceDomain_pids.putScalar(MyPID);
7247 
7248  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7249  SourcePids.resize(getColMap()->getNodeNumElements());
7250  SourceCol_pids.get1dCopy(SourcePids());
7251  }
7252  else if (MyImporter.is_null() && bSameDomainMap) {
7253  // Graph has no off-process entries
7254  SourcePids.resize(getColMap()->getNodeNumElements());
7255  SourcePids.assign(getColMap()->getNodeNumElements(), MyPID);
7256  }
7257  else if ( ! MyImporter.is_null() &&
7258  ! domainTransfer.is_null() ) {
7259  // general implementation for rectangular matrices with
7260  // domain map different than SourceGraph domain map.
7261  // User has to provide a DomainTransfer object. We need
7262  // two communications (import/export)
7263 
7264  // TargetDomain_pids lives on the rebalanced new domain map
7265  ivector_type TargetDomain_pids(domainMap);
7266  TargetDomain_pids.putScalar(MyPID);
7267 
7268  // SourceDomain_pids lives on the non-rebalanced old domain map
7269  ivector_type SourceDomain_pids(getDomainMap());
7270 
7271  // SourceCol_pids lives on the non-rebalanced old column map
7272  ivector_type SourceCol_pids(getColMap());
7273 
7274  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
7275  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7276  }
7277  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
7278  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7279  }
7280  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
7281  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7282  }
7283  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
7284  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7285  }
7286  else {
7287  TEUCHOS_TEST_FOR_EXCEPTION(
7288  true, std::logic_error,
7289  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7290  }
7291  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7292  SourcePids.resize(getColMap()->getNodeNumElements());
7293  SourceCol_pids.get1dCopy(SourcePids());
7294  }
7295  else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
7296  getDomainMap()->isSameAs(*getRowMap())) {
7297  // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
7298  ivector_type TargetRow_pids(domainMap);
7299  ivector_type SourceRow_pids(getRowMap());
7300  ivector_type SourceCol_pids(getColMap());
7301 
7302  TargetRow_pids.putScalar(MyPID);
7303  if (! reverseMode && xferAsImport != nullptr) {
7304  SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
7305  }
7306  else if (reverseMode && xferAsExport != nullptr) {
7307  SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
7308  }
7309  else if (! reverseMode && xferAsExport != nullptr) {
7310  SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
7311  }
7312  else if (reverseMode && xferAsImport != nullptr) {
7313  SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
7314  }
7315  else {
7316  TEUCHOS_TEST_FOR_EXCEPTION(
7317  true, std::logic_error,
7318  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7319  }
7320  SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
7321  SourcePids.resize(getColMap()->getNodeNumElements());
7322  SourceCol_pids.get1dCopy(SourcePids());
7323  }
7324  else {
7325  TEUCHOS_TEST_FOR_EXCEPTION(
7326  true, std::invalid_argument,
7327  prefix << "This method only allows either domainMap == getDomainMap(), "
7328  "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
7329  }
7330 
7331  // Tpetra-specific stuff
7332  size_t constantNumPackets = destGraph->constantNumberOfPackets();
7333  if (constantNumPackets == 0) {
7334  destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7335  RemoteLIDs.size());
7336  }
7337  else {
7338  // There are a constant number of packets per element. We
7339  // already know (from the number of "remote" (incoming)
7340  // elements) how many incoming elements we expect, so we can
7341  // resize the buffer accordingly.
7342  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7343  destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
7344  }
7345 
7346  {
7347  // packAndPrepare* methods modify numExportPacketsPerLID_.
7348  destGraph->numExportPacketsPerLID_.modify_host();
7349  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7350  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7351 
7352  // Pack & Prepare w/ owning PIDs
7353  packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
7354  numExportPacketsPerLID, ExportLIDs,
7355  SourcePids, constantNumPackets);
7356  }
7357 
7358  // Do the exchange of remote data.
7359 #ifdef HAVE_TPETRA_MMM_TIMINGS
7360  MM = Teuchos::null;
7361  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Transfer"))));
7362 #endif
7363 
7364  if (communication_needed) {
7365  if (reverseMode) {
7366  if (constantNumPackets == 0) { // variable number of packets per LID
7367  // Make sure that host has the latest version, since we're
7368  // using the version on host. If host has the latest
7369  // version, syncing to host does nothing.
7370  destGraph->numExportPacketsPerLID_.sync_host();
7371  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7372  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7373  destGraph->numImportPacketsPerLID_.sync_host();
7374  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7375  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7376  Distor.doReversePostsAndWaits(numExportPacketsPerLID, 1,
7377  numImportPacketsPerLID);
7378  size_t totalImportPackets = 0;
7379  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7380  totalImportPackets += numImportPacketsPerLID[i];
7381  }
7382 
7383  // Reallocation MUST go before setting the modified flag,
7384  // because it may clear out the flags.
7385  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7386  destGraph->imports_.modify_host();
7387  Teuchos::ArrayView<packet_type> hostImports =
7388  getArrayViewFromDualView(destGraph->imports_);
7389  // This is a legacy host pack/unpack path, so use the host
7390  // version of exports_.
7391  destGraph->exports_.sync_host();
7392  Teuchos::ArrayView<const packet_type> hostExports =
7393  getArrayViewFromDualView(destGraph->exports_);
7394  Distor.doReversePostsAndWaits(hostExports,
7395  numExportPacketsPerLID,
7396  hostImports,
7397  numImportPacketsPerLID);
7398  }
7399  else { // constant number of packets per LI
7400  destGraph->imports_.modify_host();
7401  Teuchos::ArrayView<packet_type> hostImports =
7402  getArrayViewFromDualView(destGraph->imports_);
7403  // This is a legacy host pack/unpack path, so use the host
7404  // version of exports_.
7405  destGraph->exports_.sync_host();
7406  Teuchos::ArrayView<const packet_type> hostExports =
7407  getArrayViewFromDualView(destGraph->exports_);
7408  Distor.doReversePostsAndWaits(hostExports,
7409  constantNumPackets,
7410  hostImports);
7411  }
7412  }
7413  else { // forward mode (the default)
7414  if (constantNumPackets == 0) { // variable number of packets per LID
7415  // Make sure that host has the latest version, since we're
7416  // using the version on host. If host has the latest
7417  // version, syncing to host does nothing.
7418  destGraph->numExportPacketsPerLID_.sync_host();
7419  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7420  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7421  destGraph->numImportPacketsPerLID_.sync_host();
7422  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7423  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7424  Distor.doPostsAndWaits(numExportPacketsPerLID, 1,
7425  numImportPacketsPerLID);
7426  size_t totalImportPackets = 0;
7427  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7428  totalImportPackets += numImportPacketsPerLID[i];
7429  }
7430 
7431  // Reallocation MUST go before setting the modified flag,
7432  // because it may clear out the flags.
7433  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7434  destGraph->imports_.modify_host();
7435  Teuchos::ArrayView<packet_type> hostImports =
7436  getArrayViewFromDualView(destGraph->imports_);
7437  // This is a legacy host pack/unpack path, so use the host
7438  // version of exports_.
7439  destGraph->exports_.sync_host();
7440  Teuchos::ArrayView<const packet_type> hostExports =
7441  getArrayViewFromDualView(destGraph->exports_);
7442  Distor.doPostsAndWaits(hostExports,
7443  numExportPacketsPerLID,
7444  hostImports,
7445  numImportPacketsPerLID);
7446  }
7447  else { // constant number of packets per LID
7448  destGraph->imports_.modify_host();
7449  Teuchos::ArrayView<packet_type> hostImports =
7450  getArrayViewFromDualView(destGraph->imports_);
7451  // This is a legacy host pack/unpack path, so use the host
7452  // version of exports_.
7453  destGraph->exports_.sync_host();
7454  Teuchos::ArrayView<const packet_type> hostExports =
7455  getArrayViewFromDualView(destGraph->exports_);
7456  Distor.doPostsAndWaits(hostExports,
7457  constantNumPackets,
7458  hostImports);
7459  }
7460  }
7461  }
7462 
7463  /*********************************************************************/
7464  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7465  /*********************************************************************/
7466 
7467 #ifdef HAVE_TPETRA_MMM_TIMINGS
7468  MM = Teuchos::null;
7469  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-1"))));
7470 #endif
7471 
7472  // Backwards compatibility measure. We'll use this again below.
7473  destGraph->numImportPacketsPerLID_.sync_host();
7474  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7475  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7476  destGraph->imports_.sync_host();
7477  Teuchos::ArrayView<const packet_type> hostImports =
7478  getArrayViewFromDualView(destGraph->imports_);
7479  size_t mynnz =
7480  unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
7481  numImportPacketsPerLID,
7482  constantNumPackets, INSERT,
7483  NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
7484  size_t N = BaseRowMap->getNodeNumElements();
7485 
7486  // Allocations
7487  ArrayRCP<size_t> CSR_rowptr(N+1);
7488  ArrayRCP<GO> CSR_colind_GID;
7489  ArrayRCP<LO> CSR_colind_LID;
7490  CSR_colind_GID.resize(mynnz);
7491 
7492  // If LO and GO are the same, we can reuse memory when
7493  // converting the column indices from global to local indices.
7494  if (typeid(LO) == typeid(GO)) {
7495  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7496  }
7497  else {
7498  CSR_colind_LID.resize(mynnz);
7499  }
7500 
7501  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7502  // unpackAndCombine method on a "CrsArrays" object? This passing
7503  // in a huge list of arrays is icky. Can't we have a bit of an
7504  // abstraction? Implementing a concrete DistObject subclass only
7505  // takes five methods.
7506  unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
7507  numImportPacketsPerLID, constantNumPackets,
7508  INSERT, NumSameIDs, PermuteToLIDs,
7509  PermuteFromLIDs, N, mynnz, MyPID,
7510  CSR_rowptr(), CSR_colind_GID(),
7511  SourcePids(), TargetPids);
7512 
7513  /**************************************************************/
7514  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7515  /**************************************************************/
7516 #ifdef HAVE_TPETRA_MMM_TIMINGS
7517  MM = Teuchos::null;
7518  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-2"))));
7519 #endif
7520  // Call an optimized version of makeColMap that avoids the
7521  // Directory lookups (since the Import object knows who owns all
7522  // the GIDs).
7523  Teuchos::Array<int> RemotePids;
7524  Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
7525  CSR_colind_LID(),
7526  CSR_colind_GID(),
7527  BaseDomainMap,
7528  TargetPids, RemotePids,
7529  MyColMap);
7530 
7531  /*******************************************************/
7532  /**** 4) Second communicator restriction phase ****/
7533  /*******************************************************/
7534  if (restrictComm) {
7535  ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7536  ReducedRowMap :
7537  MyColMap->replaceCommWithSubset(ReducedComm);
7538  MyColMap = ReducedColMap; // Reset the "my" maps
7539  }
7540 
7541  // Replace the col map
7542  destGraph->replaceColMap(MyColMap);
7543 
7544  // Short circuit if the processor is no longer in the communicator
7545  //
7546  // NOTE: Epetra modifies all "removed" processes so they
7547  // have a dummy (serial) Map that doesn't touch the original
7548  // communicator. Duplicating that here might be a good idea.
7549  if (ReducedComm.is_null()) {
7550  return;
7551  }
7552 
7553  /***************************************************/
7554  /**** 5) Sort ****/
7555  /***************************************************/
7556  if ((! reverseMode && xferAsImport != nullptr) ||
7557  (reverseMode && xferAsExport != nullptr)) {
7558  Import_Util::sortCrsEntries(CSR_rowptr(),
7559  CSR_colind_LID());
7560  }
7561  else if ((! reverseMode && xferAsExport != nullptr) ||
7562  (reverseMode && xferAsImport != nullptr)) {
7563  Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
7564  CSR_colind_LID());
7565  if (CSR_rowptr[N] != mynnz) {
7566  CSR_colind_LID.resize(CSR_rowptr[N]);
7567  }
7568  }
7569  else {
7570  TEUCHOS_TEST_FOR_EXCEPTION(
7571  true, std::logic_error,
7572  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7573  }
7574  /***************************************************/
7575  /**** 6) Reset the colmap and the arrays ****/
7576  /***************************************************/
7577 
7578  // Call constructor for the new graph (restricted as needed)
7579  //
7580  destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
7581 
7582  /***************************************************/
7583  /**** 7) Build Importer & Call ESFC ****/
7584  /***************************************************/
7585  // Pre-build the importer using the existing PIDs
7586  Teuchos::ParameterList esfc_params;
7587 #ifdef HAVE_TPETRA_MMM_TIMINGS
7588  MM = Teuchos::null;
7589  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("CreateImporter"))));
7590 #endif
7591  RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
7592 #ifdef HAVE_TPETRA_MMM_TIMINGS
7593  MM = Teuchos::null;
7594  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ESFC"))));
7595 
7596  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7597 #endif
7598  if(!params.is_null())
7599  esfc_params.set("compute global constants",params->get("compute global constants",true));
7600 
7601  destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7602  MyImport, Teuchos::null, rcp(&esfc_params,false));
7603 
7604  }
7605 
// importAndFillComplete, single-Import overload (name per the class
// reference; this listing omits lines 7608-7609, which carry the qualified
// function name and the first parameter).
//
// Thin forwarder: passes destGraph, the Import, and the caller's domain Map,
// range Map and parameter list straight to transferAndFillComplete(). The
// third argument is Teuchos::null, the slot in which the two-Import overload
// passes its domain Import.
// NOTE(review): destGraph is presumably the parameter declared on the omitted
// lines -- confirm against the upstream header.
7606  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7607  void
7610  const import_type& importer,
7611  const Teuchos::RCP<const map_type>& domainMap,
7612  const Teuchos::RCP<const map_type>& rangeMap,
7613  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7614  {
7615  transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
7616  }
7617 
// Two-Import overload (presumably importAndFillComplete; this listing omits
// lines 7620-7621, which carry the qualified function name and the first
// parameter -- confirm against the upstream header).
//
// Thin forwarder to transferAndFillComplete(): rowImporter is passed as the
// transfer object, and domainImporter is wrapped with Teuchos::rcpFromRef()
// for the third argument. The caller's domain Map, range Map and parameter
// list are passed through unchanged.
7618  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7619  void
7622  const import_type& rowImporter,
7623  const import_type& domainImporter,
7624  const Teuchos::RCP<const map_type>& domainMap,
7625  const Teuchos::RCP<const map_type>& rangeMap,
7626  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7627  {
7628  transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7629  }
7630 
// exportAndFillComplete, single-Export overload (name per the class
// reference; this listing omits lines 7633-7634, which carry the qualified
// function name and the first parameter).
//
// Thin forwarder: passes destGraph, the Export, and the caller's domain Map,
// range Map and parameter list straight to transferAndFillComplete(). The
// third argument is Teuchos::null, the slot in which the two-Export overload
// passes its domain Export.
// NOTE(review): destGraph is presumably the parameter declared on the omitted
// lines -- confirm against the upstream header.
7631  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7632  void
7635  const export_type& exporter,
7636  const Teuchos::RCP<const map_type>& domainMap,
7637  const Teuchos::RCP<const map_type>& rangeMap,
7638  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7639  {
7640  transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
7641  }
7642 
// Two-Export overload (presumably exportAndFillComplete; this listing omits
// lines 7645-7646, which carry the qualified function name and the first
// parameter -- confirm against the upstream header).
//
// Thin forwarder to transferAndFillComplete(): rowExporter is passed as the
// transfer object, and domainExporter is wrapped with Teuchos::rcpFromRef()
// for the third argument. The caller's domain Map, range Map and parameter
// list are passed through unchanged.
7643  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7644  void
7647  const export_type& rowExporter,
7648  const export_type& domainExporter,
7649  const Teuchos::RCP<const map_type>& domainMap,
7650  const Teuchos::RCP<const map_type>& rangeMap,
7651  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7652  {
7653  transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7654  }
7655 
7656 
// swap (name and parameter `graph` per the class reference; this listing
// omits lines 7659-7660, which carry the qualified name and parameter list).
//
// Exchanges the state of *this and `graph` member by member with std::swap.
// Only the members listed below are exchanged: the four Maps, the
// Import/Export handles, packed and unpacked row pointers, index storage,
// entry-count scalars, storage status, status flags, per-row counts and the
// nonlocals_ map.
// NOTE(review): any data member not named here keeps its current owner --
// verify the list against the class declaration when members are added.
7657  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7658  void
7661  {
7662  std::swap(graph.need_sync_host_uvm_access, this->need_sync_host_uvm_access);
7663 
      // Maps.
7664  std::swap(graph.rowMap_, this->rowMap_);
7665  std::swap(graph.colMap_, this->colMap_);
7666  std::swap(graph.rangeMap_, this->rangeMap_);
7667  std::swap(graph.domainMap_, this->domainMap_);
7668 
      // Import / Export handles.
7669  std::swap(graph.importer_, this->importer_);
7670  std::swap(graph.exporter_, this->exporter_);
7671 
      // Packed row pointers (device and host copies).
7672  std::swap(graph.rowPtrsPacked_dev_, this->rowPtrsPacked_dev_);
7673  std::swap(graph.rowPtrsPacked_host_, this->rowPtrsPacked_host_);
7674 
      // Local and global entry-count scalars.
7675  std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
7676 
7677  std::swap(graph.globalNumEntries_, this->globalNumEntries_);
7678  std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
7679 
7680  std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
7681 
      // Unpacked row pointers and off-rank offsets.
7682  std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_);
7683  std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_);
7684  std::swap(graph.k_offRankOffsets_, this->k_offRankOffsets_);
7685 
      // Column-index storage (unpacked local, global, packed local).
7686  std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv);
7687  std::swap(graph.gblInds_wdv, this->gblInds_wdv);
7688  std::swap(graph.lclIndsPacked_wdv, this->lclIndsPacked_wdv);
7689 
7690  std::swap(graph.storageStatus_, this->storageStatus_);
7691 
      // Status flags.
7692  std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
7693  std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
7694  std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
7695  std::swap(graph.fillComplete_, this->fillComplete_);
7696  std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
7697  std::swap(graph.noRedundancies_, this->noRedundancies_);
7698  std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
7699  std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
7700  std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_);
7701 
7702  std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
7703 
      // Per-row counts and the pending nonlocal insertions.
7704  std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
7705  std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
7706  std::swap(graph.nonlocals_, this->nonlocals_); // std::map
7707  }
7708 
7709 
7710  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7711  bool
7714  {
7715  auto compare_nonlocals = [&] (const nonlocals_type & m1, const nonlocals_type & m2) {
7716  bool output = true;
7717  output = m1.size() == m2.size() ? output : false;
7718  for(auto & it_m: m1)
7719  {
7720  size_t key = it_m.first;
7721  output = m2.find(key) != m2.end() ? output : false;
7722  if(output)
7723  {
7724  auto v1 = m1.find(key)->second;
7725  auto v2 = m2.find(key)->second;
7726  std::sort(v1.begin(), v1.end());
7727  std::sort(v2.begin(), v2.end());
7728 
7729  output = v1.size() == v2.size() ? output : false;
7730  for(size_t i=0; output && i<v1.size(); i++)
7731  {
7732  output = v1[i]==v2[i] ? output : false;
7733  }
7734  }
7735  }
7736  return output;
7737  };
7738 
7739  bool output = true;
7740 
7741  output = this->rowMap_->isSameAs( *(graph.rowMap_) ) ? output : false;
7742  output = this->colMap_->isSameAs( *(graph.colMap_) ) ? output : false;
7743  output = this->rangeMap_->isSameAs( *(graph.rangeMap_) ) ? output : false;
7744  output = this->domainMap_->isSameAs( *(graph.domainMap_) ) ? output : false;
7745 
7746  output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
7747 
7748  output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
7749  output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
7750 
7751  output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
7752 
7753  output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
7754 
7755  output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
7756  output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
7757  output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
7758  output = this->fillComplete_ == graph.fillComplete_ ? output : false;
7759  output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
7760  output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7761  output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7762  output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7763  output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false;
7764  output = this->sortGhostsAssociatedWithEachProcessor_ == this->sortGhostsAssociatedWithEachProcessor_ ? output : false;
7765 
7766  // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7767  // nonlocals_ isa std::map<GO, std::vector<GO> >
7768  output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7769 
7770  // Compare k_numAllocPerRow_ isa Kokkos::View::HostMirror
7771  // - since this is a HostMirror type, it should be in host memory already
7772  output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7773  if(output && this->k_numAllocPerRow_.extent(0) > 0)
7774  {
7775  for(size_t i=0; output && i<this->k_numAllocPerRow_.extent(0); i++)
7776  output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7777  }
7778 
7779  // Compare k_numRowEntries_ isa Kokkos::View::HostMirror
7780  // - since this is a HostMirror type, it should be in host memory already
7781  output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7782  if(output && this->k_numRowEntries_.extent(0) > 0)
7783  {
7784  for(size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7785  output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7786  }
7787 
7788  // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
7789  output = this->rowPtrsUnpacked_host_.extent(0) == graph.rowPtrsUnpacked_host_.extent(0) ? output : false;
7790  if(output && this->rowPtrsUnpacked_host_.extent(0) > 0)
7791  {
7792  auto rowPtrsThis = this->rowPtrsUnpacked_host_;
7793  auto rowPtrsGraph = graph.rowPtrsUnpacked_host_;
7794  for(size_t i=0; output && i<rowPtrsThis.extent(0); i++)
7795  output = rowPtrsThis(i) == rowPtrsGraph(i) ? output : false;
7796  }
7797 
7798  // Compare lclIndsUnpacked_wdv isa Kokkos::View<LocalOrdinal*, ...>
7799  output = this->lclIndsUnpacked_wdv.extent(0) == graph.lclIndsUnpacked_wdv.extent(0) ? output : false;
7800  if(output && this->lclIndsUnpacked_wdv.extent(0) > 0)
7801  {
7802  auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7803  auto indGraph = graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7804  for(size_t i=0; output && i < indThis.extent(0); i++)
7805  output = indThis(i) == indGraph(i) ? output : false;
7806  }
7807 
7808  // Compare gblInds_wdv isa Kokkos::View<GlobalOrdinal*, ...>
7809  output = this->gblInds_wdv.extent(0) == graph.gblInds_wdv.extent(0) ? output : false;
7810  if(output && this->gblInds_wdv.extent(0) > 0)
7811  {
7812  auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7813  auto indtGraph = graph.gblInds_wdv.getHostView(Access::ReadOnly);
7814  for(size_t i=0; output && i<indtThis.extent(0); i++)
7815  output = indtThis(i) == indtGraph(i) ? output : false;
7816  }
7817 
7818  // Check lclGraph_ isa
7819  // Kokkos::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7820  // Kokkos::StaticCrsGraph has 3 data members in it:
7821  // Kokkos::View<size_type*, ...> row_map
7822  // (local_graph_device_type::row_map_type)
7823  // Kokkos::View<data_type*, ...> entries
7824  // (local_graph_device_type::entries_type)
7825  // Kokkos::View<size_type*, ...> row_block_offsets
7826  // (local_graph_device_type::row_block_type)
7827  // There is currently no Kokkos::StaticCrsGraph comparison function
7828  // that's built-in, so we will just compare
7829  // the three data items here. This can be replaced if Kokkos ever
7830  // puts in its own comparison routine.
7831  local_graph_host_type thisLclGraph = this->getLocalGraphHost();
7832  local_graph_host_type graphLclGraph = graph.getLocalGraphHost();
7833 
7834  output = thisLclGraph.row_map.extent(0) == graphLclGraph.row_map.extent(0)
7835  ? output : false;
7836  if(output && thisLclGraph.row_map.extent(0) > 0)
7837  {
7838  auto lclGraph_rowmap_host_this = thisLclGraph.row_map;
7839  auto lclGraph_rowmap_host_graph = graphLclGraph.row_map;
7840  for (size_t i=0; output && i < lclGraph_rowmap_host_this.extent(0); i++)
7841  output = lclGraph_rowmap_host_this(i) == lclGraph_rowmap_host_graph(i)
7842  ? output : false;
7843  }
7844 
7845  output = thisLclGraph.entries.extent(0) == graphLclGraph.entries.extent(0)
7846  ? output : false;
7847  if(output && thisLclGraph.entries.extent(0) > 0)
7848  {
7849  auto lclGraph_entries_host_this = thisLclGraph.entries;
7850  auto lclGraph_entries_host_graph = graphLclGraph.entries;
7851  for (size_t i=0; output && i < lclGraph_entries_host_this.extent(0); i++)
7852  output = lclGraph_entries_host_this(i) == lclGraph_entries_host_graph(i)
7853  ? output : false;
7854  }
7855 
7856  output =
7857  thisLclGraph.row_block_offsets.extent(0) ==
7858  graphLclGraph.row_block_offsets.extent(0) ? output : false;
7859  if(output && thisLclGraph.row_block_offsets.extent(0) > 0)
7860  {
7861  auto lclGraph_rbo_host_this = thisLclGraph.row_block_offsets;
7862  auto lclGraph_rbo_host_graph = graphLclGraph.row_block_offsets;
7863  for (size_t i=0; output && i < lclGraph_rbo_host_this.extent(0); i++)
7864  output = lclGraph_rbo_host_this(i) == lclGraph_rbo_host_graph(i)
7865  ? output : false;
7866  }
7867 
7868  // For Importer and Exporter, we don't need to explicitly check them since
7869  // they will be consistent with the maps.
7870  // Note: importer_ isa Teuchos::RCP<const import_type>
7871  // exporter_ isa Teuchos::RCP<const export_type>
7872 
7873  return output;
7874  }
7875 
7876 
7877 
7878 } // namespace Tpetra
7879 
7880 //
7881 // Explicit instantiation macros
7882 //
7883 // Must be expanded from within the Tpetra namespace!
7884 //
7885 
// For one (LO, GO, NODE) combination, expands to a `template<>` declaration
// of the single-Import importAndFillCompleteCrsGraph: parameters sourceGraph,
// importer, domainMap, rangeMap, params; return type
// Teuchos::RCP<CrsGraph<LO,GO,NODE> >. The expansion ends in ';', so it
// declares the function and does not define it.
// NOTE(review): `template<>` is explicit-specialization syntax, while the
// macro name says INSTANT(iation) -- confirm which is intended.
7886 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7887  template<> \
7888  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7889  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7890  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7891  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7892  CrsGraph<LO,GO,NODE>::node_type>& importer, \
7893  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7894  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7895  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7896  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7897  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7898  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7899  const Teuchos::RCP<Teuchos::ParameterList>& params);
7900 
// For one (LO, GO, NODE) combination, expands to a `template<>` declaration
// of the two-Import importAndFillCompleteCrsGraph: parameters sourceGraph,
// rowImporter, domainImporter, domainMap, rangeMap, params; return type
// Teuchos::RCP<CrsGraph<LO,GO,NODE> >. The expansion ends in ';', so it
// declares the function and does not define it.
// NOTE(review): `template<>` is explicit-specialization syntax, while the
// macro name says INSTANT(iation) -- confirm which is intended.
7901 #define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7902  template<> \
7903  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7904  importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7905  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7906  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7907  CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
7908  const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7909  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7910  CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
7911  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7912  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7913  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7914  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7915  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7916  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7917  const Teuchos::RCP<Teuchos::ParameterList>& params);
7918 
7919 
// For one (LO, GO, NODE) combination, expands to a `template<>` declaration
// of the single-Export exportAndFillCompleteCrsGraph: parameters sourceGraph,
// exporter, domainMap, rangeMap, params; return type
// Teuchos::RCP<CrsGraph<LO,GO,NODE> >. The expansion ends in ';', so it
// declares the function and does not define it.
// NOTE(review): `template<>` is explicit-specialization syntax, while the
// macro name says INSTANT(iation) -- confirm which is intended.
7920 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7921  template<> \
7922  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7923  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7924  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7925  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7926  CrsGraph<LO,GO,NODE>::node_type>& exporter, \
7927  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7928  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7929  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7930  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7931  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7932  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7933  const Teuchos::RCP<Teuchos::ParameterList>& params);
7934 
// For one (LO, GO, NODE) combination, expands to a `template<>` declaration
// of the two-Export exportAndFillCompleteCrsGraph: parameters sourceGraph,
// rowExporter, domainExporter, domainMap, rangeMap, params; return type
// Teuchos::RCP<CrsGraph<LO,GO,NODE> >. The expansion ends in ';', so it
// declares the function and does not define it.
// NOTE(review): `template<>` is explicit-specialization syntax, while the
// macro name says INSTANT(iation) -- confirm which is intended.
7935 #define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7936  template<> \
7937  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7938  exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7939  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7940  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7941  CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
7942  const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7943  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7944  CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
7945  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7946  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7947  CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7948  const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7949  CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7950  CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7951  const Teuchos::RCP<Teuchos::ParameterList>& params);
7952 
7953 
// Umbrella macro for one (LO, GO, NODE) combination: explicitly instantiates
// the class (`template class CrsGraph<LO, GO, NODE>;`) and then expands the
// four import/export-and-fill-complete macros defined earlier in this file,
// in the order import, export, import-two, export-two.
// Per the note preceding these macros, it must be expanded from within the
// Tpetra namespace.
7954 #define TPETRA_CRSGRAPH_INSTANT( LO, GO, NODE ) \
7955  template class CrsGraph<LO, GO, NODE>; \
7956  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7957  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7958  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7959  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
7960 
7961 
7962 #endif // TPETRA_CRSGRAPH_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular,...
Functions for manipulating CRS arrays.
Declaration of a function that prints strings from each process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Utility functions for packing and unpacking sparse matrix entries.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
Stand-alone utility functions and macros.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
local_inds_dualv_type::t_dev::const_type getLocalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
global_size_t globalMaxNumRowEntries_
Global maximum of the number of entries in each row.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
Details::EStorageStatus storageStatus_
Status of the graph's storage, when not in a fill-complete state.
GO global_ordinal_type
The type of the graph's global indices.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
std::pair< size_t, std::string > makeIndicesLocal(const bool verbose=false)
Convert column indices from global to local.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Whether this graph is identical to the given graph.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows KDDKDD UVM Removal: Device view takes place of k_lclInds...
void globalAssemble()
Communicate nonlocal contributions to other processes.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
global_inds_wdv_type gblInds_wdv
Global ordinals of column indices for all rows KDDKDD UVM Removal: Device view takes place of k_gblIn...
size_t nodeMaxNumRowEntries_
Local maximum of the number of entries in each row.
size_t sortAndMergeRowIndices(const RowInfo &rowInfo, const bool sorted, const bool merged)
Sort and merge duplicate column indices in the given row.
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
bool hasColMap() const override
Whether the graph has a column Map.
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
std::string description() const override
Return a one-line human-readable description of this object.
bool isStorageOptimized() const
Returns true if storage has been optimized.
void getGlobalRowCopy(global_ordinal_type gblRow, nonconst_global_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, const Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given parameters.
Teuchos::ArrayRCP< const local_ordinal_type > getNodePackedIndices() const
Get an Teuchos::ArrayRCP of the packed column-indices.
void computeLocalConstants()
Compute local constants, if they have not yet been computed.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
typename Node::device_type device_type
This class' Kokkos device type.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
global_inds_dualv_type::t_dev::const_type getGlobalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
typename local_graph_device_type::HostMirror local_graph_host_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
void setAllIndices(const typename local_graph_device_type::row_map_type &rowPointers, const typename local_graph_device_type::entries_type::non_const_type &columnIndices)
Set the graph's data directly, using 1-D storage.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
void getGlobalRowView(const global_ordinal_type gblRow, global_inds_host_view_type &gblColInds) const override
Get a const view of the given global row's global column indices.
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
void makeColMap(Teuchos::Array< int > &remotePIDs)
Make and set the graph's column Map.
bool haveGlobalConstants_
Whether all processes have computed global constants.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator.
void getNumEntriesPerLocalRowUpperBound(Teuchos::ArrayRCP< const size_t > &boundPerLocalRow, size_t &boundForAllLocalRows, bool &boundSameForAllLocalRows) const
Get an upper bound on the number of entries that can be stored in each row.
void checkInternalState() const
Throw an exception if the internal state is not consistent.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the range of this graph.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
void getLocalRowCopy(local_ordinal_type gblRow, nonconst_local_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
global_size_t globalNumEntries_
Global number of entries in the graph.
size_t getNodeAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
local_inds_wdv_type lclIndsPacked_wdv
Local ordinals of column indices for all rows. KDDKDD UVM Removal: Device view takes place of lclGraph_...
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
nonlocals_type nonlocals_
Nonlocal data given to insertGlobalIndices.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets) const override
Pack this object's data for Import or Export.
Teuchos::ArrayRCP< const size_t > getNodeRowPtrs() const
Get a host view of the row offsets.
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
void getLocalOffRankOffsets(offset_device_view_type &offsets) const
Get offsets of the off-rank entries in the graph.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
Node node_type
This class' Kokkos Node type.
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
size_t getNodeNumRows() const override
Returns the number of graph rows owned on the calling node.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
local_inds_dualv_type::t_host getLocalIndsViewHostNonConst(const RowInfo &rowinfo)
Get a ReadWrite locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(m...
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given Map.
size_t getNodeNumEntries() const override
The local number of entries in the graph.
void computeGlobalConstants()
Compute global constants, if they have not yet been computed.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
ProfileType getProfileType() const
Returns the profile type, which indicates whether the graph was allocated with static data structures.
offset_device_view_type k_offRankOffsets_
The offsets for off-rank entries.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
void setLocallyModified()
Report that we made a local modification to the graph's structure.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current range Map with the given Map.
size_t getNodeNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
void getLocalRowView(const LocalOrdinal lclRow, local_inds_host_view_type &lclColInds) const override
Get a const view of the given local row's local column indices.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph's current column Map with the given Map.
bool haveLocalConstants_
Whether this process has computed local constants.
Kokkos::View< const size_t *, device_type >::HostMirror k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Sets up and executes a communication plan for a Tpetra DistObject.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
A parallel distribution of indices over processes.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
Abstract base class for objects that can be the source of an Import or Export operation.
A distributed dense vector.
Implementation details of Tpetra.
int local_ordinal_type
Default value of LocalOrdinal template parameter.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices into the current list of indices.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a CrsGraph's global column indices into local column indices.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets into the current list of indices.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void sort(View &view, const size_t &size)
Convenience wrapper for std::sort for host-accessible views.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
CombineMode
Rule for combining data in an Import or Export.
@ INSERT
Insert new values that don't currently exist.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(LO &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const LO &)
Number of bytes required to pack or unpack the given value of type value_type.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.