Tpetra parallel linear algebra  Version of the Day
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSGRAPH_DEF_HPP
43 #define TPETRA_CRSGRAPH_DEF_HPP
44 
52 
55 #include "Tpetra_Details_gathervPrint.hpp"
56 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
57 #include "Tpetra_Details_makeColMap.hpp"
60 #include "Tpetra_Distributor.hpp"
61 #include "Teuchos_SerialDenseMatrix.hpp"
62 #include <algorithm>
63 #include <limits>
64 #include <string>
65 #include <utility>
66 
67 namespace Tpetra {
68  namespace Details {
69  namespace Impl {
70 
71  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
72  class ConvertColumnIndicesFromGlobalToLocal {
73  public:
74  ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
75  const ::Kokkos::View<const GO*, DT>& gblColInds,
76  const ::Kokkos::View<const OffsetType*, DT>& ptr,
77  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
78  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
79  lclColInds_ (lclColInds),
80  gblColInds_ (gblColInds),
81  ptr_ (ptr),
82  lclColMap_ (lclColMap),
83  numRowEnt_ (numRowEnt)
84  {}
85 
86  KOKKOS_FUNCTION void
87  operator () (const LO& lclRow, OffsetType& curNumBad) const
88  {
89  const OffsetType offset = ptr_(lclRow);
90  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
91  // of entries in a row to LO, as long as the row doesn't have
92  // too many duplicate entries.
93  const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
94  for (LO j = 0; j < numEnt; ++j) {
95  const GO gid = gblColInds_(offset + j);
96  const LO lid = lclColMap_.getLocalElement (gid);
97  lclColInds_(offset + j) = lid;
98  if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
99  ++curNumBad;
100  }
101  }
102  }
103 
104  static OffsetType
105  run (const ::Kokkos::View<LO*, DT>& lclColInds,
106  const ::Kokkos::View<const GO*, DT>& gblColInds,
107  const ::Kokkos::View<const OffsetType*, DT>& ptr,
108  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
109  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
110  {
111  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
112  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
113 
114  const LO lclNumRows = ptr.dimension_0 () == 0 ?
115  static_cast<LO> (0) : static_cast<LO> (ptr.dimension_0 () - 1);
116  OffsetType numBad = 0;
117  // Count of "bad" column indices is a reduction over rows.
118  ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
119  functor_type (lclColInds, gblColInds, ptr,
120  lclColMap, numRowEnt),
121  numBad);
122  return numBad;
123  }
124 
125  private:
126  ::Kokkos::View<LO*, DT> lclColInds_;
127  ::Kokkos::View<const GO*, DT> gblColInds_;
128  ::Kokkos::View<const OffsetType*, DT> ptr_;
130  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
131  };
132 
133  } // namespace Impl
134 
149  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
150  OffsetType
151  convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
152  const Kokkos::View<const GO*, DT>& gblColInds,
153  const Kokkos::View<const OffsetType*, DT>& ptr,
154  const LocalMap<LO, GO, DT>& lclColMap,
155  const Kokkos::View<const NumEntType*, DT>& numRowEnt)
156  {
157  using Impl::ConvertColumnIndicesFromGlobalToLocal;
158  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
159  return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
160  }
161 
162  } // namespace Details
163 
164  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
166  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
167  size_t maxNumEntriesPerRow,
168  ProfileType pftype,
169  const Teuchos::RCP<Teuchos::ParameterList>& params) :
170  dist_object_type (rowMap)
171  , rowMap_ (rowMap)
172  , nodeNumEntries_ (0)
173  , pftype_ (pftype)
174  , numAllocForAllRows_ (maxNumEntriesPerRow)
175  , storageStatus_ (pftype == StaticProfile ?
176  Details::STORAGE_1D_UNPACKED :
177  Details::STORAGE_2D)
178  , indicesAreAllocated_ (false)
179  , indicesAreLocal_ (false)
180  , indicesAreGlobal_ (false)
181  , fillComplete_ (false)
182  , indicesAreSorted_ (true)
183  , noRedundancies_ (true)
184  , haveLocalConstants_ (false)
185  , haveGlobalConstants_ (false)
186  , sortGhostsAssociatedWithEachProcessor_ (true)
187  {
188  const char tfecfFuncName[] = "CrsGraph(rowMap,maxNumEntriesPerRow,"
189  "pftype,params): ";
190  staticAssertions ();
191  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
192  (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
193  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
194  "a valid size_t value, which in this case means it must not be "
195  "Teuchos::OrdinalTraits<size_t>::invalid().");
196  resumeFill (params);
197  checkInternalState ();
198  }
199 
200  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
202  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
203  const Teuchos::RCP<const map_type>& colMap,
204  const size_t maxNumEntriesPerRow,
205  const ProfileType pftype,
206  const Teuchos::RCP<Teuchos::ParameterList>& params) :
207  dist_object_type (rowMap)
208  , rowMap_ (rowMap)
209  , colMap_ (colMap)
210  , nodeNumEntries_ (0)
211  , pftype_ (pftype)
212  , numAllocForAllRows_ (maxNumEntriesPerRow)
213  , storageStatus_ (pftype == StaticProfile ?
214  Details::STORAGE_1D_UNPACKED :
215  Details::STORAGE_2D)
216  , indicesAreAllocated_ (false)
217  , indicesAreLocal_ (false)
218  , indicesAreGlobal_ (false)
219  , fillComplete_ (false)
220  , indicesAreSorted_ (true)
221  , noRedundancies_ (true)
222  , haveLocalConstants_ (false)
223  , haveGlobalConstants_ (false)
224  , sortGhostsAssociatedWithEachProcessor_ (true)
225  {
226  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,"
227  "pftype,params): ";
228  staticAssertions ();
229  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
230  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
231  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
232  "a valid size_t value, which in this case means it must not be "
233  "Teuchos::OrdinalTraits<size_t>::invalid().");
234  resumeFill (params);
235  checkInternalState ();
236  }
237 
238  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
240  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
241  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
242  const ProfileType pftype,
243  const Teuchos::RCP<Teuchos::ParameterList>& params) :
244  dist_object_type (rowMap)
245  , rowMap_ (rowMap)
246  , nodeNumEntries_ (0)
247  , pftype_ (pftype)
248  , numAllocForAllRows_ (0)
249  , storageStatus_ (pftype == StaticProfile ?
250  Details::STORAGE_1D_UNPACKED :
251  Details::STORAGE_2D)
252  , indicesAreAllocated_ (false)
253  , indicesAreLocal_ (false)
254  , indicesAreGlobal_ (false)
255  , fillComplete_ (false)
256  , indicesAreSorted_ (true)
257  , noRedundancies_ (true)
258  , haveLocalConstants_ (false)
259  , haveGlobalConstants_ (false)
260  , sortGhostsAssociatedWithEachProcessor_ (true)
261  {
262  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
263  staticAssertions ();
264 
265  const size_t lclNumRows = rowMap.is_null () ?
266  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
267  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
268  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
269  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
270  << " != the local number of rows " << lclNumRows << " as specified by "
271  "the input row Map.");
272 
273 #ifdef HAVE_TPETRA_DEBUG
274  for (size_t r = 0; r < lclNumRows; ++r) {
275  const size_t curRowCount = numEntPerRow[r];
276  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
277  curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
278  std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid "
279  "number of entries (Teuchos::OrdinalTraits<size_t>::invalid()).");
280  }
281 #endif // HAVE_TPETRA_DEBUG
282 
283  // Deep-copy the input (ArrayRCP, therefore host accessible) into
284  // k_numAllocPerRow_. The latter is a const View, so we have to
285  // copy into a nonconst View first, then assign.
286  typedef decltype (k_numAllocPerRow_) out_view_type;
287  typedef typename out_view_type::non_const_type nc_view_type;
288  typedef Kokkos::View<const size_t*,
289  typename nc_view_type::array_layout,
290  Kokkos::HostSpace,
291  Kokkos::MemoryUnmanaged> in_view_type;
292  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
293  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
294  lclNumRows);
295  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
296  k_numAllocPerRow_ = numAllocPerRowOut;
297 
298  resumeFill (params);
299  checkInternalState ();
300  }
301 
302  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
304  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
305  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
306  const ProfileType pftype,
307  const Teuchos::RCP<Teuchos::ParameterList>& params) :
308  dist_object_type (rowMap)
309  , rowMap_ (rowMap)
310  , nodeNumEntries_ (0)
311  , pftype_ (pftype)
312  , k_numAllocPerRow_ (numEntPerRow.h_view)
313  , numAllocForAllRows_ (0)
314  , storageStatus_ (pftype == StaticProfile ?
315  Details::STORAGE_1D_UNPACKED :
316  Details::STORAGE_2D)
317  , indicesAreAllocated_ (false)
318  , indicesAreLocal_ (false)
319  , indicesAreGlobal_ (false)
320  , fillComplete_ (false)
321  , indicesAreSorted_ (true)
322  , noRedundancies_ (true)
323  , haveLocalConstants_ (false)
324  , haveGlobalConstants_ (false)
325  , sortGhostsAssociatedWithEachProcessor_ (true)
326  {
327  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
328  staticAssertions ();
329 
330  const size_t lclNumRows = rowMap.is_null () ?
331  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
332  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
333  static_cast<size_t> (numEntPerRow.dimension_0 ()) != lclNumRows,
334  std::invalid_argument, "numEntPerRow has length " <<
335  numEntPerRow.dimension_0 () << " != the local number of rows " <<
336  lclNumRows << " as specified by " "the input row Map.");
337 
338 #ifdef HAVE_TPETRA_DEBUG
339  for (size_t r = 0; r < lclNumRows; ++r) {
340  const size_t curRowCount = numEntPerRow.h_view(r);
341  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
342  curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
343  std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid "
344  "number of entries (Teuchos::OrdinalTraits<size_t>::invalid()).");
345  }
346 #endif // HAVE_TPETRA_DEBUG
347 
348  resumeFill (params);
349  checkInternalState ();
350  }
351 
352 
353  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
355  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
356  const Teuchos::RCP<const map_type>& colMap,
357  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
358  const ProfileType pftype,
359  const Teuchos::RCP<Teuchos::ParameterList>& params) :
360  dist_object_type (rowMap)
361  , rowMap_ (rowMap)
362  , colMap_ (colMap)
363  , nodeNumEntries_ (0)
364  , pftype_ (pftype)
365  , k_numAllocPerRow_ (numEntPerRow.h_view)
366  , numAllocForAllRows_ (0)
367  , storageStatus_ (pftype == StaticProfile ?
368  Details::STORAGE_1D_UNPACKED :
369  Details::STORAGE_2D)
370  , indicesAreAllocated_ (false)
371  , indicesAreLocal_ (false)
372  , indicesAreGlobal_ (false)
373  , fillComplete_ (false)
374  , indicesAreSorted_ (true)
375  , noRedundancies_ (true)
376  , haveLocalConstants_ (false)
377  , haveGlobalConstants_ (false)
378  , sortGhostsAssociatedWithEachProcessor_ (true)
379  {
380  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
381  staticAssertions ();
382 
383  const size_t lclNumRows = rowMap.is_null () ?
384  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
385  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
386  static_cast<size_t> (numEntPerRow.dimension_0 ()) != lclNumRows,
387  std::invalid_argument, "numEntPerRow has length " <<
388  numEntPerRow.dimension_0 () << " != the local number of rows " <<
389  lclNumRows << " as specified by " "the input row Map.");
390 
391 #ifdef HAVE_TPETRA_DEBUG
392  for (size_t r = 0; r < lclNumRows; ++r) {
393  const size_t curRowCount = numEntPerRow.h_view(r);
394  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
395  curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
396  std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid "
397  "number of entries (Teuchos::OrdinalTraits<size_t>::invalid()).");
398  }
399 #endif // HAVE_TPETRA_DEBUG
400 
401  resumeFill (params);
402  checkInternalState ();
403  }
404 
405 
406  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
408  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
409  const Teuchos::RCP<const map_type>& colMap,
410  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
411  ProfileType pftype,
412  const Teuchos::RCP<Teuchos::ParameterList>& params) :
413  dist_object_type (rowMap)
414  , rowMap_ (rowMap)
415  , colMap_ (colMap)
416  , nodeNumEntries_ (0)
417  , pftype_ (pftype)
418  , numAllocForAllRows_ (0)
419  , storageStatus_ (pftype == StaticProfile ?
420  Details::STORAGE_1D_UNPACKED :
421  Details::STORAGE_2D)
422  , indicesAreAllocated_ (false)
423  , indicesAreLocal_ (false)
424  , indicesAreGlobal_ (false)
425  , fillComplete_ (false)
426  , indicesAreSorted_ (true)
427  , noRedundancies_ (true)
428  , haveLocalConstants_ (false)
429  , haveGlobalConstants_ (false)
430  , sortGhostsAssociatedWithEachProcessor_ (true)
431  {
432  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,"
433  "params): ";
434  staticAssertions ();
435 
436  const size_t lclNumRows = rowMap.is_null () ?
437  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
438  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
439  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
440  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
441  << " != the local number of rows " << lclNumRows << " as specified by "
442  "the input row Map.");
443 
444 #ifdef HAVE_TPETRA_DEBUG
445  for (size_t r = 0; r < lclNumRows; ++r) {
446  const size_t curRowCount = numEntPerRow[r];
447  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
448  curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
449  std::invalid_argument, "numEntPerRow(" << r << ") specifies an invalid "
450  "number of entries (Teuchos::OrdinalTraits<size_t>::invalid()).");
451  }
452 #endif // HAVE_TPETRA_DEBUG
453 
454  // Deep-copy the input (ArrayRCP, therefore host accessible) into
455  // k_numAllocPerRow_. The latter is a const View, so we have to
456  // copy into a nonconst View first, then assign.
457  typedef decltype (k_numAllocPerRow_) out_view_type;
458  typedef typename out_view_type::non_const_type nc_view_type;
459  typedef Kokkos::View<const size_t*,
460  typename nc_view_type::array_layout,
461  Kokkos::HostSpace,
462  Kokkos::MemoryUnmanaged> in_view_type;
463  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
464  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
465  lclNumRows);
466  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
467  k_numAllocPerRow_ = numAllocPerRowOut;
468 
469  resumeFill (params);
470  checkInternalState ();
471  }
472 
473 
474  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
476  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
477  const Teuchos::RCP<const map_type>& colMap,
478  const typename local_graph_type::row_map_type& rowPointers,
479  const typename local_graph_type::entries_type::non_const_type& columnIndices,
480  const Teuchos::RCP<Teuchos::ParameterList>& params) :
481  dist_object_type (rowMap)
482  , rowMap_(rowMap)
483  , colMap_(colMap)
484  , nodeNumEntries_ (0)
485  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
486  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
487  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
488  , pftype_(StaticProfile)
489  , numAllocForAllRows_(0)
490  , storageStatus_ (Details::STORAGE_1D_PACKED)
491  , indicesAreAllocated_(true)
492  , indicesAreLocal_(true)
493  , indicesAreGlobal_(false)
494  , fillComplete_(false)
495  , indicesAreSorted_(true)
496  , noRedundancies_(true)
497  , haveLocalConstants_ (false)
498  , haveGlobalConstants_ (false)
499  , sortGhostsAssociatedWithEachProcessor_(true)
500  {
501  staticAssertions ();
502  setAllIndices (rowPointers, columnIndices);
503  checkInternalState ();
504  }
505 
506 
507  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
509  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
510  const Teuchos::RCP<const map_type>& colMap,
511  const Teuchos::ArrayRCP<size_t>& rowPointers,
512  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
513  const Teuchos::RCP<Teuchos::ParameterList>& params) :
514  dist_object_type (rowMap)
515  , rowMap_ (rowMap)
516  , colMap_ (colMap)
517  , nodeNumEntries_ (0)
518  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
519  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
520  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
521  , pftype_ (StaticProfile)
522  , numAllocForAllRows_ (0)
523  , storageStatus_ (Details::STORAGE_1D_PACKED)
524  , indicesAreAllocated_ (true)
525  , indicesAreLocal_ (true)
526  , indicesAreGlobal_ (false)
527  , fillComplete_ (false)
528  , indicesAreSorted_ (true)
529  , noRedundancies_ (true)
530  , haveLocalConstants_ (false)
531  , haveGlobalConstants_ (false)
532  , sortGhostsAssociatedWithEachProcessor_ (true)
533  {
534  staticAssertions ();
535  setAllIndices (rowPointers, columnIndices);
536  checkInternalState ();
537  }
538 
539 
540  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
542  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
543  const Teuchos::RCP<const map_type>& colMap,
544  const local_graph_type& k_local_graph_,
545  const Teuchos::RCP<Teuchos::ParameterList>& params)
546  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
547  , rowMap_ (rowMap)
548  , colMap_ (colMap)
549  , lclGraph_ (k_local_graph_)
550  , nodeNumEntries_ (0) // FIXME (mfh 17 Mar 2014) should get from lclGraph_ right now
551  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
552  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
553  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
554  , pftype_ (StaticProfile)
555  , numAllocForAllRows_ (0)
556  , storageStatus_ (Details::STORAGE_1D_PACKED)
557  , indicesAreAllocated_ (true)
558  , indicesAreLocal_ (true)
559  , indicesAreGlobal_ (false)
560  , fillComplete_ (false)
561  , indicesAreSorted_ (true)
562  , noRedundancies_ (true)
563  , haveLocalConstants_ (false)
564  , haveGlobalConstants_ (false)
565  , sortGhostsAssociatedWithEachProcessor_(true)
566  {
567  using Teuchos::arcp;
568  using Teuchos::ArrayRCP;
569  using Teuchos::ParameterList;
570  using Teuchos::parameterList;
571  using Teuchos::rcp;
572  typedef GlobalOrdinal GO;
573  typedef LocalOrdinal LO;
574 
575  staticAssertions();
576  const char tfecfFuncName[] = "CrsGraph(Map,Map,Kokkos::LocalStaticCrsGraph)";
577 
578  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
579  colMap.is_null (), std::runtime_error,
580  ": The input column Map must be nonnull.");
581  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
582  k_local_graph_.numRows () != rowMap->getNodeNumElements (),
583  std::runtime_error,
584  ": The input row Map and the input local graph need to have the same "
585  "number of rows. The row Map claims " << rowMap->getNodeNumElements ()
586  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
587  << " row(s).");
588  // NOTE (mfh 17 Mar 2014) getNodeNumRows() returns
589  // rowMap_->getNodeNumElements(), but it doesn't have to.
590  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
591  // k_local_graph_.numRows () != getNodeNumRows (), std::runtime_error,
592  // ": The input row Map and the input local graph need to have the same "
593  // "number of rows. The row Map claims " << getNodeNumRows () << " row(s), "
594  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
595  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
596  k_lclInds1D_.dimension_0 () != 0 || k_gblInds1D_.dimension_0 () != 0, std::logic_error,
597  ": cannot have 1D data structures allocated.");
598  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
599  ! lclInds2D_.is_null () || ! gblInds2D_.is_null (), std::logic_error,
600  ": cannot have 2D data structures allocated.");
601 
602  if (this->getNodeNumRows () == 0) {
603  nodeNumEntries_ = 0;
604  }
605  else {
606  nodeNumEntries_ = Details::getEntryOnHost (this->lclGraph_.row_map,
607  this->getNodeNumRows ());
608  }
609 
610  // NOTE (mfh 17 Mar 2014) We also need a version of this CrsGraph
611  // constructor that takes a domain and range Map, as well as a row
612  // and column Map. In that case, we must pass the domain and
613  // range Map into the following method.
614  setDomainRangeMaps (rowMap_, rowMap_);
615  makeImportExport ();
616 
617  k_lclInds1D_ = lclGraph_.entries;
618  k_rowPtrs_ = lclGraph_.row_map;
619 
620  typename local_graph_type::row_map_type d_ptrs = lclGraph_.row_map;
621  typename local_graph_type::entries_type d_inds = lclGraph_.entries;
622 
623  // Reset local properties
624  upperTriangular_ = true;
625  lowerTriangular_ = true;
626  nodeMaxNumRowEntries_ = 0;
627  nodeNumDiags_ = 0;
628 
629  // Compute triangular properties
630  const size_t numLocalRows = getNodeNumRows ();
631  for (size_t localRow = 0; localRow < numLocalRows; ++localRow) {
632  const GO globalRow = rowMap_->getGlobalElement (localRow);
633  const LO rlcid = colMap_->getLocalElement (globalRow);
634 
635  // It's entirely possible that the local matrix has no entries
636  // in the column corresponding to the current row. In that
637  // case, the column Map may not necessarily contain that GID.
638  // This is why we check whether rlcid is "invalid" (which means
639  // that globalRow is not a GID in the column Map).
640  if (rlcid != Teuchos::OrdinalTraits<LO>::invalid ()) {
641  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
642  rlcid + 1 >= static_cast<LO> (d_ptrs.dimension_0 ()),
643  std::runtime_error, ": The given row Map and/or column Map is/are "
644  "not compatible with the provided local graph.");
645  if (d_ptrs(rlcid) != d_ptrs(rlcid + 1)) {
646  const size_t smallestCol =
647  static_cast<size_t> (d_inds(d_ptrs(rlcid)));
648  const size_t largestCol =
649  static_cast<size_t> (d_inds(d_ptrs(rlcid + 1)-1));
650  if (smallestCol < localRow) {
651  upperTriangular_ = false;
652  }
653  if (localRow < largestCol) {
654  lowerTriangular_ = false;
655  }
656  for (size_t i = d_ptrs(rlcid); i < d_ptrs(rlcid + 1); ++i) {
657  if (d_inds(i) == rlcid) {
658  ++nodeNumDiags_;
659  }
660  }
661  }
662  nodeMaxNumRowEntries_ =
663  std::max (static_cast<size_t> (d_ptrs(rlcid + 1) - d_ptrs(rlcid)),
664  nodeMaxNumRowEntries_);
665  }
666  }
667 
668  haveLocalConstants_ = true;
669  computeGlobalConstants ();
670 
671  fillComplete_ = true;
672  checkInternalState ();
673  }
674 
675 
676  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
679  {}
680 
681 
682  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
683  Teuchos::RCP<const Teuchos::ParameterList>
686  {
687  using Teuchos::RCP;
688  using Teuchos::ParameterList;
689  using Teuchos::parameterList;
690 
691  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
692 
693  // Make a sublist for the Import.
694  RCP<ParameterList> importSublist = parameterList ("Import");
695 
696  // FIXME (mfh 02 Apr 2012) We should really have the Import and
697  // Export objects fill in these lists. However, we don't want to
698  // create an Import or Export unless we need them. For now, we
699  // know that the Import and Export just pass the list directly to
700  // their Distributor, so we can create a Distributor here
701  // (Distributor's constructor is a lightweight operation) and have
702  // it fill in the list.
703 
704  // Fill in Distributor default parameters by creating a
705  // Distributor and asking it to do the work.
706  Distributor distributor (rowMap_->getComm (), importSublist);
707  params->set ("Import", *importSublist, "How the Import performs communication.");
708 
709  // Make a sublist for the Export. For now, it's a clone of the
710  // Import sublist. It's not a shallow copy, though, since we
711  // might like the Import to do communication differently than the
712  // Export.
713  params->set ("Export", *importSublist, "How the Export performs communication.");
714 
715  return params;
716  }
717 
718 
719  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
720  void
722  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
723  {
724  Teuchos::RCP<const Teuchos::ParameterList> validParams =
725  getValidParameters ();
726  params->validateParametersAndSetDefaults (*validParams);
727  this->setMyParamList (params);
728  }
729 
730 
731  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
735  {
736  return rowMap_->getGlobalNumElements ();
737  }
738 
739 
740  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
744  {
745  const char tfecfFuncName[] = "getGlobalNumCols: ";
746  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
747  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
748  "The graph does not have a domain Map. You may not call this method in "
749  "that case.");
750  return getDomainMap ()->getGlobalNumElements ();
751  }
752 
753 
754  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
755  size_t
758  {
759  return this->rowMap_.is_null () ?
760  static_cast<size_t> (0) :
761  this->rowMap_->getNodeNumElements ();
762  }
763 
764 
765  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
766  size_t
769  {
770  const char tfecfFuncName[] = "getNodeNumCols: ";
771  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
772  ! hasColMap (), std::runtime_error,
773  "The graph does not have a column Map. You may not call this method "
774  "unless the graph has a column Map. This requires either that a custom "
775  "column Map was given to the constructor, or that fillComplete() has "
776  "been called.");
777  return colMap_.is_null () ? static_cast<size_t> (0) :
778  colMap_->getNodeNumElements ();
779  }
780 
781 
782  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
783  size_t
786  {
787  return nodeNumDiags_;
788  }
789 
790 
791  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
795  {
796 #ifdef HAVE_TPETRA_DEBUG
797  const char tfecfFuncName[] = "getGlobalNumDiags()";
798  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!haveGlobalConstants_, std::logic_error,
799  ": The matrix does not have globalConstants computed, but the user has requested them.");
800 #endif
801  return globalNumDiags_;
802  }
803 
804 
805  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
806  Teuchos::RCP<Node>
808  getNode () const
809  {
810  return rowMap_.is_null () ? Teuchos::null : rowMap_->getNode ();
811  }
812 
813 
814  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
815  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
817  getRowMap () const
818  {
819  return rowMap_;
820  }
821 
822 
823  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
824  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
826  getColMap () const
827  {
828  return colMap_;
829  }
830 
831 
832  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
833  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
836  {
837  return domainMap_;
838  }
839 
840 
841  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
842  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
844  getRangeMap () const
845  {
846  return rangeMap_;
847  }
848 
849  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
850  Teuchos::RCP<const Import<LocalOrdinal, GlobalOrdinal, Node> >
852  getImporter () const
853  {
854  return importer_;
855  }
856 
857 
858  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
859  Teuchos::RCP<const Export<LocalOrdinal, GlobalOrdinal, Node> >
861  getExporter () const
862  {
863  return exporter_;
864  }
865 
866 
867  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
868  bool
870  hasColMap () const
871  {
872  return ! colMap_.is_null ();
873  }
874 
875 
876  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
877  bool
880  {
881  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
882  // getNodeNumRows() is zero?
883 
884  const bool isOpt = indicesAreAllocated_ &&
885  k_numRowEntries_.dimension_0 () == 0 &&
886  getNodeNumRows () > 0;
887 
888 #ifdef HAVE_TPETRA_DEBUG
889  const char tfecfFuncName[] = "isStorageOptimized";
890  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
891  isOpt && getProfileType () == DynamicProfile, std::logic_error,
892  ": The matrix claims to have optimized storage, but getProfileType() "
893  "returns DynamicProfile. This should never happen. Please report this "
894  "bug to the Tpetra developers.");
895 #endif // HAVE_TPETRA_DEBUG
896 
897  return isOpt;
898  }
899 
900 
901  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
905  {
906  return pftype_;
907  }
908 
909 
910  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
914  {
915 #ifdef HAVE_TPETRA_DEBUG
916  const char tfecfFuncName[] = "getGlobalNumEntries()";
917  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!haveGlobalConstants_, std::logic_error,
918  ": The matrix does not have globalConstants computed, but the user has requested them.");
919 #endif
920  return globalNumEntries_;
921  }
922 
923 
924  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
925  size_t
928  {
929  return nodeNumEntries_;
930  }
931 
932 
933  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
937  {
938 #ifdef HAVE_TPETRA_DEBUG
939  const char tfecfFuncName[] = "getGlobalMaxNumRowEntries()";
940  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(!haveGlobalConstants_, std::logic_error,
941  ": The matrix does not have globalConstants computed, but the user has requested them.");
942 #endif
943 
944  return globalMaxNumRowEntries_;
945  }
946 
947 
948  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
949  size_t
952  {
953  return nodeMaxNumRowEntries_;
954  }
955 
956 
957  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
958  bool
961  {
962  return fillComplete_;
963  }
964 
965 
966  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
967  bool
970  {
971  return ! fillComplete_;
972  }
973 
974 
975  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
976  bool
979  {
980  return upperTriangular_;
981  }
982 
983 
984  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
985  bool
988  {
989  return lowerTriangular_;
990  }
991 
992 
993  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
994  bool
997  {
998  return indicesAreLocal_;
999  }
1000 
1001 
1002  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1003  bool
1006  {
1007  return indicesAreGlobal_;
1008  }
1009 
1010 
1011  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1012  size_t
1015  {
1016  typedef LocalOrdinal LO;
1017 
1018  if (this->indicesAreAllocated_) {
1019  const LO lclNumRows = this->getNodeNumRows ();
1020  if (lclNumRows == 0) {
1021  return static_cast<size_t> (0);
1022  }
1023  else if (this->storageStatus_ == Details::STORAGE_1D_PACKED) {
1024  if (static_cast<LO> (this->lclGraph_.row_map.dimension_0 ()) <
1025  static_cast<LO> (lclNumRows + 1)) {
1026  return static_cast<size_t> (0);
1027  }
1028  else {
1029  return Details::getEntryOnHost (this->lclGraph_.row_map, lclNumRows);
1030  }
1031  }
1032  else if (this->storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1033  if (this->k_rowPtrs_.dimension_0 () == 0) {
1034  return static_cast<size_t> (0);
1035  }
1036  else {
1037  return Details::getEntryOnHost (this->k_rowPtrs_, lclNumRows);
1038  }
1039  }
1040  else if (this->storageStatus_ == Details::STORAGE_2D) {
1041  size_t numAllocated = 0;
1042  if (this->isLocallyIndexed ()) {
1043  for (LocalOrdinal lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1044  numAllocated += this->lclInds2D_[lclRow].size ();
1045  }
1046  }
1047  else if (this->isGloballyIndexed ()) {
1048  for (LocalOrdinal lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1049  numAllocated += this->gblInds2D_[lclRow].size ();
1050  }
1051  }
1052  // Neither locally nor globally indexed, means no indices allocated.
1053  return numAllocated;
1054  }
1055  else {
1056  return static_cast<size_t> (0);
1057  }
1058  }
1059  else {
1060  return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1061  }
1062  }
1063 
1064 
1065  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1066  Teuchos::RCP<const Teuchos::Comm<int> >
1068  getComm () const
1069  {
1070  return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1071  }
1072 
1073 
1074  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1075  GlobalOrdinal
1078  {
1079  return rowMap_->getIndexBase ();
1080  }
1081 
1082 
1083  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1084  bool
1086  indicesAreAllocated () const
1087  {
1088  return indicesAreAllocated_;
1089  }
1090 
1091 
1092  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1093  bool
1095  isSorted () const
1096  {
1097  return indicesAreSorted_;
1098  }
1099 
1100 
1101  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1102  bool
1104  isMerged () const
1105  {
1106  return noRedundancies_;
1107  }
1108 
1109 
1110  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1111  void
1114  {
1115  // FIXME (mfh 07 May 2013) How do we know that the change
1116  // introduced a redundancy, or even that it invalidated the sorted
1117  // order of indices? CrsGraph has always made this conservative
1118  // guess. It could be a bit costly to check at insertion time,
1119  // though.
1120  indicesAreSorted_ = false;
1121  noRedundancies_ = false;
1122 
1123  // We've modified the graph, so we'll have to recompute local
1124  // constants like the number of diagonal entries on this process.
1125  haveLocalConstants_ = false;
1126  }
1127 
1128 
1129  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1130  void
1132  allocateIndices (const ELocalGlobal lg)
1133  {
1134  using Teuchos::arcp;
1135  using Teuchos::Array;
1136  using Teuchos::ArrayRCP;
1137  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1138  typedef typename local_graph_type::row_map_type::non_const_type
1139  non_const_row_map_type;
1140  typedef typename local_graph_type::entries_type::non_const_type
1141  lcl_col_inds_type;
1142  typedef Kokkos::View<GlobalOrdinal*,
1143  typename lcl_col_inds_type::array_layout,
1144  device_type> gbl_col_inds_type;
1145  const char tfecfFuncName[] = "allocateIndices: ";
1146  const char suffix[] = " Please report this bug to the Tpetra developers.";
1147 
1148  // This is a protected function, only callable by us. If it was
1149  // called incorrectly, it is our fault. That's why the tests
1150  // below throw std::logic_error instead of std::invalid_argument.
1151  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1152  (this->isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1153  "The graph is locally indexed, but Tpetra code is calling this method "
1154  "with lg=GlobalIndices." << suffix);
1155  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1156  (this->isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1157  "The graph is globally indexed, but Tpetra code is calling this method "
1158  "with lg=LocalIndices. " << suffix);
1159  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1160  (this->indicesAreAllocated (), std::logic_error, "The graph's indices "
1161  "are already allocated, but Tpetra is calling allocateIndices again."
1162  << suffix);
1163  const size_t numRows = this->getNodeNumRows ();
1164 
1165  if (this->getProfileType () == StaticProfile) {
1166  //
1167  // STATIC ALLOCATION PROFILE
1168  //
1169  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1170 
1171  if (this->k_numAllocPerRow_.dimension_0 () != 0) {
1172  // It's OK to throw std::invalid_argument here, because we
1173  // haven't incurred any side effects yet. Throwing that
1174  // exception (and not, say, std::logic_error) implies that the
1175  // instance can recover.
1176  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1177  (this->k_numAllocPerRow_.dimension_0 () != numRows,
1178  std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1179  "has nonzero length " << this->k_numAllocPerRow_.dimension_0 ()
1180  << ", but its length != numRows = " << numRows << ".");
1181 
1182  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1183  // we want to compute here) lives on device. That's OK;
1184  // computeOffsetsFromCounts can handle this case.
1186 
1187  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1188  // doesn't attempt to check its input for "invalid" flag
1189  // values. For now, we omit that feature of the sequential
1190  // code disabled below.
1191  computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1192  }
1193  else {
1194  // It's OK to throw std::invalid_argument here, because we
1195  // haven't incurred any side effects yet. Throwing that
1196  // exception (and not, say, std::logic_error) implies that the
1197  // instance can recover.
1198  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1199  (this->numAllocForAllRows_ ==
1200  Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1201  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1202  "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1203  Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1204 
1206  computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1207  }
1208 
1209  // "Commit" the resulting row offsets.
1210  this->k_rowPtrs_ = k_rowPtrs;
1211 
1212  const size_type numInds = Details::getEntryOnHost (this->k_rowPtrs_, numRows);
1213  // const size_type numInds = static_cast<size_type> (this->k_rowPtrs_(numRows));
1214  if (lg == LocalIndices) {
1215  k_lclInds1D_ = lcl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1216  }
1217  else {
1218  k_gblInds1D_ = gbl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1219  }
1220  storageStatus_ = Details::STORAGE_1D_UNPACKED;
1221  }
1222  else {
1223  //
1224  // DYNAMIC ALLOCATION PROFILE
1225  //
1226  const bool useNumAllocPerRow =
1227  (this->k_numAllocPerRow_.dimension_0 () != 0);
1228 
1229  if (lg == LocalIndices) {
1230  this->lclInds2D_ = arcp<Array<LocalOrdinal> > (numRows);
1231  for (size_t i = 0; i < numRows; ++i) {
1232  const size_t howMany = useNumAllocPerRow ?
1233  this->k_numAllocPerRow_(i) :
1234  this->numAllocForAllRows_;
1235  if (howMany > 0) {
1236  this->lclInds2D_[i].resize (howMany);
1237  }
1238  }
1239  }
1240  else { // allocate global indices
1241  this->gblInds2D_ = arcp<Array<GlobalOrdinal> > (numRows);
1242  for (size_t i = 0; i < numRows; ++i) {
1243  const size_t howMany = useNumAllocPerRow ?
1244  this->k_numAllocPerRow_(i) :
1245  this->numAllocForAllRows_;
1246  if (howMany > 0) {
1247  this->gblInds2D_[i].resize (howMany);
1248  }
1249  }
1250  }
1251  this->storageStatus_ = Details::STORAGE_2D;
1252  }
1253 
1254  this->indicesAreLocal_ = (lg == LocalIndices);
1255  this->indicesAreGlobal_ = (lg == GlobalIndices);
1256 
1257  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1258  using Kokkos::ViewAllocateWithoutInitializing;
1259  typedef decltype (k_numRowEntries_) row_ent_type;
1260  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1261 
1262  row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1263  Kokkos::deep_copy (numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1264  this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1265  }
1266 
1267  // Once indices are allocated, CrsGraph needs to free this information.
1268  this->numAllocForAllRows_ = 0;
1269  this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1270  this->indicesAreAllocated_ = true;
1271 
1272  try {
1273  this->checkInternalState ();
1274  }
1275  catch (std::logic_error& e) {
1276  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1277  (true, std::logic_error, "At end of allocateIndices, "
1278  "checkInternalState threw std::logic_error: "
1279  << e.what ());
1280  }
1281  catch (std::exception& e) {
1282  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1283  (true, std::runtime_error, "At end of allocateIndices, "
1284  "checkInternalState threw std::exception: "
1285  << e.what ());
1286  }
1287  catch (...) {
1288  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1289  (true, std::runtime_error, "At end of allocateIndices, "
1290  "checkInternalState threw an exception "
1291  "not a subclass of std::exception.");
1292  }
1293  }
1294 
1295 
1296  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1297  Teuchos::ArrayView<const LocalOrdinal>
1299  getLocalView (const RowInfo rowinfo) const
1300  {
1301  using Kokkos::subview;
1302  typedef LocalOrdinal LO;
1303  typedef Kokkos::View<const LO*, execution_space,
1304  Kokkos::MemoryUnmanaged> row_view_type;
1305 
1306  if (rowinfo.allocSize == 0) {
1307  return Teuchos::ArrayView<const LO> ();
1308  }
1309  else { // nothing in the row to view
1310  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1311  const size_t start = rowinfo.offset1D;
1312  const size_t len = rowinfo.allocSize;
1313  const std::pair<size_t, size_t> rng (start, start + len);
1314  // mfh 23 Nov 2015: Don't just create a subview of
1315  // k_lclInds1D_ directly, because that first creates a
1316  // _managed_ subview, then returns an unmanaged version of
1317  // that. That touches the reference count, which costs
1318  // performance in a measurable way.
1319  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1320  const LO* const rowViewRaw = (len == 0) ? NULL : rowView.ptr_on_device ();
1321  return Teuchos::ArrayView<const LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1322  }
1323  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1324  return lclInds2D_[rowinfo.localRow] ();
1325  }
1326  else {
1327  return Teuchos::ArrayView<const LO> (); // nothing in the row to view
1328  }
1329  }
1330  }
1331 
1332  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1333  LocalOrdinal
1335  getLocalViewRawConst (const LocalOrdinal*& lclInds,
1336  LocalOrdinal& numEnt,
1337  const RowInfo& rowinfo) const
1338  {
1339  lclInds = NULL;
1340  numEnt = 0;
1341 
1342  if (rowinfo.allocSize != 0) {
1343  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1344 #ifdef HAVE_TPETRA_DEBUG
1345  if (rowinfo.offset1D + rowinfo.allocSize >
1346  static_cast<size_t> (k_lclInds1D_.dimension_0 ())) {
1347  return static_cast<LocalOrdinal> (-1);
1348  }
1349 #endif // HAVE_TPETRA_DEBUG
1350  lclInds = &k_lclInds1D_[rowinfo.offset1D];
1351  numEnt = rowinfo.allocSize;
1352  }
1353  else { // 2-D storage
1354 #ifdef HAVE_TPETRA_DEBUG
1355  if (rowinfo.localRow >= static_cast<size_t> (lclInds2D_.size ())) {
1356  return static_cast<LocalOrdinal> (-1);
1357  }
1358 #endif // HAVE_TPETRA_DEBUG
1359  // Use a const reference so we don't touch the ArrayRCP's ref
1360  // count, since ArrayRCP's ref count is not thread safe.
1361  const auto& curRow = lclInds2D_[rowinfo.localRow];
1362  if (! curRow.empty ()) {
1363  lclInds = curRow.getRawPtr ();
1364  numEnt = curRow.size ();
1365  }
1366  }
1367  }
1368  return static_cast<LocalOrdinal> (0);
1369  }
1370 
1371  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1372  Teuchos::ArrayView<LocalOrdinal>
1375  {
1376  using Kokkos::subview;
1377  typedef LocalOrdinal LO;
1378  typedef Kokkos::View<LO*, execution_space,
1379  Kokkos::MemoryUnmanaged> row_view_type;
1380 
1381  if (rowinfo.allocSize == 0) { // nothing in the row to view
1382  return Teuchos::ArrayView<LO> ();
1383  }
1384  else {
1385  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1386  const size_t start = rowinfo.offset1D;
1387  const size_t len = rowinfo.allocSize;
1388  const std::pair<size_t, size_t> rng (start, start + len);
1389  // mfh 23 Nov 2015: Don't just create a subview of
1390  // k_lclInds1D_ directly, because that first creates a
1391  // _managed_ subview, then returns an unmanaged version of
1392  // that. That touches the reference count, which costs
1393  // performance in a measurable way.
1394  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1395  LO* const rowViewRaw = (len == 0) ? NULL : rowView.ptr_on_device ();
1396  return Teuchos::ArrayView<LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1397  }
1398  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1399  return lclInds2D_[rowinfo.localRow] ();
1400  }
1401  else {
1402  return Teuchos::ArrayView<LO> (); // nothing in the row to view
1403  }
1404  }
1405  }
1406 
1407 
1408  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1409  Kokkos::View<const LocalOrdinal*,
1411  Kokkos::MemoryUnmanaged>
1413  getLocalKokkosRowView (const RowInfo& rowInfo) const
1414  {
1415  typedef LocalOrdinal LO;
1416  typedef Kokkos::View<const LO*, execution_space,
1417  Kokkos::MemoryUnmanaged> row_view_type;
1418 
1419  if (rowInfo.allocSize == 0) {
1420  return row_view_type ();
1421  }
1422  else { // nothing in the row to view
1423  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1424  const size_t start = rowInfo.offset1D;
1425  const size_t len = rowInfo.allocSize;
1426  const std::pair<size_t, size_t> rng (start, start + len);
1427  // mfh 23 Nov 2015: Don't just create a subview of
1428  // k_lclInds1D_ directly, because that first creates a
1429  // _managed_ subview, then returns an unmanaged version of
1430  // that. That touches the reference count, which costs
1431  // performance in a measurable way.
1432  return Kokkos::subview (row_view_type (k_lclInds1D_), rng);
1433  }
1434  else if (! this->lclInds2D_[rowInfo.localRow].empty ()) { // 2-D storage
1435  // Use a reference, so that I don't touch the
1436  // Teuchos::ArrayView reference count in a debug build. (It
1437  // has no reference count in a release build.) This ensures
1438  // thread safety.
1439  //
1440  // lclInds2D_ lives on host, so this code does not assume UVM.
1441  Teuchos::Array<LO>& lclInds = this->lclInds2D_[rowInfo.localRow];
1442  return row_view_type (lclInds.getRawPtr (), lclInds.size ());
1443  }
1444  else {
1445  return row_view_type (); // nothing in the row to view
1446  }
1447  }
1448  }
1449 
1450 
1451  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1452  Kokkos::View<LocalOrdinal*,
1453  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::execution_space,
1454  Kokkos::MemoryUnmanaged>
1456  getLocalKokkosRowViewNonConst (const RowInfo& rowInfo)
1457  {
1458  typedef LocalOrdinal LO;
1459  typedef Kokkos::View<LO*, execution_space,
1460  Kokkos::MemoryUnmanaged> row_view_type;
1461 
1462  if (rowInfo.allocSize == 0) {
1463  return row_view_type ();
1464  }
1465  else { // nothing in the row to view
1466  if (k_lclInds1D_.dimension_0 () != 0) { // 1-D storage
1467  const size_t start = rowInfo.offset1D;
1468  const size_t len = rowInfo.allocSize;
1469  const std::pair<size_t, size_t> rng (start, start + len);
1470  // mfh 23 Nov 2015: Don't just create a subview of
1471  // k_lclInds1D_ directly, because that first creates a
1472  // _managed_ subview, then returns an unmanaged version of
1473  // that. That touches the reference count, which costs
1474  // performance in a measurable way.
1475  return Kokkos::subview (row_view_type (this->k_lclInds1D_), rng);
1476  }
1477  else if (! this->lclInds2D_[rowInfo.localRow].empty ()) { // 2-D storage
1478  // Use a reference, so that I don't touch the
1479  // Teuchos::ArrayView reference count in a debug build. (It
1480  // has no reference count in a release build.) This ensures
1481  // thread safety.
1482  //
1483  // lclInds2D_ lives on host, so this code does not assume UVM.
1484  Teuchos::Array<LO>& cols = this->lclInds2D_[rowInfo.localRow];
1485  LO* const colsRaw = cols.getRawPtr ();
1486  return row_view_type (colsRaw, cols.size ());
1487  }
1488  else {
1489  return row_view_type (); // nothing in the row to view
1490  }
1491  }
1492  }
1493 
1494 
1495  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1496  Kokkos::View<const GlobalOrdinal*,
1497  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic>::execution_space,
1498  Kokkos::MemoryUnmanaged>
1500  getGlobalKokkosRowView (const RowInfo& rowinfo) const
1501  {
1502  typedef GlobalOrdinal GO;
1503  typedef Kokkos::View<const GO*, execution_space,
1504  Kokkos::MemoryUnmanaged> row_view_type;
1505 
1506  if (rowinfo.allocSize == 0) {
1507  return row_view_type ();
1508  }
1509  else { // nothing in the row to view
1510  if (this->k_gblInds1D_.dimension_0 () != 0) { // 1-D storage
1511  const size_t start = rowinfo.offset1D;
1512  const size_t len = rowinfo.allocSize;
1513  const std::pair<size_t, size_t> rng (start, start + len);
1514  // mfh 23 Nov 2015: Don't just create a subview of
1515  // k_gblInds1D_ directly, because that first creates a
1516  // _managed_ subview, then returns an unmanaged version of
1517  // that. That touches the reference count, which costs
1518  // performance in a measurable way.
1519  return Kokkos::subview (row_view_type (this->k_gblInds1D_), rng);
1520  }
1521  else if (! this->gblInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1522  // Use a reference, so that I don't touch the
1523  // Teuchos::ArrayView reference count in a debug build. (It
1524  // has no reference count in a release build.) This ensures
1525  // thread safety.
1526  //
1527  // gblInds2D_ lives on host, so this code does not assume UVM.
1528  Teuchos::Array<GO>& cols = this->gblInds2D_[rowinfo.localRow];
1529  return row_view_type (cols.getRawPtr (), cols.size ());
1530  }
1531  else {
1532  return row_view_type (); // nothing in the row to view
1533  }
1534  }
1535  }
1536 
1537 
1538  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1539  Teuchos::ArrayView<const GlobalOrdinal>
1541  getGlobalView (const RowInfo& rowinfo) const
1542  {
1543  Teuchos::ArrayView<const GlobalOrdinal> view;
1544  if (rowinfo.allocSize > 0) {
1545  if (k_gblInds1D_.dimension_0 () != 0) {
1546  auto rng = std::make_pair (rowinfo.offset1D,
1547  rowinfo.offset1D + rowinfo.allocSize);
1548  // mfh 23 Nov 2015: Don't just create a subview of
1549  // k_gblInds1D_ directly, because that first creates a
1550  // _managed_ subview, then returns an unmanaged version of
1551  // that. That touches the reference count, which costs
1552  // performance in a measurable way.
1553  Kokkos::View<const GlobalOrdinal*, execution_space,
1554  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1555  view = Kokkos::Compat::getConstArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1556  }
1557  else if (! gblInds2D_[rowinfo.localRow].empty()) {
1558  view = gblInds2D_[rowinfo.localRow] ();
1559  }
1560  }
1561  return view;
1562  }
1563 
1564 
1565  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1566  LocalOrdinal
1568  getGlobalViewRawConst (const GlobalOrdinal*& gblInds,
1569  LocalOrdinal& numEnt,
1570  const RowInfo& rowinfo) const
1571  {
1572  gblInds = NULL;
1573  numEnt = 0;
1574 
1575  if (rowinfo.allocSize != 0) {
1576  if (k_gblInds1D_.dimension_0 () != 0) { // 1-D storage
1577 #ifdef HAVE_TPETRA_DEBUG
1578  if (rowinfo.offset1D + rowinfo.allocSize >
1579  static_cast<size_t> (k_gblInds1D_.dimension_0 ())) {
1580  return static_cast<LocalOrdinal> (-1);
1581  }
1582 #endif // HAVE_TPETRA_DEBUG
1583  gblInds = &k_gblInds1D_[rowinfo.offset1D];
1584  numEnt = rowinfo.allocSize;
1585  }
1586  else {
1587 #ifdef HAVE_TPETRA_DEBUG
1588  if (rowinfo.localRow >= static_cast<size_t> (gblInds2D_.size ())) {
1589  return static_cast<LocalOrdinal> (-1);
1590  }
1591 #endif // HAVE_TPETRA_DEBUG
1592  const auto& curRow = gblInds2D_[rowinfo.localRow];
1593  if (! curRow.empty ()) {
1594  gblInds = curRow.getRawPtr ();
1595  numEnt = curRow.size ();
1596  }
1597  }
1598  }
1599  return static_cast<LocalOrdinal> (0);
1600  }
1601 
1602 
1603  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1604  Teuchos::ArrayView<GlobalOrdinal>
1607  {
1608  Teuchos::ArrayView<GlobalOrdinal> view;
1609  if (rowinfo.allocSize > 0) {
1610  if (k_gblInds1D_.dimension_0 () != 0) {
1611  auto rng = std::make_pair (rowinfo.offset1D,
1612  rowinfo.offset1D + rowinfo.allocSize);
1613  // mfh 23 Nov 2015: Don't just create a subview of
1614  // k_gblInds1D_ directly, because that first creates a
1615  // _managed_ subview, then returns an unmanaged version of
1616  // that. That touches the reference count, which costs
1617  // performance in a measurable way.
1618  Kokkos::View<GlobalOrdinal*, execution_space,
1619  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1620  view = Kokkos::Compat::getArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1621  }
1622  else if (! gblInds2D_[rowinfo.localRow].empty()) {
1623  view = gblInds2D_[rowinfo.localRow] ();
1624  }
1625  }
1626  return view;
1627  }
1628 
1629 
1630  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1631  RowInfo
1633  getRowInfo (const LocalOrdinal myRow) const
1634  {
1635 #ifdef HAVE_TPETRA_DEBUG
1636  const char tfecfFuncName[] = "getRowInfo: ";
1637  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1638  ! hasRowInfo (), std::logic_error,
1639  "Late catch! Graph does not have row info anymore. "
1640  "Error should have been caught earlier. "
1641  "Please report this bug to the Tpetra developers.");
1642 #endif // HAVE_TPETRA_DEBUG
1643 
1644  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1645  RowInfo ret;
1646  if (! this->hasRowInfo () || this->rowMap_.is_null () ||
1647  ! this->rowMap_->isNodeLocalElement (myRow)) {
1648  ret.localRow = STINV;
1649  ret.allocSize = 0;
1650  ret.numEntries = 0;
1651  ret.offset1D = STINV;
1652  return ret;
1653  }
1654 
1655  ret.localRow = static_cast<size_t> (myRow);
1656  if (this->indicesAreAllocated ()) {
1657  if (this->getProfileType () == StaticProfile) {
1658  // Offsets tell us the allocation size in this case.
1659  if (this->k_rowPtrs_.dimension_0 () == 0) {
1660  ret.offset1D = 0;
1661  ret.allocSize = 0;
1662  }
1663  else {
1664  ret.offset1D = this->k_rowPtrs_(myRow);
1665  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
1666  }
1667 
1668  ret.numEntries = (this->k_numRowEntries_.dimension_0 () == 0) ?
1669  ret.allocSize :
1670  this->k_numRowEntries_(myRow);
1671  }
1672  else { // DynamicProfile
1673  ret.offset1D = STINV;
1674  if (this->isLocallyIndexed ()) {
1675  ret.allocSize = (this->lclInds2D_.size () == 0) ?
1676  size_t (0) :
1677  this->lclInds2D_[myRow].size ();
1678  }
1679  else if (this->isGloballyIndexed ()) {
1680  ret.allocSize = (this->gblInds2D_.size () == 0) ?
1681  size_t (0) :
1682  this->gblInds2D_[myRow].size ();
1683  }
1684  else { // neither locally nor globally indexed means no indices alloc'd
1685  ret.allocSize = 0;
1686  }
1687 
1688  ret.numEntries = (this->k_numRowEntries_.dimension_0 () == 0) ?
1689  size_t (0) :
1690  this->k_numRowEntries_(myRow);
1691  }
1692  }
1693  else { // haven't performed allocation yet; probably won't hit this code
1694  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1695  // allocate, rather than doing lazy allocation at first insert.
1696  // This will make k_numAllocPerRow_ obsolete.
1697  ret.allocSize = (this->k_numAllocPerRow_.dimension_0 () != 0) ?
1698  this->k_numAllocPerRow_(myRow) : // this is a host View
1699  this->numAllocForAllRows_;
1700  ret.numEntries = 0;
1701  ret.offset1D = STINV;
1702  }
1703 
1704  return ret;
1705  }
1706 
1707 
1708  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1709  RowInfo
1711  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1712  {
1713  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1714  RowInfo ret;
1715  if (! this->hasRowInfo () || this->rowMap_.is_null ()) {
1716  ret.localRow = STINV;
1717  ret.allocSize = 0;
1718  ret.numEntries = 0;
1719  ret.offset1D = STINV;
1720  return ret;
1721  }
1722  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1723  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1724  ret.localRow = STINV;
1725  ret.allocSize = 0;
1726  ret.numEntries = 0;
1727  ret.offset1D = STINV;
1728  return ret;
1729  }
1730 
1731  ret.localRow = static_cast<size_t> (myRow);
1732  if (this->indicesAreAllocated ()) {
1733  // graph data structures have the info that we need
1734  //
1735  // if static graph, offsets tell us the allocation size
1736  if (this->getProfileType() == StaticProfile) {
1737  if (this->k_rowPtrs_.dimension_0 () == 0) {
1738  ret.offset1D = 0;
1739  ret.allocSize = 0;
1740  }
1741  else {
1742  ret.offset1D = this->k_rowPtrs_(myRow);
1743  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
1744  }
1745 
1746  ret.numEntries = (this->k_numRowEntries_.dimension_0 () == 0) ?
1747  ret.allocSize :
1748  this->k_numRowEntries_(myRow);
1749  }
1750  else { // DynamicProfile
1751  ret.offset1D = STINV;
1752  if (this->isLocallyIndexed ()) {
1753  ret.allocSize = (this->lclInds2D_.size () == 0) ?
1754  size_t (0) :
1755  this->lclInds2D_[myRow].size ();
1756  }
1757  else {
1758  ret.allocSize = (this->gblInds2D_.size () == 0) ?
1759  size_t (0) :
1760  this->gblInds2D_[myRow].size ();
1761  }
1762 
1763  ret.numEntries = (this->k_numRowEntries_.dimension_0 () == 0) ?
1764  size_t (0) :
1765  this->k_numRowEntries_(myRow);
1766  }
1767  }
1768  else { // haven't performed allocation yet; probably won't hit this code
1769  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1770  // allocate, rather than doing lazy allocation at first insert.
1771  // This will make k_numAllocPerRow_ obsolete.
1772  ret.allocSize = (this->k_numAllocPerRow_.dimension_0 () != 0) ?
1773  this->k_numAllocPerRow_(myRow) : // this is a host View
1774  this->numAllocForAllRows_;
1775  ret.numEntries = 0;
1776  ret.offset1D = STINV;
1777  }
1778 
1779  return ret;
1780  }
1781 
1782 
1783  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1784  void
1786  staticAssertions () const
1787  {
1788  using Teuchos::OrdinalTraits;
1789  typedef LocalOrdinal LO;
1790  typedef GlobalOrdinal GO;
1791  typedef global_size_t GST;
1792 
1793  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1794  // This is so that we can store local indices in the memory
1795  // formerly occupied by global indices.
1796  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1797  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1798  // Assumption: max(size_t) >= max(LocalOrdinal)
1799  // This is so that we can represent any LocalOrdinal as a size_t.
1800  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1801  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1802  static_assert (sizeof(GST) >= sizeof(size_t),
1803  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1804 
1805  // FIXME (mfh 30 Sep 2015) We're not using
1806  // Teuchos::CompileTimeAssert any more. Can we do these checks
1807  // with static_assert?
1808 
1809  // can't call max() with CompileTimeAssert, because it isn't a
1810  // constant expression; will need to make this a runtime check
1811  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1812  "given template arguments: size assumptions are not valid.";
1813  TEUCHOS_TEST_FOR_EXCEPTION(
1814  static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1815  std::runtime_error, msg);
1816  TEUCHOS_TEST_FOR_EXCEPTION(
1817  static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1818  std::runtime_error, msg);
1819  TEUCHOS_TEST_FOR_EXCEPTION(
1820  static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1821  std::runtime_error, msg);
1822  TEUCHOS_TEST_FOR_EXCEPTION(
1823  Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1824  std::runtime_error, msg);
1825  }
1826 
1827 
1828  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1829  size_t
1831  insertIndices (const RowInfo& rowinfo,
1832  const SLocalGlobalViews &newInds,
1833  const ELocalGlobal lg,
1834  const ELocalGlobal I)
1835  {
1836  using Teuchos::ArrayView;
1837 #ifdef HAVE_TPETRA_DEBUG
1838  TEUCHOS_TEST_FOR_EXCEPTION(
1839  lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1840  "Tpetra::CrsGraph::insertIndices: lg must be either GlobalIndices or "
1841  "LocalIndices.");
1842 #endif // HAVE_TPETRA_DEBUG
1843  size_t numNewInds = 0;
1844  if (lg == GlobalIndices) { // input indices are global
1845  ArrayView<const GlobalOrdinal> new_ginds = newInds.ginds;
1846  numNewInds = new_ginds.size();
1847  if (I == GlobalIndices) { // store global indices
1848  ArrayView<GlobalOrdinal> gind_view = getGlobalViewNonConst(rowinfo);
1849  std::copy(new_ginds.begin(), new_ginds.end(), gind_view.begin()+rowinfo.numEntries);
1850  }
1851  else if (I == LocalIndices) { // store local indices
1852  ArrayView<LocalOrdinal> lind_view = getLocalViewNonConst(rowinfo);
1853  typename ArrayView<const GlobalOrdinal>::iterator in = new_ginds.begin();
1854  const typename ArrayView<const GlobalOrdinal>::iterator stop = new_ginds.end();
1855  typename ArrayView<LocalOrdinal>::iterator out = lind_view.begin()+rowinfo.numEntries;
1856  while (in != stop) {
1857  *out++ = colMap_->getLocalElement (*in++);
1858  }
1859  }
1860  }
1861  else if (lg == LocalIndices) { // input indices are local
1862  ArrayView<const LocalOrdinal> new_linds = newInds.linds;
1863  numNewInds = new_linds.size();
1864  if (I == LocalIndices) { // store local indices
1865  ArrayView<LocalOrdinal> lind_view = getLocalViewNonConst(rowinfo);
1866  std::copy(new_linds.begin(), new_linds.end(), lind_view.begin()+rowinfo.numEntries);
1867  }
1868  else if (I == GlobalIndices) {
1869  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "Tpetra::CrsGraph::"
1870  "insertIndices: the case where the input indices are local and the "
1871  "indices to write are global (lg=LocalIndices, I=GlobalIndices) is "
1872  "not implemented, because it does not make sense." << std::endl <<
1873  "If you have correct local column indices, that means the graph has "
1874  "a column Map. In that case, you should be storing local indices.");
1875  }
1876  }
1877 
1878  k_numRowEntries_(rowinfo.localRow) += numNewInds;
1879  nodeNumEntries_ += numNewInds;
1880  setLocallyModified ();
1881  return numNewInds;
1882  }
1883 
1884 
1885  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1886  void
1888  insertGlobalIndicesImpl (const LocalOrdinal myRow,
1889  const Teuchos::ArrayView<const GlobalOrdinal>& indices)
1890  {
1891  using Kokkos::subview;
1892  typedef Kokkos::pair<size_t, size_t> range_type;
1893  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1894 
1895  RowInfo rowInfo = getRowInfo(myRow);
1896  size_t numNewInds = indices.size();
1897  size_t newNumEntries = rowInfo.numEntries + numNewInds;
1898 
1899  if (newNumEntries > rowInfo.allocSize) {
1900  if (getProfileType () == StaticProfile) {
1901 
1902  // Count how many new indices are just duplicates of the old
1903  // ones. If enough are duplicates, then we're safe.
1904  //
1905  // TODO (09 Sep 2016) CrsGraph never supported this use case
1906  // before. Thus, we're justified in not optimizing it. We
1907  // could use binary search if the graph's current entries are
1908  // sorted, for example, but we just use linear search for now.
1909 
1910  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1911  (rowInfo.numEntries > rowInfo.allocSize, std::logic_error,
1912  "For local row " << myRow << ", rowInfo.numEntries = "
1913  << rowInfo.numEntries << " > rowInfo.allocSize = "
1914  << rowInfo.allocSize
1915  << ". Please report this bug to the Tpetra developers.");
1916 
1917  size_t dupCount = 0;
1918  if (k_gblInds1D_.dimension_0 () != 0) {
1919  const size_t curOffset = rowInfo.offset1D;
1920  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1921  (static_cast<size_t> (k_gblInds1D_.dimension_0 ()) < curOffset,
1922  std::logic_error, "k_gblInds1D_.dimension_0() = " <<
1923  k_gblInds1D_.dimension_0 () << " < offset1D = " << curOffset
1924  << ". Please report this bug to the Tpetra developers.");
1925  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1926  (static_cast<size_t> (k_gblInds1D_.dimension_0 ()) <
1927  curOffset + rowInfo.numEntries,
1928  std::logic_error, "k_gblInds1D_.dimension_0() = " <<
1929  k_gblInds1D_.dimension_0 () << " < offset1D (= " << curOffset <<
1930  ") + rowInfo.numEntries (= " << rowInfo.numEntries << "). Please "
1931  "report this bug to the Tpetra developers.");
1932  const Kokkos::pair<size_t, size_t>
1933  range (curOffset, curOffset + rowInfo.numEntries);
1934 
1935  auto gblIndsCur = subview (k_gblInds1D_, range);
1936  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1937  (static_cast<size_t> (gblIndsCur.dimension_0 ()) != rowInfo.numEntries,
1938  std::logic_error, "gblIndsCur.dimension_0() = " <<
1939  gblIndsCur.dimension_0 () << " != rowInfo.numEntries = " <<
1940  rowInfo.numEntries << ". Please report this bug to the Tpetra "
1941  "developers.");
1942 
1943  const size_t numInput = static_cast<size_t> (indices.size ());
1944  for (size_t k_new = 0; k_new < numInput; ++k_new) {
1945  const GlobalOrdinal gblIndToInsert = indices[k_new];
1946  for (size_t k_old = 0; k_old < rowInfo.numEntries; ++k_old) {
1947  if (gblIndsCur[k_old] == gblIndToInsert) {
1948  // Input could itself have duplicates. Input is
1949  // const, so we can't remove duplicates. That's OK
1950  // here, though, because dupCount just refers to the
1951  // number of input entries that actually need to be
1952  // inserted.
1953  ++dupCount;
1954  }
1955  }
1956  }
1957  }
1958  else {
1959  Teuchos::ArrayView<GlobalOrdinal> gblInds = (gblInds2D_[myRow]) ();
1960  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1961  (rowInfo.allocSize != static_cast<size_t> (gblInds.size ()),
1962  std::logic_error, "rowInfo.allocSize = " << rowInfo.allocSize
1963  << " != gblInds.size() = " << gblInds.size ()
1964  << ". Please report this bug to the Tpetra developers.");
1965  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1966  (rowInfo.numEntries > static_cast<size_t> (gblInds.size ()),
1967  std::logic_error, "rowInfo.numEntries = " << rowInfo.numEntries
1968  << " > gblInds.size() = " << gblInds.size ()
1969  << ". Please report this bug to the Tpetra developers.");
1970  auto gblIndsCur = gblInds (0, rowInfo.numEntries);
1971 
1972  const size_t numInput = static_cast<size_t> (indices.size ());
1973  for (size_t k_new = 0; k_new < numInput; ++k_new) {
1974  const GlobalOrdinal gblIndToInsert = indices[k_new];
1975  for (size_t k_old = 0; k_old < rowInfo.numEntries; ++k_old) {
1976  if (gblIndsCur[k_old] == gblIndToInsert) {
1977  // Input could itself have duplicates. Input is
1978  // const, so we can't remove duplicates. That's OK
1979  // here, though, because dupCount just refers to the
1980  // number of input entries that actually need to be
1981  // inserted.
1982  ++dupCount;
1983  }
1984  }
1985  }
1986  }
1987 
1988  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1989  (static_cast<size_t> (indices.size ()) < dupCount, std::logic_error,
1990  "indices.size() = " << indices.size () << " < dupCount = " <<
1991  dupCount << ". Please report this bug to the Tpetra developers.");
1992  const size_t numNewToInsert =
1993  static_cast<size_t> (indices.size ()) - dupCount;
1994 
1995  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1996  (rowInfo.numEntries + numNewToInsert > rowInfo.allocSize,
1997  std::runtime_error, "For local row " << myRow << " on Process " <<
1998  this->getComm ()->getRank () << ", even after excluding " << dupCount
1999  << " duplicate(s) in input, the new number of entries " <<
2000  (rowInfo.numEntries + numNewToInsert) << " still exceeds this row's "
2001  "static allocation size " << rowInfo.allocSize << ". You must "
2002  "either fix the upper bound on the number of entries in this row, "
2003  "or switch from StaticProfile to DynamicProfile.");
2004 
2005  if (k_gblInds1D_.dimension_0 () != 0) {
2006  const size_t curOffset = rowInfo.offset1D;
2007  auto gblIndsCur =
2008  subview (k_gblInds1D_, range_type (curOffset,
2009  curOffset + rowInfo.numEntries));
2010  auto gblIndsNew =
2011  subview (k_gblInds1D_, range_type (curOffset + rowInfo.numEntries,
2012  curOffset + rowInfo.allocSize));
2013 
2014  size_t curPos = 0;
2015  for (size_t k_new = 0; k_new < numNewInds; ++k_new) {
2016  const GlobalOrdinal gblIndToInsert = indices[k_new];
2017 
2018  bool isAlreadyInOld = false;
2019  for (size_t k_old = 0; k_old < rowInfo.numEntries; ++k_old) {
2020  if (gblIndsCur[k_old] == gblIndToInsert) {
2021  isAlreadyInOld = true;
2022  break;
2023  }
2024  }
2025  if (! isAlreadyInOld) {
2026  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2027  (curPos >= numNewToInsert, std::logic_error, "curPos = " <<
2028  curPos << " >= numNewToInsert = " << numNewToInsert << ". "
2029  "Please report this bug to the Tpetra developers.");
2030  gblIndsNew[curPos] = gblIndToInsert;
2031  ++curPos;
2032  }
2033  } // for each input column index
2034  }
2035  else {
2036  Teuchos::ArrayView<GlobalOrdinal> gblInds = (gblInds2D_[myRow]) ();
2037  // Teuchos::ArrayView::operator() takes (offset, size).
2038  auto gblIndsCur = gblInds (0, rowInfo.numEntries);
2039  auto gblIndsNew = gblInds (rowInfo.numEntries,
2040  rowInfo.allocSize - rowInfo.numEntries);
2041 
2042  size_t curPos = 0;
2043  for (size_t k_new = 0; k_new < numNewInds; ++k_new) {
2044  const GlobalOrdinal gblIndToInsert = indices[k_new];
2045 
2046  bool isAlreadyInOld = false;
2047  for (size_t k_old = 0; k_old < rowInfo.numEntries; ++k_old) {
2048  if (gblIndsCur[k_old] == gblIndToInsert) {
2049  isAlreadyInOld = true;
2050  break;
2051  }
2052  }
2053  if (! isAlreadyInOld) {
2054  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2055  (curPos >= numNewToInsert, std::logic_error, "curPos = " <<
2056  curPos << " >= numNewToInsert = " << numNewToInsert << ". "
2057  "Please report this bug to the Tpetra developers.");
2058  gblIndsNew[curPos] = gblIndToInsert;
2059  ++curPos;
2060  }
2061  } // for each input column index
2062  }
2063 
2064  k_numRowEntries_(myRow) = rowInfo.numEntries + numNewToInsert;
2065  nodeNumEntries_ += numNewToInsert;
2066  setLocallyModified ();
2067 
2068 #ifdef HAVE_TPETRA_DEBUG
2069  newNumEntries = rowInfo.numEntries + numNewToInsert;
2070  const size_t chkNewNumEntries = getNumEntriesInLocalRow (myRow);
2071  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2072  (chkNewNumEntries != newNumEntries, std::logic_error,
2073  "After inserting new entries, getNumEntriesInLocalRow(" << myRow <<
2074  ") = " << chkNewNumEntries << " != newNumEntries = " << newNumEntries
2075  << ". Please report this bug to the Tpetra developers.");
2076 #endif // HAVE_TPETRA_DEBUG
2077 
2078  return; // all done!
2079  }
2080  else {
2081  // update allocation, doubling size to reduce # reallocations
2082  size_t newAllocSize = 2*rowInfo.allocSize;
2083  if (newAllocSize < newNumEntries) {
2084  newAllocSize = newNumEntries;
2085  }
2086  gblInds2D_[myRow].resize(newAllocSize);
2087  }
2088  }
2089 
2090  // Copy new indices at end of global index array
2091  if (k_gblInds1D_.dimension_0 () != 0) {
2092  const size_t numIndsToCopy = static_cast<size_t> (indices.size ());
2093  const size_t offset = rowInfo.offset1D + rowInfo.numEntries;
2094  for (size_t k = 0; k < numIndsToCopy; ++k) {
2095  k_gblInds1D_[offset + k] = indices[k];
2096  }
2097  }
2098  else {
2099  std::copy(indices.begin(), indices.end(),
2100  gblInds2D_[myRow].begin()+rowInfo.numEntries);
2101  }
2102 
2103  k_numRowEntries_(myRow) += numNewInds;
2104  nodeNumEntries_ += numNewInds;
2105  setLocallyModified ();
2106 
2107 #ifdef HAVE_TPETRA_DEBUG
2108  {
2109  const size_t chkNewNumEntries = getNumEntriesInLocalRow (myRow);
2110  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2111  chkNewNumEntries != newNumEntries, std::logic_error,
2112  ": Internal logic error. Please contact Tpetra team.");
2113  }
2114 #endif
2115  }
2116 
2117 
2118  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2119  void
2121  insertLocalIndicesImpl (const LocalOrdinal myRow,
2122  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2123  {
2124  using Kokkos::MemoryUnmanaged;
2125  using Kokkos::subview;
2126  using Kokkos::View;
2127  typedef LocalOrdinal LO;
2128  const char* tfecfFuncName ("insertLocallIndicesImpl");
2129 
2130  const RowInfo rowInfo = this->getRowInfo(myRow);
2131  const size_t numNewInds = indices.size();
2132  const size_t newNumEntries = rowInfo.numEntries + numNewInds;
2133  if (newNumEntries > rowInfo.allocSize) {
2134  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2135  getProfileType() == StaticProfile, std::runtime_error,
2136  ": new indices exceed statically allocated graph structure.");
2137 
2138  // update allocation, doubling size to reduce number of reallocations
2139  size_t newAllocSize = 2*rowInfo.allocSize;
2140  if (newAllocSize < newNumEntries)
2141  newAllocSize = newNumEntries;
2142  lclInds2D_[myRow].resize(newAllocSize);
2143  }
2144 
2145  // Store the new indices at the end of row myRow.
2146  if (k_lclInds1D_.dimension_0 () != 0) {
2147  typedef View<const LO*, execution_space, MemoryUnmanaged> input_view_type;
2148  typedef View<LO*, execution_space, MemoryUnmanaged> row_view_type;
2149 
2150  input_view_type inputInds (indices.getRawPtr (), indices.size ());
2151  const size_t start = rowInfo.offset1D + rowInfo.numEntries; // end of row
2152  const std::pair<size_t, size_t> rng (start, start + newNumEntries);
2153  // mfh 23 Nov 2015: Don't just create a subview of k_lclInds1D_
2154  // directly, because that first creates a _managed_ subview,
2155  // then returns an unmanaged version of that. That touches the
2156  // reference count, which costs performance in a measurable way.
2157  row_view_type myInds = subview (row_view_type (k_lclInds1D_), rng);
2158  Kokkos::deep_copy (myInds, inputInds);
2159  }
2160  else {
2161  std::copy (indices.begin (), indices.end (),
2162  lclInds2D_[myRow].begin () + rowInfo.numEntries);
2163  }
2164 
2165  k_numRowEntries_(myRow) += numNewInds;
2166  nodeNumEntries_ += numNewInds;
2167  setLocallyModified ();
2168 #ifdef HAVE_TPETRA_DEBUG
2169  {
2170  const size_t chkNewNumEntries = getNumEntriesInLocalRow (myRow);
2171  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2172  chkNewNumEntries != newNumEntries, std::logic_error,
2173  ": Internal logic error. Please contact Tpetra team.");
2174  }
2175 #endif
2176  }
2177 
2178 
2179  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2180  void
2182  sortRowIndices (const RowInfo& rowInfo)
2183  {
2184  if (rowInfo.numEntries > 0) {
2185  auto lclColInds = this->getLocalKokkosRowViewNonConst (rowInfo);
2186  // FIXME (mfh 08 May 2017) This assumes CUDA UVM.
2187  LocalOrdinal* const lclColIndsRaw = lclColInds.ptr_on_device ();
2188  std::sort (lclColIndsRaw, lclColIndsRaw + rowInfo.numEntries);
2189  }
2190  }
2191 
2192 
2193  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2194  size_t
2196  mergeRowIndices (const RowInfo& rowInfo)
2197  {
2198  auto lclColInds = this->getLocalKokkosRowViewNonConst (rowInfo);
2199 
2200  // FIXME (mfh 08 May 2017) This may assume CUDA UVM.
2201  LocalOrdinal* const beg = lclColInds.ptr_on_device ();
2202  LocalOrdinal* const end = beg + rowInfo.numEntries;
2203  LocalOrdinal* const newend = std::unique (beg, end);
2204  const size_t mergedEntries = newend - beg;
2205 
2206  // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
2207  this->k_numRowEntries_(rowInfo.localRow) = mergedEntries;
2208  return rowInfo.numEntries - mergedEntries;
2209  }
2210 
2211 
2212  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2213  void
2215  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
2216  const Teuchos::RCP<const map_type>& rangeMap)
2217  {
2218  // simple pointer comparison for equality
2219  if (domainMap_ != domainMap) {
2220  domainMap_ = domainMap;
2221  importer_ = Teuchos::null;
2222  }
2223  if (rangeMap_ != rangeMap) {
2224  rangeMap_ = rangeMap;
2225  exporter_ = Teuchos::null;
2226  }
2227  }
2228 
2229 
2230  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2231  void
2234  {
2235  globalNumEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2236  globalNumDiags_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2237  globalMaxNumRowEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2238  haveGlobalConstants_ = false;
2239  }
2240 
2241 
2242  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2243  void
2246  {
2247 #ifdef HAVE_TPETRA_DEBUG
2248  const char tfecfFuncName[] = "checkInternalState: ";
2249  const char suffix[] = " Please report this bug to the Tpetra developers.";
2250 
2251  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2252  //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
2253  // check the internal state of this data structure
2254  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
2255  // always remains in a valid state
2256 
2257  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2258  (this->rowMap_.is_null (), std::logic_error,
2259  "Row Map is null." << suffix);
2260  // This may access the row Map, so we need to check first (above)
2261  // whether the row Map is null.
2262  const LocalOrdinal lclNumRows =
2263  static_cast<LocalOrdinal> (this->getNodeNumRows ());
2264 
2265  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2266  (this->isFillActive () == this->isFillComplete (), std::logic_error,
2267  "Graph cannot be both fill active and fill complete." << suffix);
2268  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2269  (this->isFillComplete () &&
2270  (this->colMap_.is_null () ||
2271  this->rangeMap_.is_null () ||
2272  this->domainMap_.is_null ()),
2273  std::logic_error,
2274  "Graph is full complete, but at least one of {column, range, domain} "
2275  "Map is null." << suffix);
2276  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2277  (this->isStorageOptimized () && ! this->indicesAreAllocated (),
2278  std::logic_error, "Storage is optimized, but indices are not "
2279  "allocated, not even trivially." << suffix);
2280  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2281  (this->indicesAreAllocated_ &&
2282  (this->storageStatus_ == Details::STORAGE_1D_PACKED ||
2283  this->storageStatus_ == Details::STORAGE_1D_UNPACKED) &&
2284  this->pftype_ == DynamicProfile, std::logic_error,
2285  "Graph claims to have allocated indices and 1-D storage "
2286  "(either packed or unpacked), but also claims to be DynamicProfile.");
2287  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2288  (this->indicesAreAllocated_ &&
2289  this->storageStatus_ == Details::STORAGE_2D &&
2290  this->pftype_ == StaticProfile, std::logic_error,
2291  "Graph claims to have allocated indices and 2-D storage, "
2292  "but also claims to be StaticProfile.");
2293  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2294  (this->indicesAreAllocated_ &&
2295  this->storageStatus_ == Details::STORAGE_2D &&
2296  this->isLocallyIndexed () &&
2297  static_cast<LocalOrdinal> (this->lclInds2D_.size ()) != lclNumRows,
2298  std::logic_error,
2299  "Graph claims to have allocated indices, be locally indexed, and have "
2300  "2-D storage, but lclInds2D_.size() = " << this->lclInds2D_.size ()
2301  << " != getNodeNumRows() = " << lclNumRows << ".");
2302  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2303  (this->indicesAreAllocated_ &&
2304  this->storageStatus_ == Details::STORAGE_2D &&
2305  this->isGloballyIndexed () &&
2306  static_cast<LocalOrdinal> (this->gblInds2D_.size ()) != lclNumRows,
2307  std::logic_error,
2308  "Graph claims to have allocated indices, be globally indexed, and have "
2309  "2-D storage, but gblInds2D_.size() = " << this->gblInds2D_.size ()
2310  << " != getNodeNumRows() = " << lclNumRows << ".");
2311 
2312  size_t nodeAllocSize = 0;
2313  try {
2314  nodeAllocSize = this->getNodeAllocationSize ();
2315  }
2316  catch (std::logic_error& e) {
2317  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2318  (true, std::runtime_error, "getNodeAllocationSize threw "
2319  "std::logic_error: " << e.what ());
2320  }
2321  catch (std::exception& e) {
2322  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2323  (true, std::runtime_error, "getNodeAllocationSize threw an "
2324  "std::exception: " << e.what ());
2325  }
2326  catch (...) {
2327  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2328  (true, std::runtime_error, "getNodeAllocationSize threw an exception "
2329  "not a subclass of std::exception.");
2330  }
2331 
2332  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2333  (this->isStorageOptimized () &&
2334  nodeAllocSize != this->getNodeNumEntries (),
2335  std::logic_error, "Storage is optimized, but "
2336  "this->getNodeAllocationSize() = " << nodeAllocSize
2337  << " != this->getNodeNumEntries() = " << this->getNodeNumEntries ()
2338  << "." << suffix);
2339  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2340  (! this->haveGlobalConstants_ &&
2341  (this->globalNumEntries_ != GSTI ||
2342  this->globalNumDiags_ != GSTI ||
2343  this->globalMaxNumRowEntries_ != GSTI),
2344  std::logic_error, "Graph claims not to have global constants, but "
2345  "some of the global constants are not marked as invalid." << suffix);
2346  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2347  (this->haveGlobalConstants_ &&
2348  (this->globalNumEntries_ == GSTI ||
2349  this->globalNumDiags_ == GSTI ||
2350  this->globalMaxNumRowEntries_ == GSTI),
2351  std::logic_error, "Graph claims to have global constants, but "
2352  "some of them are marked as invalid." << suffix);
2353  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2354  (this->haveGlobalConstants_ &&
2355  (this->globalNumEntries_ < this->nodeNumEntries_ ||
2356  this->globalNumDiags_ < this->nodeNumDiags_ ||
2357  this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
2358  std::logic_error, "Graph claims to have global constants, and "
2359  "all of the values of the global constants are valid, but "
2360  "some of the local constants are greater than "
2361  "their corresponding global constants." << suffix);
2362  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2363  (this->indicesAreAllocated () &&
2364  (this->numAllocForAllRows_ != 0 ||
2365  this->k_numAllocPerRow_.dimension_0 () != 0),
2366  std::logic_error, "The graph claims that its indices are allocated, but "
2367  "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
2368  "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
2369  "the graph is supposed to release its \"allocation specifications\" "
2370  "when it allocates its indices." << suffix);
2371  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2372  (this->isStorageOptimized () && this->pftype_ != StaticProfile,
2373  std::logic_error,
2374  "Storage is optimized, but graph is not StaticProfile." << suffix);
2375  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2376  (this->isGloballyIndexed () &&
2377  this->k_rowPtrs_.dimension_0 () != 0 &&
2378  (static_cast<size_t> (this->k_rowPtrs_.dimension_0 ()) != static_cast<size_t> (lclNumRows + 1) ||
2379  this->k_rowPtrs_(lclNumRows) != static_cast<size_t> (this->k_gblInds1D_.dimension_0 ())),
2380  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2381  "the graph is globally indexed, then "
2382  "k_rowPtrs_ must have N+1 rows, and "
2383  "k_rowPtrs_(N) must equal k_gblInds1D_.dimension_0()." << suffix);
2384  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2385  (this->isLocallyIndexed () &&
2386  this->k_rowPtrs_.dimension_0 () != 0 &&
2387  (static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != static_cast<size_t> (lclNumRows + 1) ||
2388  this->k_rowPtrs_(lclNumRows) != static_cast<size_t> (this->k_lclInds1D_.dimension_0 ())),
2389  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2390  "the graph is locally indexed, then "
2391  "k_rowPtrs_ must have N+1 rows, and "
2392  "k_rowPtrs_(N) must equal k_lclInds1D_.dimension_0()." << suffix);
2393 
2394  if (this->pftype_ == DynamicProfile) {
2395  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2396  (this->indicesAreAllocated () &&
2397  this->getNodeNumRows () > 0 &&
2398  this->lclInds2D_.is_null () &&
2399  this->gblInds2D_.is_null (),
2400  std::logic_error, "Graph has DynamicProfile, indices are allocated, and "
2401  "the calling process has nonzero rows, but 2-D column index storage "
2402  "(whether local or global) is not present." << suffix);
2403  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2404  (this->indicesAreAllocated () &&
2405  this->getNodeNumRows () > 0 &&
2406  this->k_numRowEntries_.dimension_0 () == 0,
2407  std::logic_error, "Graph has DynamicProfile, indices are allocated, and "
2408  "the calling process has nonzero rows, but k_numRowEntries_ is not "
2409  "present." << suffix);
2410  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2411  (this->k_lclInds1D_.dimension_0 () != 0 ||
2412  this->k_gblInds1D_.dimension_0 () != 0,
2413  std::logic_error, "Graph has DynamicProfile, but "
2414  "1-D allocations are present." << suffix);
2415  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2416  (this->k_rowPtrs_.dimension_0 () != 0,
2417  std::logic_error, "Graph has DynamicProfile, but "
2418  "row offsets are present." << suffix);
2419  }
2420  else if (this->pftype_ == StaticProfile) {
2421  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2422  (this->indicesAreAllocated () &&
2423  nodeAllocSize > 0 &&
2424  this->k_lclInds1D_.dimension_0 () == 0 &&
2425  this->k_gblInds1D_.dimension_0 () == 0,
2426  std::logic_error, "Graph has StaticProfile and is allocated "
2427  "nonnontrivally, but 1-D allocations are not present." << suffix);
2428  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2429  (this->lclInds2D_ != Teuchos::null || this->gblInds2D_ != Teuchos::null,
2430  std::logic_error, "Graph has StaticProfile, but 2-D allocations are "
2431  "present." << suffix);
2432  }
2433 
2434  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2435  (! this->indicesAreAllocated () &&
2436  ((this->k_rowPtrs_.dimension_0 () != 0 ||
2437  this->k_numRowEntries_.dimension_0 () != 0) ||
2438  this->k_lclInds1D_.dimension_0 () != 0 ||
2439  this->lclInds2D_ != Teuchos::null ||
2440  this->k_gblInds1D_.dimension_0 () != 0 ||
2441  this->gblInds2D_ != Teuchos::null),
2442  std::logic_error, "If indices are not allocated, "
2443  "then none of the buffers should be." << suffix);
2444  // indices may be local or global only if they are allocated
2445  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2446  // indicesAreGlobal_)
2447  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2448  ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2449  ! this->indicesAreAllocated_,
2450  std::logic_error, "Indices may be local or global only if they are "
2451  "allocated." << suffix);
2452  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2453  (this->indicesAreLocal_ && this->indicesAreGlobal_,
2454  std::logic_error, "Indices may not be both local and global." << suffix);
2455  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2456  (this->indicesAreLocal_ &&
2457  (this->k_gblInds1D_.dimension_0 () != 0 || ! this->gblInds2D_.is_null ()),
2458  std::logic_error, "Indices are local, but either "
2459  "k_gblInds1D_.dimension_0() (= "
2460  << this->k_gblInds1D_.dimension_0 () << ") != 0, or "
2461  "gblInds2D_ is not null. In other words, if indices are local, "
2462  "then global allocations should not be present." << suffix);
2463  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2464  (this->indicesAreGlobal_ &&
2465  (this->k_lclInds1D_.dimension_0 () != 0 ||
2466  ! this->lclInds2D_.is_null ()),
2467  std::logic_error, "Indices are global, but either "
2468  "k_lclInds1D_.dimension_0() (= "
2469  << this->k_lclInds1D_.dimension_0 () << ") != 0, or "
2470  "lclInds2D_ is not null. In other words, if indices are global, "
2471  "then local allocations should not be present." << suffix);
2472  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2473  (this->indicesAreLocal_ &&
2474  nodeAllocSize > 0 &&
2475  this->k_lclInds1D_.dimension_0 () == 0 &&
2476  this->getNodeNumRows () > 0 &&
2477  this->lclInds2D_.is_null (),
2478  std::logic_error, "Indices are local, getNodeAllocationSize() = "
2479  << nodeAllocSize << " > 0, k_lclInds1D_.dimension_0() = 0, "
2480  "getNodeNumRows() = " << this->getNodeNumRows () << " > 0, and "
2481  "lclInds2D_ is null." << suffix);
2482  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2483  (this->indicesAreGlobal_ &&
2484  nodeAllocSize > 0 &&
2485  this->k_gblInds1D_.dimension_0 () == 0 &&
2486  this->getNodeNumRows () > 0 &&
2487  this->gblInds2D_.is_null (),
2488  std::logic_error, "Indices are global, getNodeAllocationSize() = "
2489  << nodeAllocSize << " > 0, k_gblInds1D_.dimension_0() = 0, "
2490  "getNodeNumRows() = " << this->getNodeNumRows () << " > 0, and "
2491  "gblInds2D_ is null." << suffix);
2492  // check the actual allocations
2493  if (this->indicesAreAllocated () &&
2494  this->pftype_ == StaticProfile &&
2495  this->k_rowPtrs_.dimension_0 () != 0) {
2496  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2497  (static_cast<size_t> (this->k_rowPtrs_.dimension_0 ()) !=
2498  this->getNodeNumRows () + 1,
2499  std::logic_error, "Graph is StaticProfile, indices are allocated, and "
2500  "k_rowPtrs_ has nonzero length, but k_rowPtrs_.dimension_0() = "
2501  << this->k_rowPtrs_.dimension_0 () << " != getNodeNumRows()+1 = "
2502  << (this->getNodeNumRows () + 1) << "." << suffix);
2503  const size_t actualNumAllocated =
2504  Details::getEntryOnHost (this->k_rowPtrs_, this->getNodeNumRows ());
2505  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2506  (this->isLocallyIndexed () &&
2507  static_cast<size_t> (this->k_lclInds1D_.dimension_0 ()) != actualNumAllocated,
2508  std::logic_error, "Graph is StaticProfile and locally indexed, "
2509  "indices are allocated, and k_rowPtrs_ has nonzero length, but "
2510  "k_lclInds1D_.dimension_0() = " << this->k_lclInds1D_.dimension_0 ()
2511  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2512  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2513  (this->isGloballyIndexed () &&
2514  static_cast<size_t> (this->k_gblInds1D_.dimension_0 ()) != actualNumAllocated,
2515  std::logic_error, "Graph is StaticProfile and globally indexed, "
2516  "indices are allocated, and k_rowPtrs_ has nonzero length, but "
2517  "k_gblInds1D_.dimension_0() = " << this->k_gblInds1D_.dimension_0 ()
2518  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2519  }
2520 #endif // HAVE_TPETRA_DEBUG
2521  }
2522 
2523 
2524  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2525  size_t
2527  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2528  {
2529  using Teuchos::OrdinalTraits;
2530  const LocalOrdinal lrow = rowMap_->getLocalElement (globalRow);
2531  if (hasRowInfo () && lrow != OrdinalTraits<LocalOrdinal>::invalid ()) {
2532  const RowInfo rowinfo = this->getRowInfo (lrow);
2533  return rowinfo.numEntries;
2534  } else {
2535  return OrdinalTraits<size_t>::invalid ();
2536  }
2537  }
2538 
2539 
2540  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2541  size_t
2543  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2544  {
2545  if (hasRowInfo () && rowMap_->isNodeLocalElement (localRow)) {
2546  const RowInfo rowinfo = this->getRowInfo (localRow);
2547  return rowinfo.numEntries;
2548  } else {
2549  return Teuchos::OrdinalTraits<size_t>::invalid ();
2550  }
2551  }
2552 
2553 
2554  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2555  size_t
2557  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2558  {
2559  const LocalOrdinal lrow = rowMap_->getLocalElement (globalRow);
2560  if (hasRowInfo () && lrow != Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
2561  const RowInfo rowinfo = this->getRowInfo (lrow);
2562  return rowinfo.allocSize;
2563  } else {
2564  return Teuchos::OrdinalTraits<size_t>::invalid ();
2565  }
2566  }
2567 
2568 
2569  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2570  size_t
2572  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2573  {
2574  if (hasRowInfo () && rowMap_->isNodeLocalElement (localRow)) {
2575  const RowInfo rowinfo = this->getRowInfo (localRow);
2576  return rowinfo.allocSize;
2577  } else {
2578  return Teuchos::OrdinalTraits<size_t>::invalid();
2579  }
2580  }
2581 
2582 
2583  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2584  Teuchos::ArrayRCP<const size_t>
2587  {
2588  using Kokkos::ViewAllocateWithoutInitializing;
2589  using Kokkos::create_mirror_view;
2590  using Teuchos::ArrayRCP;
2591  typedef typename local_graph_type::row_map_type row_map_type;
2592  typedef typename row_map_type::non_const_value_type row_offset_type;
2593 #ifdef HAVE_TPETRA_DEBUG
2594  const char prefix[] = "Tpetra::CrsGraph::getNodeRowPtrs: ";
2595  const char suffix[] = " Please report this bug to the Tpetra developers.";
2596 #endif // HAVE_TPETRA_DEBUG
2597  const size_t size = k_rowPtrs_.dimension_0 ();
2598  const bool same = Kokkos::Impl::is_same<size_t, row_offset_type>::value;
2599 
2600  if (size == 0) {
2601  return ArrayRCP<const size_t> ();
2602  }
2603 
2604  ArrayRCP<const row_offset_type> ptr_rot;
2605  ArrayRCP<const size_t> ptr_st;
2606  if (same) { // size_t == row_offset_type
2607  // NOTE (mfh 22 Mar 2015) In a debug build of Kokkos, the result
2608  // of create_mirror_view might actually be a new allocation.
2609  // This helps with debugging when there are two memory spaces.
2610  typename row_map_type::HostMirror ptr_h = create_mirror_view (k_rowPtrs_);
2611  Kokkos::deep_copy (ptr_h, k_rowPtrs_);
2612 #ifdef HAVE_TPETRA_DEBUG
2613  TEUCHOS_TEST_FOR_EXCEPTION(
2614  ptr_h.dimension_0 () != k_rowPtrs_.dimension_0 (), std::logic_error,
2615  prefix << "size_t == row_offset_type, but ptr_h.dimension_0() = "
2616  << ptr_h.dimension_0 () << " != k_rowPtrs_.dimension_0() = "
2617  << k_rowPtrs_.dimension_0 () << ".");
2618  TEUCHOS_TEST_FOR_EXCEPTION(
2619  same && size != 0 && k_rowPtrs_.ptr_on_device () == NULL, std::logic_error,
2620  prefix << "size_t == row_offset_type and k_rowPtrs_.dimension_0() = "
2621  << size << " != 0, but k_rowPtrs_.ptr_on_device() == NULL." << suffix);
2622  TEUCHOS_TEST_FOR_EXCEPTION(
2623  same && size != 0 && ptr_h.ptr_on_device () == NULL, std::logic_error,
2624  prefix << "size_t == row_offset_type and k_rowPtrs_.dimension_0() = "
2625  << size << " != 0, but create_mirror_view(k_rowPtrs_).ptr_on_device() "
2626  "== NULL." << suffix);
2627 #endif // HAVE_TPETRA_DEBUG
2628  ptr_rot = Kokkos::Compat::persistingView (ptr_h);
2629  }
2630  else { // size_t != row_offset_type
2631  typedef Kokkos::View<size_t*, device_type> ret_view_type;
2632  ret_view_type ptr_d (ViewAllocateWithoutInitializing ("ptr"), size);
2633  ::Tpetra::Details::copyOffsets (ptr_d, k_rowPtrs_);
2634  typename ret_view_type::HostMirror ptr_h = create_mirror_view (ptr_d);
2635  Kokkos::deep_copy (ptr_h, ptr_d);
2636  ptr_st = Kokkos::Compat::persistingView (ptr_h);
2637  }
2638 #ifdef HAVE_TPETRA_DEBUG
2639  TEUCHOS_TEST_FOR_EXCEPTION(
2640  same && size != 0 && ptr_rot.is_null (), std::logic_error,
2641  prefix << "size_t == row_offset_type and size = " << size
2642  << " != 0, but ptr_rot is null." << suffix);
2643  TEUCHOS_TEST_FOR_EXCEPTION(
2644  ! same && size != 0 && ptr_st.is_null (), std::logic_error,
2645  prefix << "size_t != row_offset_type and size = " << size
2646  << " != 0, but ptr_st is null." << suffix);
2647 #endif // HAVE_TPETRA_DEBUG
2648 
2649  // If size_t == row_offset_type, return a persisting host view of
2650  // k_rowPtrs_. Otherwise, return a size_t host copy of k_rowPtrs_.
2651 #ifdef HAVE_TPETRA_DEBUG
2652  ArrayRCP<const size_t> retval =
2653  Kokkos::Impl::if_c<same,
2654  ArrayRCP<const row_offset_type>,
2655  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2656  TEUCHOS_TEST_FOR_EXCEPTION(
2657  size != 0 && retval.is_null (), std::logic_error,
2658  prefix << "size = " << size << " != 0, but retval is null." << suffix);
2659  return retval;
2660 #else
2661  return Kokkos::Impl::if_c<same,
2662  ArrayRCP<const row_offset_type>,
2663  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2664 #endif // HAVE_TPETRA_DEBUG
2665  }
2666 
2667 
2668  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2669  Teuchos::ArrayRCP<const LocalOrdinal>
2672  {
2673  return Kokkos::Compat::persistingView (k_lclInds1D_);
2674  }
2675 
2676 
2677  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2678  void
2680  getLocalRowCopy (LocalOrdinal localRow,
2681  const Teuchos::ArrayView<LocalOrdinal>&indices,
2682  size_t& numEntries) const
2683  {
2684  using Teuchos::ArrayView;
2685  typedef LocalOrdinal LO;
2686  typedef GlobalOrdinal GO;
2687  const char tfecfFuncName[] = "getLocalRowCopy: ";
2688 
2689  TEUCHOS_TEST_FOR_EXCEPTION(
2690  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2691  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2692  "does not have a column Map yet. That means we don't have local indices "
2693  "for columns yet, so it doesn't make sense to call this method. If the "
2694  "graph doesn't have a column Map yet, you should call fillComplete on "
2695  "it first.");
2696 #ifdef HAVE_TPETRA_DEBUG
2697  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2698  ! hasRowInfo(), std::runtime_error,
2699  "Graph row information was deleted at fillComplete.");
2700 #endif // HAVE_TPETRA_DEBUG
2701 
2702  // This does the right thing (reports an empty row) if the input
2703  // row is invalid.
2704  const RowInfo rowinfo = getRowInfo (localRow);
2705  // No side effects on error.
2706  const size_t theNumEntries = rowinfo.numEntries;
2707  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2708  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2709  "Specified storage (size==" << indices.size () << ") does not suffice "
2710  "to hold all " << theNumEntries << " entry/ies for this row.");
2711  numEntries = theNumEntries;
2712 
2713  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2714  if (isLocallyIndexed ()) {
2715  ArrayView<const LO> lview = getLocalView (rowinfo);
2716  for (size_t j = 0; j < theNumEntries; ++j) {
2717  indices[j] = lview[j];
2718  }
2719  }
2720  else if (isGloballyIndexed ()) {
2721  ArrayView<const GO> gview = getGlobalView (rowinfo);
2722  for (size_t j = 0; j < theNumEntries; ++j) {
2723  indices[j] = colMap_->getLocalElement (gview[j]);
2724  }
2725  }
2726  }
2727  }
2728 
2729 
2730  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2731  void
2733  getGlobalRowCopy (GlobalOrdinal globalRow,
2734  const Teuchos::ArrayView<GlobalOrdinal>& indices,
2735  size_t& numEntries) const
2736  {
2737  using Teuchos::ArrayView;
2738  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2739 #ifdef HAVE_TPETRA_DEBUG
2740  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2741  ! hasRowInfo (), std::runtime_error,
2742  "Graph row information was deleted at fillComplete.");
2743 #endif // HAVE_TPETRA_DEBUG
2744 
2745  // This does the right thing (reports an empty row) if the input
2746  // row is invalid.
2747  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2748  const size_t theNumEntries = rowinfo.numEntries;
2749  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2750  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2751  "Specified storage (size==" << indices.size () << ") does not suffice "
2752  "to hold all " << theNumEntries << " entry/ies for this row.");
2753  numEntries = theNumEntries; // first side effect
2754 
2755  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2756  if (isLocallyIndexed ()) {
2757  ArrayView<const LocalOrdinal> lview = getLocalView (rowinfo);
2758  for (size_t j = 0; j < theNumEntries; ++j) {
2759  indices[j] = colMap_->getGlobalElement (lview[j]);
2760  }
2761  }
2762  else if (isGloballyIndexed ()) {
2763  ArrayView<const GlobalOrdinal> gview = getGlobalView (rowinfo);
2764  for (size_t j = 0; j < theNumEntries; ++j) {
2765  indices[j] = gview[j];
2766  }
2767  }
2768  }
2769  }
2770 
2771 
2772  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2773  void
2775  getLocalRowView (const LocalOrdinal localRow,
2776  Teuchos::ArrayView<const LocalOrdinal>& indices) const
2777  {
2778  const char tfecfFuncName[] = "getLocalRowView: ";
2779  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2780  isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2781  "currently stored as global indices, so we cannot return a view with "
2782  "local column indices, whether or not the graph has a column Map. If "
2783  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2784 #ifdef HAVE_TPETRA_DEBUG
2785  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2786  ! hasRowInfo (), std::runtime_error, "Graph row information was "
2787  "deleted at fillComplete().");
2788 #endif // HAVE_TPETRA_DEBUG
2789 
2790  // This does the right thing (reports an empty row) if the input
2791  // row is invalid.
2792  const RowInfo rowInfo = getRowInfo (localRow);
2793  indices = Teuchos::null;
2794  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2795  rowInfo.numEntries > 0) {
2796  indices = this->getLocalView (rowInfo);
2797  // getLocalView returns a view of the _entire_ row, including
2798  // any extra space at the end (which 1-D unpacked storage
2799  // might have, for example). That's why we have to take a
2800  // subview of the returned view.
2801  indices = indices (0, rowInfo.numEntries);
2802  }
2803 
2804 #ifdef HAVE_TPETRA_DEBUG
2805  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2806  (static_cast<size_t> (indices.size ()) !=
2807  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2808  "= " << indices.size () << " != getNumEntriesInLocalRow(localRow=" <<
2809  localRow << ") = " << getNumEntriesInLocalRow (localRow) <<
2810  ". Please report this bug to the Tpetra developers.");
2811 #endif // HAVE_TPETRA_DEBUG
2812  }
2813 
2814 
2815  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2816  void
2818  getGlobalRowView (const GlobalOrdinal globalRow,
2819  Teuchos::ArrayView<const GlobalOrdinal>& indices) const
2820  {
2821  const char tfecfFuncName[] = "getGlobalRowView: ";
2822  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2823  isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2824  "currently stored as local indices, so we cannot return a view with "
2825  "global column indices. Use getGlobalRowCopy() instead.");
2826 #ifdef HAVE_TPETRA_DEBUG
2827  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2828  ! hasRowInfo (), std::runtime_error,
2829  "Graph row information was deleted at fillComplete().");
2830 #endif // HAVE_TPETRA_DEBUG
2831 
2832  // This does the right thing (reports an empty row) if the input
2833  // row is invalid.
2834  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2835  indices = Teuchos::null;
2836  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2837  rowInfo.numEntries > 0) {
2838  indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries);
2839  }
2840 
2841 #ifdef HAVE_TPETRA_DEBUG
2842  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2843  (static_cast<size_t> (indices.size ()) != getNumEntriesInGlobalRow (globalRow),
2844  std::logic_error, "indices.size() = " << indices.size ()
2845  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2846  << getNumEntriesInGlobalRow (globalRow)
2847  << ". Please report this bug to the Tpetra developers.");
2848 #endif // HAVE_TPETRA_DEBUG
2849  }
2850 
2851 
2852  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2853  void
2855  insertLocalIndices (const LocalOrdinal localRow,
2856  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2857  {
2858  const char tfecfFuncName[] = "insertLocalIndices";
2859 
2860  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2861  ! isFillActive (), std::runtime_error,
2862  ": requires that fill is active.");
2863  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2864  isGloballyIndexed (), std::runtime_error,
2865  ": graph indices are global; use insertGlobalIndices().");
2866  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2867  ! hasColMap (), std::runtime_error,
2868  ": cannot insert local indices without a column map.");
2869  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2870  ! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2871  ": row does not belong to this node.");
2872  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2873  ! hasRowInfo (), std::runtime_error,
2874  ": graph row information was deleted at fillComplete().");
2875  if (! indicesAreAllocated ()) {
2876  allocateIndices (LocalIndices);
2877  }
2878 
2879 #ifdef HAVE_TPETRA_DEBUG
2880  // In a debug build, if the graph has a column Map, test whether
2881  // any of the given column indices are not in the column Map.
2882  // Keep track of the invalid column indices so we can tell the
2883  // user about them.
2884  if (hasColMap ()) {
2885  using Teuchos::Array;
2886  using Teuchos::toString;
2887  using std::endl;
2888  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2889 
2890  const map_type& colMap = * (getColMap ());
2891  Array<LocalOrdinal> badColInds;
2892  bool allInColMap = true;
2893  for (size_type k = 0; k < indices.size (); ++k) {
2894  if (! colMap.isNodeLocalElement (indices[k])) {
2895  allInColMap = false;
2896  badColInds.push_back (indices[k]);
2897  }
2898  }
2899  if (! allInColMap) {
2900  std::ostringstream os;
2901  os << "Tpetra::CrsMatrix::insertLocalIndices: You attempted to insert "
2902  "entries in owned row " << localRow << ", at the following column "
2903  "indices: " << toString (indices) << "." << endl;
2904  os << "Of those, the following indices are not in the column Map on "
2905  "this process: " << toString (badColInds) << "." << endl << "Since "
2906  "the graph has a column Map already, it is invalid to insert entries "
2907  "at those locations.";
2908  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2909  }
2910  }
2911 #endif // HAVE_TPETRA_DEBUG
2912 
2913  insertLocalIndicesImpl (localRow, indices);
2914 
2915 #ifdef HAVE_TPETRA_DEBUG
2916  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2917  indicesAreAllocated() == false || isLocallyIndexed() == false,
2918  std::logic_error,
2919  ": Violated stated post-conditions. Please contact Tpetra team.");
2920 #endif
2921  }
2922 
2923  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2924  void
2926  insertLocalIndices (const LocalOrdinal localRow,
2927  const LocalOrdinal numEnt,
2928  const LocalOrdinal inds[])
2929  {
2930  Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
2931  this->insertLocalIndices (localRow, indsT);
2932  }
2933 
2934  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2935  void
2937  insertLocalIndicesFiltered (const LocalOrdinal localRow,
2938  const Teuchos::ArrayView<const LocalOrdinal>& indices)
2939  {
2940  typedef LocalOrdinal LO;
2941  const char tfecfFuncName[] = "insertLocalIndicesFiltered";
2942 
2943  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2944  isFillActive() == false, std::runtime_error,
2945  ": requires that fill is active.");
2946  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2947  isGloballyIndexed() == true, std::runtime_error,
2948  ": graph indices are global; use insertGlobalIndices().");
2949  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2950  hasColMap() == false, std::runtime_error,
2951  ": cannot insert local indices without a column map.");
2952  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2953  rowMap_->isNodeLocalElement(localRow) == false, std::runtime_error,
2954  ": row does not belong to this node.");
2955  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2956  ! hasRowInfo (), std::runtime_error,
2957  ": graph row information was deleted at fillComplete().");
2958  if (! indicesAreAllocated ()) {
2959  allocateIndices (LocalIndices);
2960  }
2961 
2962  // If we have a column map, use it to filter the entries.
2963  if (hasColMap ()) {
2964  Teuchos::Array<LO> filtered_indices (indices);
2965  SLocalGlobalViews inds_view;
2966  SLocalGlobalNCViews inds_ncview;
2967  inds_ncview.linds = filtered_indices();
2968  const size_t numFilteredEntries =
2969  filterIndices<LocalIndices>(inds_ncview);
2970  inds_view.linds = filtered_indices (0, numFilteredEntries);
2971  insertLocalIndicesImpl(localRow, inds_view.linds);
2972  }
2973  else {
2974  insertLocalIndicesImpl(localRow, indices);
2975  }
2976 #ifdef HAVE_TPETRA_DEBUG
2977  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2978  indicesAreAllocated() == false || isLocallyIndexed() == false,
2979  std::logic_error,
2980  ": Violated stated post-conditions. Please contact Tpetra team.");
2981 #endif
2982  }
2983 
2984 
2985  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2986  void
2988  insertGlobalIndices (const GlobalOrdinal grow,
2989  const Teuchos::ArrayView<const GlobalOrdinal>& indices)
2990  {
2991  using Teuchos::Array;
2992  using Teuchos::ArrayView;
2993  typedef LocalOrdinal LO;
2994  typedef GlobalOrdinal GO;
2995  typedef typename ArrayView<const GO>::size_type size_type;
2996  const char tfecfFuncName[] = "insertGlobalIndices";
2997 
2998  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2999  isLocallyIndexed() == true, std::runtime_error,
3000  ": graph indices are local; use insertLocalIndices().");
3001  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3002  ! hasRowInfo (), std::runtime_error,
3003  ": graph row information was deleted at fillComplete().");
3004  // This can't really be satisfied for now, because if we are
3005  // fillComplete(), then we are local. In the future, this may
3006  // change. However, the rule that modification require active
3007  // fill will not change.
3008  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3009  isFillActive() == false, std::runtime_error,
3010  ": You are not allowed to call this method if fill is not active. "
3011  "If fillComplete has been called, you must first call resumeFill "
3012  "before you may insert indices.");
3013  if (! indicesAreAllocated ()) {
3014  allocateIndices (GlobalIndices);
3015  }
3016  const LO myRow = rowMap_->getLocalElement (grow);
3017  if (myRow != Teuchos::OrdinalTraits<LO>::invalid ()) {
3018 #ifdef HAVE_TPETRA_DEBUG
3019  if (hasColMap ()) {
3020  using std::endl;
3021  const map_type& colMap = * (getColMap ());
3022  // In a debug build, keep track of the nonowned ("bad") column
3023  // indices, so that we can display them in the exception
3024  // message. In a release build, just ditch the loop early if
3025  // we encounter a nonowned column index.
3026  Array<GO> badColInds;
3027  bool allInColMap = true;
3028  for (size_type k = 0; k < indices.size (); ++k) {
3029  if (! colMap.isNodeGlobalElement (indices[k])) {
3030  allInColMap = false;
3031  badColInds.push_back (indices[k]);
3032  }
3033  }
3034  if (! allInColMap) {
3035  std::ostringstream os;
3036  os << "Tpetra::CrsGraph::insertGlobalIndices: You attempted to insert "
3037  "entries in owned row " << grow << ", at the following column "
3038  "indices: " << toString (indices) << "." << endl;
3039  os << "Of those, the following indices are not in the column Map on "
3040  "this process: " << toString (badColInds) << "." << endl << "Since "
3041  "the matrix has a column Map already, it is invalid to insert "
3042  "entries at those locations.";
3043  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
3044  }
3045  }
3046 #endif // HAVE_TPETRA_DEBUG
3047  insertGlobalIndicesImpl (myRow, indices);
3048  }
3049  else { // a nonlocal row
3050  const size_type numIndices = indices.size ();
3051  // This creates the Array if it doesn't exist yet. std::map's
3052  // operator[] does a lookup each time, so it's better to pull
3053  // nonlocals_[grow] out of the loop.
3054  std::vector<GO>& nonlocalRow = nonlocals_[grow];
3055  for (size_type k = 0; k < numIndices; ++k) {
3056  nonlocalRow.push_back (indices[k]);
3057  }
3058  }
3059 #ifdef HAVE_TPETRA_DEBUG
3060  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3061  indicesAreAllocated() == false || isGloballyIndexed() == false,
3062  std::logic_error,
3063  ": Violated stated post-conditions. Please contact Tpetra team.");
3064 #endif
3065  }
3066 
3067 
3068  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3069  void
3071  insertGlobalIndices (const GlobalOrdinal globalRow,
3072  const LocalOrdinal numEnt,
3073  const GlobalOrdinal inds[])
3074  {
3075  Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
3076  this->insertGlobalIndices (globalRow, indsT);
3077  }
3078 
3079 
3080  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3081  void
3083  insertGlobalIndicesFiltered (const GlobalOrdinal grow,
3084  const Teuchos::ArrayView<const GlobalOrdinal>& indices)
3085  {
3086  using Teuchos::Array;
3087  using Teuchos::ArrayView;
3088  typedef LocalOrdinal LO;
3089  typedef GlobalOrdinal GO;
3090  const char tfecfFuncName[] = "insertGlobalIndicesFiltered";
3091 
3092  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3093  isLocallyIndexed() == true, std::runtime_error,
3094  ": graph indices are local; use insertLocalIndices().");
3095  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3096  ! hasRowInfo (), std::runtime_error,
3097  ": graph row information was deleted at fillComplete().");
3098  // This can't really be satisfied for now, because if we are
3099  // fillComplete(), then we are local. In the future, this may
3100  // change. However, the rule that modification require active
3101  // fill will not change.
3102  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3103  isFillActive() == false, std::runtime_error,
3104  ": You are not allowed to call this method if fill is not active. "
3105  "If fillComplete has been called, you must first call resumeFill "
3106  "before you may insert indices.");
3107  if (! indicesAreAllocated ()) {
3108  allocateIndices (GlobalIndices);
3109  }
3110  const LO myRow = rowMap_->getLocalElement (grow);
3111  if (myRow != Teuchos::OrdinalTraits<LO>::invalid ()) {
3112  // If we have a column map, use it to filter the entries.
3113  if (hasColMap ()) {
3114  Array<GO> filtered_indices(indices);
3115  SLocalGlobalViews inds_view;
3116  SLocalGlobalNCViews inds_ncview;
3117  inds_ncview.ginds = filtered_indices();
3118  const size_t numFilteredEntries =
3119  filterIndices<GlobalIndices> (inds_ncview);
3120  inds_view.ginds = filtered_indices (0, numFilteredEntries);
3121  insertGlobalIndicesImpl(myRow, inds_view.ginds);
3122  }
3123  else {
3124  insertGlobalIndicesImpl(myRow, indices);
3125  }
3126  }
3127  else { // a nonlocal row
3128  typedef typename ArrayView<const GO>::size_type size_type;
3129  const size_type numIndices = indices.size ();
3130  for (size_type k = 0; k < numIndices; ++k) {
3131  nonlocals_[grow].push_back (indices[k]);
3132  }
3133  }
3134 #ifdef HAVE_TPETRA_DEBUG
3135  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3136  indicesAreAllocated() == false || isGloballyIndexed() == false,
3137  std::logic_error,
3138  ": Violated stated post-conditions. Please contact Tpetra team.");
3139 #endif
3140  }
3141 
3142 
3143  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3144  void
3146  removeLocalIndices (LocalOrdinal lrow)
3147  {
3148  const char tfecfFuncName[] = "removeLocalIndices: ";
3149  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3150  ! isFillActive (), std::runtime_error, "requires that fill is active.");
3151  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3152  isStorageOptimized (), std::runtime_error,
3153  "cannot remove indices after optimizeStorage() has been called.");
3154  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3155  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
3156  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3157  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
3158  "Local row " << lrow << " is not in the row Map on the calling process.");
3159  if (! indicesAreAllocated ()) {
3160  allocateIndices (LocalIndices);
3161  }
3162 
3163  // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on
3164  // all processes?
3165  clearGlobalConstants ();
3166 
3167  if (k_numRowEntries_.dimension_0 () != 0) {
3168  const size_t oldNumEntries = k_numRowEntries_(lrow);
3169  nodeNumEntries_ -= oldNumEntries;
3170  k_numRowEntries_(lrow) = 0;
3171  }
3172 #ifdef HAVE_TPETRA_DEBUG
3173  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3174  getNumEntriesInLocalRow (lrow) != 0 ||
3175  ! indicesAreAllocated () ||
3176  ! isLocallyIndexed (), std::logic_error,
3177  ": Violated stated post-conditions. Please contact Tpetra team.");
3178 #endif // HAVE_TPETRA_DEBUG
3179  }
3180 
3181 
3182  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3183  void
3185  setAllIndices (const typename local_graph_type::row_map_type& rowPointers,
3186  const typename local_graph_type::entries_type::non_const_type& columnIndices)
3187  {
3188  const char tfecfFuncName[] = "setAllIndices: ";
3189  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3190  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
3191  "The graph must have a column Map before you may call this method.");
3192  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3193  static_cast<size_t> (rowPointers.size ()) != this->getNodeNumRows () + 1,
3194  std::runtime_error, "rowPointers.size() = " << rowPointers.size () <<
3195  " != this->getNodeNumRows()+1 = " << (this->getNodeNumRows () + 1) <<
3196  ".");
3197 
3198  // FIXME (mfh 07 Aug 2014) We need to relax this restriction,
3199  // since the future model will be allocation at construction, not
3200  // lazy allocation on first insert.
3201  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3202  (this->k_lclInds1D_.dimension_0 () != 0 ||
3203  this->k_gblInds1D_.dimension_0 () != 0,
3204  std::runtime_error, "You may not call this method if 1-D data "
3205  "structures are already allocated.");
3206 
3207  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3208  (this->lclInds2D_ != Teuchos::null ||
3209  this->gblInds2D_ != Teuchos::null,
3210  std::runtime_error, "You may not call this method if 2-D data "
3211  "structures are already allocated.");
3212  const size_t localNumEntries =
3213  Details::getEntryOnHost (rowPointers, this->getNodeNumRows ());
3214 
3215  indicesAreAllocated_ = true;
3216  indicesAreLocal_ = true;
3217  pftype_ = StaticProfile; // if the profile wasn't static before, it sure is now.
3218  k_lclInds1D_ = columnIndices;
3219  k_rowPtrs_ = rowPointers;
3220  nodeNumEntries_ = localNumEntries;
3221  // We don't know whether it's packed or unpacked, but we may
3222  // conservatively assume that it is packed.
3223  storageStatus_ = Details::STORAGE_1D_UNPACKED;
3224 
3225 
3226  // Build the local graph.
3227  lclGraph_ = local_graph_type (k_lclInds1D_, k_rowPtrs_);
3228 
3229  // These normally get cleared out at the end of allocateIndices.
3230  // It makes sense to clear them out here, because at the end of
3231  // this method, the graph is allocated on the calling process.
3232  numAllocForAllRows_ = 0;
3233  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3234 
3235  checkInternalState ();
3236  }
3237 
3238 
3239  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3240  void
3242  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
3243  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
3244  {
3245  using Kokkos::View;
3246  typedef typename local_graph_type::row_map_type row_map_type;
3247  typedef typename row_map_type::array_layout layout_type;
3248  typedef typename row_map_type::non_const_value_type row_offset_type;
3249  typedef View<size_t*, layout_type , Kokkos::HostSpace,
3250  Kokkos::MemoryUnmanaged> input_view_type;
3251  typedef typename row_map_type::non_const_type nc_row_map_type;
3252 
3253  const size_t size = static_cast<size_t> (rowPointers.size ());
3254  const bool same = Kokkos::Impl::is_same<size_t, row_offset_type>::value;
3255  input_view_type ptr_in (rowPointers.getRawPtr (), size);
3256 
3257  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
3258 
3259  if (same) { // size_t == row_offset_type
3260  // This compile-time logic ensures that the compiler never sees
3261  // an assignment of View<row_offset_type*, ...> to View<size_t*,
3262  // ...> unless size_t == row_offset_type.
3263  input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
3264  Kokkos::deep_copy (Kokkos::Impl::if_c<same,
3265  nc_row_map_type,
3266  input_view_type>::select (ptr_rot, ptr_decoy),
3267  ptr_in);
3268  }
3269  else { // size_t != row_offset_type
3270  // CudaUvmSpace != HostSpace, so this will be false in that case.
3271  const bool inHostMemory =
3272  Kokkos::Impl::is_same<typename row_map_type::memory_space,
3273  Kokkos::HostSpace>::value;
3274  if (inHostMemory) {
3275  // Copy (with cast from size_t to row_offset_type, with bounds
3276  // checking if necessary) to ptr_rot.
3277  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
3278  }
3279  else { // Copy input row offsets to device first.
3280  //
3281  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
3282  // execution space would avoid the double copy.
3283  //
3284  View<size_t*, layout_type ,execution_space > ptr_st ("Tpetra::CrsGraph::ptr", size);
3285  Kokkos::deep_copy (ptr_st, ptr_in);
3286  // Copy on device (casting from size_t to row_offset_type,
3287  // with bounds checking if necessary) to ptr_rot. This
3288  // executes in the output View's execution space, which is the
3289  // same as execution_space.
3290  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
3291  }
3292  }
3293 
3294  Kokkos::View<LocalOrdinal*, layout_type , execution_space > k_ind =
3295  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
3296  setAllIndices (ptr_rot, k_ind);
3297  }
3298 
3299 
3300  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3301  void
3303  getNumEntriesPerLocalRowUpperBound (Teuchos::ArrayRCP<const size_t>& boundPerLocalRow,
3304  size_t& boundForAllLocalRows,
3305  bool& boundSameForAllLocalRows) const
3306  {
3307  const char tfecfFuncName[] = "getNumEntriesPerLocalRowUpperBound: ";
3308  const char suffix[] = " Please report this bug to the Tpetra developers.";
3309 
3310  // The three output arguments. We assign them to the actual
3311  // output arguments at the end, in order to implement
3312  // transactional semantics.
3313  Teuchos::ArrayRCP<const size_t> numEntriesPerRow;
3314  size_t numEntriesForAll = 0;
3315  bool allRowsSame = true;
3316 
3317  const ptrdiff_t numRows = static_cast<ptrdiff_t> (this->getNodeNumRows ());
3318 
3319  if (this->indicesAreAllocated ()) {
3320  if (this->isStorageOptimized ()) {
3321  // left with the case that we have optimized storage. in this
3322  // case, we have to construct a list of row sizes.
3323  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3324  (this->getProfileType () != StaticProfile, std::logic_error,
3325  "The graph is not StaticProfile, but storage appears to be optimized."
3326  << suffix);
3327  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3328  (numRows != 0 && k_rowPtrs_.dimension_0 () == 0, std::logic_error,
3329  "The graph has " << numRows << " (> 0) row" << (numRows != 1 ? "s" : "")
3330  << " on the calling process, but the k_rowPtrs_ array has zero entries."
3331  << suffix);
3332  Teuchos::ArrayRCP<size_t> numEnt;
3333  if (numRows != 0) {
3334  numEnt = Teuchos::arcp<size_t> (numRows);
3335  }
3336 
3337  // We have to iterate through the row offsets anyway, so we
3338  // might as well check whether all rows' bounds are the same.
3339  bool allRowsReallySame = false;
3340  for (ptrdiff_t i = 0; i < numRows; ++i) {
3341  numEnt[i] = this->k_rowPtrs_(i+1) - this->k_rowPtrs_(i);
3342  if (i != 0 && numEnt[i] != numEnt[i-1]) {
3343  allRowsReallySame = false;
3344  }
3345  }
3346  if (allRowsReallySame) {
3347  if (numRows == 0) {
3348  numEntriesForAll = 0;
3349  } else {
3350  numEntriesForAll = numEnt[1] - numEnt[0];
3351  }
3352  allRowsSame = true;
3353  }
3354  else {
3355  numEntriesPerRow = numEnt; // Teuchos::arcp_const_cast<const size_t> (numEnt);
3356  allRowsSame = false; // conservatively; we don't check the array
3357  }
3358  }
3359  else if (k_numRowEntries_.dimension_0 () != 0) {
3360  // This is a shallow copy; the ArrayRCP wraps the View in a
3361  // custom destructor, which ensures correct deallocation if
3362  // that is the only reference to the View. Furthermore, this
3363  // View is a host View, so this doesn't assume UVM.
3364  numEntriesPerRow = Kokkos::Compat::persistingView (k_numRowEntries_);
3365  allRowsSame = false; // conservatively; we don't check the array
3366  }
3367  else {
3368  numEntriesForAll = 0;
3369  allRowsSame = true;
3370  }
3371  }
3372  else { // indices not allocated
3373  if (k_numAllocPerRow_.dimension_0 () != 0) {
3374  // This is a shallow copy; the ArrayRCP wraps the View in a
3375  // custom destructor, which ensures correct deallocation if
3376  // that is the only reference to the View. Furthermore, this
3377  // View is a host View, so this doesn't assume UVM.
3378  numEntriesPerRow = Kokkos::Compat::persistingView (k_numAllocPerRow_);
3379  allRowsSame = false; // conservatively; we don't check the array
3380  }
3381  else {
3382  numEntriesForAll = numAllocForAllRows_;
3383  allRowsSame = true;
3384  }
3385  }
3386 
3387  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3388  (numEntriesForAll != 0 && numEntriesPerRow.size () != 0, std::logic_error,
3389  "numEntriesForAll and numEntriesPerRow are not consistent. The former "
3390  "is nonzero (" << numEntriesForAll << "), but the latter has nonzero "
3391  "size " << numEntriesPerRow.size () << "." << suffix);
3392  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3393  (numEntriesForAll != 0 && ! allRowsSame, std::logic_error,
3394  "numEntriesForAll and allRowsSame are not consistent. The former "
3395  "is nonzero (" << numEntriesForAll << "), but the latter is false."
3396  << suffix);
3397  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3398  (numEntriesPerRow.size () != 0 && allRowsSame, std::logic_error,
3399  "numEntriesPerRow and allRowsSame are not consistent. The former has "
3400  "nonzero length " << numEntriesForAll << ", but the latter is true."
3401  << suffix);
3402 
3403  boundPerLocalRow = numEntriesPerRow;
3404  boundForAllLocalRows = numEntriesForAll;
3405  boundSameForAllLocalRows = allRowsSame;
3406  }
3407 
3408 
3409  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3410  void
3413  {
3414  using Teuchos::Comm;
3415  using Teuchos::outArg;
3416  using Teuchos::RCP;
3417  using Teuchos::rcp;
3418  using Teuchos::REDUCE_MAX;
3419  using Teuchos::REDUCE_MIN;
3420  using Teuchos::reduceAll;
3422  typedef LocalOrdinal LO;
3423  typedef GlobalOrdinal GO;
3424  typedef typename Teuchos::Array<GO>::size_type size_type;
3425  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
3426 
3427  RCP<const Comm<int> > comm = getComm ();
3428 
3429  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3430  (! isFillActive (), std::runtime_error, "Fill must be active before "
3431  "you may call this method.");
3432 
3433  const size_t myNumNonlocalRows = nonlocals_.size ();
3434 
3435  // If no processes have nonlocal rows, then we don't have to do
3436  // anything. Checking this is probably cheaper than constructing
3437  // the Map of nonlocal rows (see below) and noticing that it has
3438  // zero global entries.
3439  {
3440  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
3441  int someoneHasNonlocalRows = 0;
3442  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
3443  outArg (someoneHasNonlocalRows));
3444  if (someoneHasNonlocalRows == 0) {
3445  return; // no process has nonlocal rows, so nothing to do
3446  }
3447  }
3448 
3449  // 1. Create a list of the "nonlocal" rows on each process. this
3450  // requires iterating over nonlocals_, so while we do this,
3451  // deduplicate the entries and get a count for each nonlocal
3452  // row on this process.
3453  // 2. Construct a new row Map corresponding to those rows. This
3454  // Map is likely overlapping. We know that the Map is not
3455  // empty on all processes, because the above all-reduce and
3456  // return exclude that case.
3457 
3458  RCP<const map_type> nonlocalRowMap;
3459  // Keep this for CrsGraph's constructor, so we can use StaticProfile.
3460  Teuchos::ArrayRCP<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
3461  {
3462  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
3463  size_type curPos = 0;
3464  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
3465  ++mapIter, ++curPos) {
3466  myNonlocalGblRows[curPos] = mapIter->first;
3467  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
3468  std::sort (gblCols.begin (), gblCols.end ());
3469  auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
3470  gblCols.erase (vecLast, gblCols.end ());
3471  numEntPerNonlocalRow[curPos] = gblCols.size ();
3472  }
3473 
3474  // Currently, Map requires that its indexBase be the global min
3475  // of all its global indices. Map won't compute this for us, so
3476  // we must do it. If our process has no nonlocal rows, set the
3477  // "min" to the max possible GO value. This ensures that if
3478  // some process has at least one nonlocal row, then it will pick
3479  // that up as the min. We know that at least one process has a
3480  // nonlocal row, since the all-reduce and return at the top of
3481  // this method excluded that case.
3482  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
3483  {
3484  auto iter = std::min_element (myNonlocalGblRows.begin (),
3485  myNonlocalGblRows.end ());
3486  if (iter != myNonlocalGblRows.end ()) {
3487  myMinNonlocalGblRow = *iter;
3488  }
3489  }
3490  GO gblMinNonlocalGblRow = 0;
3491  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
3492  outArg (gblMinNonlocalGblRow));
3493  const GO indexBase = gblMinNonlocalGblRow;
3494  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3495  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3496  }
3497 
3498  // 3. Use the column indices for each nonlocal row, as stored in
3499  // nonlocals_, to construct a CrsGraph corresponding to
3500  // nonlocal rows. We may use StaticProfile, since we have
3501  // exact counts of the number of entries in each nonlocal row.
3502 
3503  RCP<crs_graph_type> nonlocalGraph =
3504  rcp (new crs_graph_type (nonlocalRowMap, numEntPerNonlocalRow,
3505  StaticProfile));
3506  {
3507  size_type curPos = 0;
3508  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
3509  ++mapIter, ++curPos) {
3510  const GO gblRow = mapIter->first;
3511  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3512  const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3513  nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3514  }
3515  }
3516  // There's no need to fill-complete the nonlocals graph.
3517  // We just use it as a temporary container for the Export.
3518 
3519  // 4. If the original row Map is one to one, then we can Export
3520  // directly from nonlocalGraph into this. Otherwise, we have
3521  // to create a temporary graph with a one-to-one row Map,
3522  // Export into that, then Import from the temporary graph into
3523  // *this.
3524 
3525  auto origRowMap = this->getRowMap ();
3526  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3527 
3528  if (origRowMapIsOneToOne) {
3529  export_type exportToOrig (nonlocalRowMap, origRowMap);
3530  this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3531  // We're done at this point!
3532  }
3533  else {
3534  // If you ask a Map whether it is one to one, it does some
3535  // communication and stashes intermediate results for later use
3536  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3537  // much more then the original cost of calling isOneToOne.
3538  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3539  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3540 
3541  // Create a temporary graph with the one-to-one row Map.
3542  //
3543  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3544  // row, to avoid reallocation during the Export operation.
3545  crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3546  // Export from graph of nonlocals into the temp one-to-one graph.
3547  oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3548 
3549  // We don't need the graph of nonlocals anymore, so get rid of
3550  // it, to keep the memory high-water mark down.
3551  nonlocalGraph = Teuchos::null;
3552 
3553  // Import from the one-to-one graph to the original graph.
3554  import_type importToOrig (oneToOneRowMap, origRowMap);
3555  this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3556  }
3557 
3558  // It's safe now to clear out nonlocals_, since we've already
3559  // committed side effects to *this. The standard idiom for
3560  // clearing a Container like std::map, is to swap it with an empty
3561  // Container and let the swapped Container fall out of scope.
3562  decltype (nonlocals_) newNonlocals;
3563  std::swap (nonlocals_, newNonlocals);
3564 
3565  checkInternalState ();
3566  }
3567 
3568 
3569  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3570  void
3572  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3573  {
3574  const char tfecfFuncName[] = "resumeFill";
3575  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! hasRowInfo(), std::runtime_error,
3576  ": Sorry, you cannot resume fill of the CrsGraph, since the graph's row "
3577  "information was deleted in fillComplete().");
3578 
3579 #ifdef HAVE_TPETRA_DEBUG
3580  Teuchos::barrier( *rowMap_->getComm() );
3581 #endif // HAVE_TPETRA_DEBUG
3582  clearGlobalConstants();
3583  if (params != Teuchos::null) this->setParameterList (params);
3584  lowerTriangular_ = false;
3585  upperTriangular_ = false;
3586  // either still sorted/merged or initially sorted/merged
3587  indicesAreSorted_ = true;
3588  noRedundancies_ = true;
3589  fillComplete_ = false;
3590 #ifdef HAVE_TPETRA_DEBUG
3591  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3592  ! isFillActive() || isFillComplete(), std::logic_error,
3593  "::resumeFill(): At end of method, either fill is not active or fill is "
3594  "complete. This violates stated post-conditions. Please report this bug "
3595  "to the Tpetra developers.");
3596 #endif // HAVE_TPETRA_DEBUG
3597  }
3598 
3599 
3600  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3601  void
3603  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3604  {
3605  // If the graph already has domain and range Maps, don't clobber
3606  // them. If it doesn't, use the current row Map for both the
3607  // domain and range Maps.
3608  //
3609  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3610  // column Map, and column indices are inserted which are not in
3611  // the row Map on any process, this will cause troubles. However,
3612  // that is not a common case for most applications that we
3613  // encounter, and checking for it might require more
3614  // communication.
3615  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3616  if (domMap.is_null ()) {
3617  domMap = this->getRowMap ();
3618  }
3619  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3620  if (ranMap.is_null ()) {
3621  ranMap = this->getRowMap ();
3622  }
3623  this->fillComplete (domMap, ranMap, params);
3624  }
3625 
3626 
3627  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3628  void
3630  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3631  const Teuchos::RCP<const map_type>& rangeMap,
3632  const Teuchos::RCP<Teuchos::ParameterList>& params)
3633  {
3634  const char tfecfFuncName[] = "fillComplete: ";
3635 
3636 #ifdef HAVE_TPETRA_DEBUG
3637  rowMap_->getComm ()->barrier ();
3638 #endif // HAVE_TPETRA_DEBUG
3639 
3640  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
3641  std::runtime_error, "Graph fill state must be active (isFillActive() "
3642  "must be true) before calling fillComplete().");
3643 
3644  const int numProcs = getComm ()->getSize ();
3645 
3646  //
3647  // Read and set parameters
3648  //
3649 
3650  // Does the caller want to sort remote GIDs (within those owned by
3651  // the same process) in makeColMap()?
3652  if (! params.is_null ()) {
3653  if (params->isParameter ("sort column map ghost gids")) {
3654  sortGhostsAssociatedWithEachProcessor_ =
3655  params->get<bool> ("sort column map ghost gids",
3656  sortGhostsAssociatedWithEachProcessor_);
3657  }
3658  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3659  sortGhostsAssociatedWithEachProcessor_ =
3660  params->get<bool> ("Sort column Map ghost GIDs",
3661  sortGhostsAssociatedWithEachProcessor_);
3662  }
3663  }
3664 
3665  // If true, the caller promises that no process did nonlocal
3666  // changes since the last call to fillComplete.
3667  bool assertNoNonlocalInserts = false;
3668  if (! params.is_null ()) {
3669  assertNoNonlocalInserts =
3670  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3671  }
3672 
3673  //
3674  // Allocate indices, if they haven't already been allocated
3675  //
3676  if (! indicesAreAllocated ()) {
3677  if (hasColMap ()) {
3678  // We have a column Map, so use local indices.
3679  allocateIndices (LocalIndices);
3680  } else {
3681  // We don't have a column Map, so use global indices.
3682  allocateIndices (GlobalIndices);
3683  }
3684  }
3685 
3686  //
3687  // Do global assembly, if requested and if the communicator
3688  // contains more than one process.
3689  //
3690  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3691  if (mayNeedGlobalAssemble) {
3692  // This first checks if we need to do global assembly.
3693  // The check costs a single all-reduce.
3694  globalAssemble ();
3695  }
3696  else {
3697  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3698  (numProcs > 1 && nonlocals_.size() > 0, std::runtime_error,
3699  "The graph's communicator contains only one process, "
3700  "but there are nonlocal entries. "
3701  "This probably means that invalid entries were added to the graph.");
3702  }
3703 
3704  // Set domain and range Map. This may clear the Import / Export
3705  // objects if the new Maps differ from any old ones.
3706  setDomainRangeMaps (domainMap, rangeMap);
3707 
3708  // If the graph does not already have a column Map (either from
3709  // the user constructor calling the version of the constructor
3710  // that takes a column Map, or from a previous fillComplete call),
3711  // then create it.
3712  if (! hasColMap ()) {
3713  this->makeColMap ();
3714  }
3715 
3716  // Make indices local, if they aren't already.
3717  // The method doesn't do any work if the indices are already local.
3718  makeIndicesLocal ();
3719 
3720  // If this process has no indices, then CrsGraph considers it
3721  // already trivially sorted and merged. Thus, this method need
3722  // not be called on all processes in the row Map's communicator.
3723  this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3724 
3725  makeImportExport (); // Make Import and Export objects, if necessary
3726  computeGlobalConstants ();
3727  fillLocalGraph (params);
3728  fillComplete_ = true;
3729 
3730 #ifdef HAVE_TPETRA_DEBUG
3731  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3732  isFillActive() == true || isFillComplete() == false, std::logic_error,
3733  ": Violated stated post-conditions. Please contact Tpetra team.");
3734 #endif
3735 
3736  checkInternalState ();
3737  }
3738 
3739 
3740  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3741  void
3743  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3744  const Teuchos::RCP<const map_type>& rangeMap,
3745  const Teuchos::RCP<const import_type>& importer,
3746  const Teuchos::RCP<const export_type>& exporter,
3747  const Teuchos::RCP<Teuchos::ParameterList>& params)
3748  {
3749  const char tfecfFuncName[] = "expertStaticFillComplete: ";
3750 #ifdef HAVE_TPETRA_MMM_TIMINGS
3751  std::string label;
3752  if(!params.is_null())
3753  label = params->get("Timer Label",label);
3754  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3755  using Teuchos::TimeMonitor;
3756  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3757 #endif
3758 
3759 
3760  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3761  domainMap.is_null () || rangeMap.is_null (),
3762  std::runtime_error, "The input domain Map and range Map must be nonnull.");
3763  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3764  pftype_ != StaticProfile, std::runtime_error, "You may not call this "
3765  "method unless the graph is StaticProfile.");
3766  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3767  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3768  "call this method unless the graph has a column Map.");
3769  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3770  getNodeNumRows () > 0 && k_rowPtrs_.dimension_0 () == 0,
3771  std::runtime_error, "The calling process has getNodeNumRows() = "
3772  << getNodeNumRows () << " > 0 rows, but the row offsets array has not "
3773  "been set.");
3774  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3775  static_cast<size_t> (k_rowPtrs_.dimension_0 ()) != getNodeNumRows () + 1,
3776  std::runtime_error, "The row offsets array has length " <<
3777  k_rowPtrs_.dimension_0 () << " != getNodeNumRows()+1 = " <<
3778  (getNodeNumRows () + 1) << ".");
3779 
3780  // Note: We don't need to do the following things which are normally done in fillComplete:
3781  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3782 
3783  // Note: Need to do this so computeGlobalConstants & fillLocalGraph work
3784  //
3785  // The first assignment is always true if the graph has 1-D
3786  // storage (StaticProfile). The second assignment is only true if
3787  // storage is packed.
3788  nodeNumEntries_ =
3789  Details::getEntryOnHost (this->k_rowPtrs_, this->getNodeNumRows ());
3790 
3791  // Constants from allocateIndices
3792  //
3793  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3794  // away once the graph is allocated. expertStaticFillComplete
3795  // either presumes that the graph is allocated, or "allocates" it.
3796  //
3797  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3798  // version of CrsGraph is to allocate in the constructor, not
3799  // lazily on first insert. That will make both
3800  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3801  numAllocForAllRows_ = 0;
3802  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3803  indicesAreAllocated_ = true;
3804 
3805  // Constants from makeIndicesLocal
3806  //
3807  // The graph has a column Map, so its indices had better be local.
3808  indicesAreLocal_ = true;
3809  indicesAreGlobal_ = false;
3810 
3811  // set domain/range map: may clear the import/export objects
3812 #ifdef HAVE_TPETRA_MMM_TIMINGS
3813  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3814 #endif
3815  setDomainRangeMaps (domainMap, rangeMap);
3816 
3817  // Presume the user sorted and merged the arrays first
3818  indicesAreSorted_ = true;
3819  noRedundancies_ = true;
3820 
3821  // makeImportExport won't create a new importer/exporter if I set one here first.
3822 #ifdef HAVE_TPETRA_MMM_TIMINGS
3823  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3824 #endif
3825 
3826  importer_ = Teuchos::null;
3827  exporter_ = Teuchos::null;
3828  if (importer != Teuchos::null) {
3829  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3830  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3831  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3832  std::invalid_argument,": importer does not match matrix maps.");
3833  importer_ = importer;
3834 
3835  }
3836 
3837 #ifdef HAVE_TPETRA_MMM_TIMINGS
3838  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3839 #endif
3840 
3841  if (exporter != Teuchos::null) {
3842  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3843  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3844  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3845  std::invalid_argument,": exporter does not match matrix maps.");
3846  exporter_ = exporter;
3847  }
3848 
3849 #ifdef HAVE_TPETRA_MMM_TIMINGS
3850  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3851 #endif
3852 
3853  makeImportExport ();
3854 
3855  // Compute the constants
3856 #ifdef HAVE_TPETRA_MMM_TIMINGS
3857  if(params.is_null() || params->get("compute global constants",true))
3858  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3859  else
3860  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3861 #endif
3862  if(params.is_null() || params->get("compute global constants",true))
3863  computeGlobalConstants ();
3864  else
3865  computeLocalConstants ();
3866 
3867  // Since we have a StaticProfile, fillLocalGraph will do the right thing...
3868 #ifdef HAVE_TPETRA_MMM_TIMINGS
3869  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3870 #endif
3871  fillLocalGraph (params);
3872  fillComplete_ = true;
3873 
3874 #ifdef HAVE_TPETRA_MMM_TIMINGS
3875  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3876 #endif
3877  checkInternalState ();
3878  }
3879 
3880 
3881  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3882  void
3884  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3885  {
3887  typedef decltype (k_numRowEntries_) row_entries_type;
3888  typedef typename local_graph_type::row_map_type row_map_type;
3889  typedef typename row_map_type::non_const_type non_const_row_map_type;
3890  typedef typename local_graph_type::entries_type::non_const_type lclinds_1d_type;
3891  const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
3892  "expertStaticFillComplete): ";
3893  const size_t lclNumRows = this->getNodeNumRows ();
3894 
3895  // This method's goal is to fill in the two arrays (compressed
3896  // sparse row format) that define the sparse graph's structure.
3897  //
3898  // Use the nonconst version of row_map_type for ptr_d, because
3899  // the latter is const and we need to modify ptr_d here.
3900  non_const_row_map_type ptr_d;
3901  row_map_type ptr_d_const;
3902  lclinds_1d_type ind_d;
3903 
3904  bool requestOptimizedStorage = true;
3905  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3906  requestOptimizedStorage = false;
3907  }
3908  if (this->getProfileType () == DynamicProfile) {
3909  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
3910  //
3911  // DynamicProfile means that the graph's column indices are
3912  // currently stored in a 2-D "unpacked" format, in the
3913  // arrays-of-arrays lclInds2D_. We allocate 1-D storage
3914  // (ind_d) and then copy from 2-D storage (lclInds2D_) into 1-D
3915  // storage (ind_d).
3916 #ifdef HAVE_TPETRA_DEBUG
3917  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3918  (static_cast<size_t> (this->k_numRowEntries_.dimension_0 ()) !=
3919  lclNumRows, std::logic_error, "(DynamicProfile branch) "
3920  "k_numRowEntries_.dimension_0() = " << k_numRowEntries_.dimension_0 ()
3921  << " != getNodeNumRows() = " << lclNumRows << "");
3922 #endif // HAVE_TPETRA_DEBUG
3923 
3924  // Pack the row offsets into ptr_d, by doing a sum-scan of the
3925  // array of valid entry counts per row (k_numRowEntries_). The
3926  // pack method can handle its counts input being a host View.
3927  //
3928  // Total number of entries in the matrix on the calling
3929  // process. We will compute this in the loop below. It's
3930  // cheap to compute and useful as a sanity check.
3931  size_t lclTotalNumEntries = 0;
3932  {
3933  // Allocate the packed row offsets array.
3934  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows+1);
3935  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3936  // This function can handle that numRowEnt_h lives on host.
3937  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3938  ptr_d_const = ptr_d;
3939  }
3940 
3941 #ifdef HAVE_TPETRA_DEBUG
3942  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3943  (static_cast<size_t> (ptr_d.dimension_0 ()) != lclNumRows + 1,
3944  std::logic_error, "(DynamicProfile branch) After packing ptr_d, "
3945  "ptr_d.dimension_0() = " << ptr_d.dimension_0 () << " != "
3946  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
3947  {
3948  const auto valToCheck = Details::getEntryOnHost (ptr_d, lclNumRows);
3949  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3950  (valToCheck != lclTotalNumEntries, std::logic_error,
3951  "(DynamicProfile branch) After packing ptr_d, ptr_d(lclNumRows = "
3952  << lclNumRows << ") = " << valToCheck << " != total number of "
3953  "entries on the calling process = " << lclTotalNumEntries << ".");
3954  }
3955 #endif // HAVE_TPETRA_DEBUG
3956 
3957  // Allocate the array of packed column indices.
3958  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
3959  // Pack the column indices. We have to do this sequentially on
3960  // host, since lclInds2D_ is an ArrayRCP<Array<LO>>, which
3961  // doesn't work in parallel kernels (its iterators aren't even
3962  // thread safe in debug mode).
3963  {
3964  auto ptr_h = Kokkos::create_mirror_view (ptr_d);
3965  Kokkos::deep_copy (ptr_h, ptr_d); // we need the entries on host
3966  auto ind_h = Kokkos::create_mirror_view (ind_d); // will fill on host
3967 
3968  // k_numRowEntries_ is a host View already, so we can use it here.
3969  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3970  for (size_t row = 0; row < lclNumRows; ++row) {
3971  const size_t numEnt = numRowEnt_h(row);
3972  std::copy (lclInds2D_[row].begin (),
3973  lclInds2D_[row].begin () + numEnt,
3974  ind_h.ptr_on_device () + ptr_h(row));
3975  }
3976  Kokkos::deep_copy (ind_d, ind_h);
3977  }
3978 
3979 #ifdef HAVE_TPETRA_DEBUG
3980  // Sanity check of packed row offsets.
3981  if (ptr_d.dimension_0 () != 0) {
3982  const size_t numOffsets = static_cast<size_t> (ptr_d.dimension_0 ());
3983  const size_t valToCheck = Details::getEntryOnHost (ptr_d, numOffsets - 1);
3984  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3985  (valToCheck != static_cast<size_t> (ind_d.dimension_0 ()),
3986  std::logic_error, "(DynamicProfile branch) After packing column "
3987  "indices, ptr_d(" << (numOffsets-1) << ") = " << valToCheck
3988  << " != ind_d.dimension_0() = " << ind_d.dimension_0 () << ".");
3989  }
3990 #endif // HAVE_TPETRA_DEBUG
3991  }
3992  else if (getProfileType () == StaticProfile) {
3993  // StaticProfile means that the graph's column indices are
3994  // currently stored in a 1-D format, with row offsets in
3995  // k_rowPtrs_ and local column indices in k_lclInds1D_.
3996 
3997 #ifdef HAVE_TPETRA_DEBUG
3998  // StaticProfile also means that the graph's array of row
3999  // offsets must already be allocated.
4000  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4001  (k_rowPtrs_.dimension_0 () == 0, std::logic_error,
4002  "(StaticProfile branch) k_rowPtrs_ has size zero, but shouldn't");
4003  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4004  (k_rowPtrs_.dimension_0 () != lclNumRows + 1, std::logic_error,
4005  "(StaticProfile branch) k_rowPtrs_.dimension_0() = "
4006  << k_rowPtrs_.dimension_0 () << " != (lclNumRows + 1) = "
4007  << (lclNumRows + 1) << ".");
4008  {
4009  const size_t numOffsets = k_rowPtrs_.dimension_0 ();
4010  const auto valToCheck =
4011  Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
4012  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4013  (numOffsets != 0 &&
4014  k_lclInds1D_.dimension_0 () != valToCheck,
4015  std::logic_error, "(StaticProfile branch) numOffsets = " <<
4016  numOffsets << " != 0 and k_lclInds1D_.dimension_0() = " <<
4017  k_lclInds1D_.dimension_0 () << " != k_rowPtrs_(" << numOffsets <<
4018  ") = " << valToCheck << ".");
4019  }
4020 #endif // HAVE_TPETRA_DEBUG
4021 
4022  size_t allocSize = 0;
4023  try {
4024  allocSize = this->getNodeAllocationSize ();
4025  }
4026  catch (std::logic_error& e) {
4027  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4028  (true, std::logic_error, "In fillLocalGraph, getNodeAllocationSize "
4029  "threw std::logic_error: " << e.what ());
4030  }
4031  catch (std::runtime_error& e) {
4032  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4033  (true, std::runtime_error, "In fillLocalGraph, getNodeAllocationSize "
4034  "threw std::runtime_error: " << e.what ());
4035  }
4036  catch (std::exception& e) {
4037  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4038  (true, std::runtime_error, "In fillLocalGraph, getNodeAllocationSize "
4039  "threw std::exception: " << e.what ());
4040  }
4041  catch (...) {
4042  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4043  (true, std::runtime_error, "In fillLocalGraph, getNodeAllocationSize "
4044  "threw an exception not a subclass of std::exception.");
4045  }
4046 
4047  if (nodeNumEntries_ != allocSize) {
4048  // The graph's current 1-D storage is "unpacked." This means
4049  // the row offsets may differ from what the final row offsets
4050  // should be. This could happen, for example, if the user
4051  // specified StaticProfile in the constructor and set an upper
4052  // bound on the number of entries in each row, but didn't fill
4053  // all those entries.
4054 
4055 #ifdef HAVE_TPETRA_DEBUG
4056  if (k_rowPtrs_.dimension_0 () != 0) {
4057  const size_t numOffsets =
4058  static_cast<size_t> (k_rowPtrs_.dimension_0 ());
4059  const auto valToCheck =
4060  Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
4061  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4062  (valToCheck != static_cast<size_t> (k_lclInds1D_.dimension_0 ()),
4063  std::logic_error, "(StaticProfile unpacked branch) Before "
4064  "allocating or packing, k_rowPtrs_(" << (numOffsets-1) << ") = "
4065  << valToCheck << " != k_lclInds1D_.dimension_0() = "
4066  << k_lclInds1D_.dimension_0 () << ".");
4067  }
4068 #endif // HAVE_TPETRA_DEBUG
4069 
4070  // Pack the row offsets into ptr_d, by doing a sum-scan of the
4071  // array of valid entry counts per row (k_numRowEntries_).
4072 
4073  // Total number of entries in the matrix on the calling
4074  // process. We will compute this in the loop below. It's
4075  // cheap to compute and useful as a sanity check.
4076  size_t lclTotalNumEntries = 0;
4077  {
4078  // Allocate the packed row offsets array.
4079  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
4080  ptr_d_const = ptr_d;
4081 
4082  // It's ok that k_numRowEntries_ is a host View; the
4083  // function can handle this.
4084  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
4085 #ifdef HAVE_TPETRA_DEBUG
4086  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4087  (static_cast<size_t> (numRowEnt_h.dimension_0 ()) != lclNumRows,
4088  std::logic_error, "(StaticProfile unpacked branch) "
4089  "numRowEnt_h.dimension_0() = " << numRowEnt_h.dimension_0 ()
4090  << " != getNodeNumRows() = " << lclNumRows << "");
4091 #endif // HAVE_TPETRA_DEBUG
4092 
4093  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
4094 
4095 #ifdef HAVE_TPETRA_DEBUG
4096  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4097  (static_cast<size_t> (ptr_d.dimension_0 ()) != lclNumRows + 1,
4098  std::logic_error, "(StaticProfile unpacked branch) After "
4099  "allocating ptr_d, ptr_d.dimension_0() = " << ptr_d.dimension_0 ()
4100  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
4101  {
4102  const auto valToCheck = Details::getEntryOnHost (ptr_d, lclNumRows);
4103  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4104  (valToCheck != lclTotalNumEntries, std::logic_error,
4105  "Tpetra::CrsGraph::fillLocalGraph: In StaticProfile unpacked "
4106  "branch, after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
4107  << ") = " << valToCheck << " != total number of entries on "
4108  "the calling process = " << lclTotalNumEntries << ".");
4109  }
4110 #endif // HAVE_TPETRA_DEBUG
4111  }
4112 
4113  // Allocate the array of packed column indices.
4114  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
4115 
4116  // k_rowPtrs_ and k_lclInds1D_ are currently unpacked. Pack
4117  // them, using the packed row offsets array ptr_d that we
4118  // created above.
4119  //
4120  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
4121  // CrsMatrix?), we need to keep around the unpacked row
4122  // offsets and column indices.
4123 
4124  // Pack the column indices from unpacked k_lclInds1D_ into
4125  // packed ind_d. We will replace k_lclInds1D_ below.
4126  typedef pack_functor<
4127  typename local_graph_type::entries_type::non_const_type,
4128  row_map_type> inds_packer_type;
4129  inds_packer_type f (ind_d, k_lclInds1D_, ptr_d, k_rowPtrs_);
4130  {
4131  typedef typename decltype (ind_d)::execution_space exec_space;
4132  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
4133  Kokkos::parallel_for (range_type (0, lclNumRows), f);
4134  }
4135 
4136 #ifdef HAVE_TPETRA_DEBUG
4137  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4138  (ptr_d.dimension_0 () == 0, std::logic_error, "(StaticProfile "
4139  "\"Optimize Storage\"=true branch) After packing, "
4140  "ptr_d.dimension_0() = 0. This probably means k_rowPtrs_ was "
4141  "never allocated.");
4142  if (ptr_d.dimension_0 () != 0) {
4143  const size_t numOffsets = static_cast<size_t> (ptr_d.dimension_0 ());
4144  const auto valToCheck = Details::getEntryOnHost (ptr_d, numOffsets - 1);
4145  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4146  (static_cast<size_t> (valToCheck) != ind_d.dimension_0 (),
4147  std::logic_error, "(StaticProfile \"Optimize Storage\"=true "
4148  "branch) After packing, ptr_d(" << (numOffsets-1) << ") = "
4149  << valToCheck << " != ind_d.dimension_0() = "
4150  << ind_d.dimension_0 () << ".");
4151  }
4152 #endif // HAVE_TPETRA_DEBUG
4153  }
4154  else { // We don't have to pack, so just set the pointers.
4155  ptr_d_const = k_rowPtrs_;
4156  ind_d = k_lclInds1D_;
4157 
4158 #ifdef HAVE_TPETRA_DEBUG
4159  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4160  (ptr_d_const.dimension_0 () == 0, std::logic_error, "(StaticProfile "
4161  "\"Optimize Storage\"=false branch) ptr_d_const.dimension_0() = 0. "
4162  "This probably means that k_rowPtrs_ was never allocated.");
4163  if (ptr_d_const.dimension_0 () != 0) {
4164  const size_t numOffsets =
4165  static_cast<size_t> (ptr_d_const.dimension_0 ());
4166  const size_t valToCheck =
4167  Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
4168  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4169  (valToCheck != static_cast<size_t> (ind_d.dimension_0 ()),
4170  std::logic_error, "(StaticProfile \"Optimize Storage\"=false "
4171  "branch) ptr_d_const(" << (numOffsets-1) << ") = " << valToCheck
4172  << " != ind_d.dimension_0() = " << ind_d.dimension_0 () << ".");
4173  }
4174 #endif // HAVE_TPETRA_DEBUG
4175  }
4176  }
4177 
4178 #ifdef HAVE_TPETRA_DEBUG
4179  // Extra sanity checks.
4180  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4181  (static_cast<size_t> (ptr_d_const.dimension_0 ()) != lclNumRows + 1,
4182  std::logic_error, "After packing, ptr_d_const.dimension_0() = " <<
4183  ptr_d_const.dimension_0 () << " != lclNumRows+1 = " << (lclNumRows+1)
4184  << ".");
4185  if (ptr_d_const.dimension_0 () != 0) {
4186  const size_t numOffsets = static_cast<size_t> (ptr_d_const.dimension_0 ());
4187  const auto valToCheck = Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
4188  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4189  (static_cast<size_t> (valToCheck) != ind_d.dimension_0 (),
4190  std::logic_error, "After packing, ptr_d_const(" << (numOffsets-1)
4191  << ") = " << valToCheck << " != ind_d.dimension_0() = "
4192  << ind_d.dimension_0 () << ".");
4193  }
4194 #endif // HAVE_TPETRA_DEBUG
4195 
4196  if (requestOptimizedStorage) {
4197  // With optimized storage, we don't need to store the 2-D column
4198  // indices array-of-arrays, or the array of row entry counts.
4199 
4200  // Free graph data structures that are only needed for 2-D or
4201  // unpacked 1-D storage.
4202  lclInds2D_ = Teuchos::null;
4203  k_numRowEntries_ = row_entries_type ();
4204 
4205  // Keep the new 1-D packed allocations.
4206  k_rowPtrs_ = ptr_d_const;
4207  k_lclInds1D_ = ind_d;
4208 
4209  // The graph is definitely StaticProfile now, whether or not it
4210  // was before.
4211  pftype_ = StaticProfile;
4212  storageStatus_ = Details::STORAGE_1D_PACKED;
4213  }
4214 
4215  // FIXME (mfh 28 Aug 2014) "Local Graph" sublist no longer used.
4216 
4217  // Build the local graph.
4218  lclGraph_ = local_graph_type (ind_d, ptr_d_const);
4219 
4220  // TODO (mfh 13 Mar 2014) getNodeNumDiags(), isUpperTriangular(),
4221  // and isLowerTriangular() depend on computeGlobalConstants(), in
4222  // particular the part where it looks at the local matrix. You
4223  // have to use global indices to determine which entries are
4224  // diagonal, or above or below the diagonal. However, lower or
4225  // upper triangularness is a local property.
4226  }
4227 
4228  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4229  void
4231  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4232  {
4233  // NOTE: This safety check matches the code, but not the documentation of Crsgraph
4234  //
4235  // FIXME (mfh 18 Aug 2014) This will break if the calling process
4236  // has no entries, because in that case, currently it is neither
4237  // locally nor globally indexed. This will change once we get rid
4238  // of lazy allocation (so that the constructor allocates indices
4239  // and therefore commits to local vs. global).
4240  const char tfecfFuncName[] = "replaceColMap: ";
4241  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4242  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
4243  "Requires matching maps and non-static graph.");
4244  colMap_ = newColMap;
4245  }
4246 
4247  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4248  void
4250  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
4251  const Teuchos::RCP<const import_type>& newImport,
4252  const bool sortIndicesInEachRow)
4253  {
4254  using Teuchos::REDUCE_MIN;
4255  using Teuchos::reduceAll;
4256  using Teuchos::RCP;
4257  typedef GlobalOrdinal GO;
4258  typedef LocalOrdinal LO;
4259  typedef typename local_graph_type::entries_type::non_const_type col_inds_type;
4260  const char tfecfFuncName[] = "reindexColumns: ";
4261 
4262  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4263  isFillComplete (), std::runtime_error, "The graph is fill complete "
4264  "(isFillComplete() returns true). You must call resumeFill() before "
4265  "you may call this method.");
4266 
4267  // mfh 19 Aug 2014: This method does NOT redistribute data; it
4268  // doesn't claim to do the work of an Import or Export. This
4269  // means that for all processes, the calling process MUST own all
4270  // column indices, in both the old column Map (if it exists) and
4271  // the new column Map. We check this via an all-reduce.
4272  //
4273  // Some processes may be globally indexed, others may be locally
4274  // indexed, and others (that have no graph entries) may be
4275  // neither. This method will NOT change the graph's current
4276  // state. If it's locally indexed, it will stay that way, and
4277  // vice versa. It would easy to add an option to convert indices
4278  // from global to local, so as to save a global-to-local
4279  // conversion pass. However, we don't do this here. The intended
4280  // typical use case is that the graph already has a column Map and
4281  // is locally indexed, and this is the case for which we optimize.
4282 
4283  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4284 
4285  // Attempt to convert indices to the new column Map's version of
4286  // local. This will fail if on the calling process, the graph has
4287  // indices that are not on that process in the new column Map.
4288  // After the local conversion attempt, we will do an all-reduce to
4289  // see if any processes failed.
4290 
4291  // If this is false, then either the graph contains a column index
4292  // which is invalid in the CURRENT column Map, or the graph is
4293  // locally indexed but currently has no column Map. In either
4294  // case, there is no way to convert the current local indices into
4295  // global indices, so that we can convert them into the new column
4296  // Map's local indices. It's possible for this to be true on some
4297  // processes but not others, due to replaceColMap.
4298  bool allCurColIndsValid = true;
4299  // On the calling process, are all valid current column indices
4300  // also in the new column Map on the calling process? In other
4301  // words, does local reindexing suffice, or should the user have
4302  // done an Import or Export instead?
4303  bool localSuffices = true;
4304 
4305  // Final arrays for the local indices. We will allocate exactly
4306  // one of these ONLY if the graph is locally indexed on the
4307  // calling process, and ONLY if the graph has one or more entries
4308  // (is not empty) on the calling process. In that case, we
4309  // allocate the first (1-D storage) if the graph has a static
4310  // profile, else we allocate the second (2-D storage).
4311  typename local_graph_type::entries_type::non_const_type newLclInds1D;
4312  Teuchos::ArrayRCP<Teuchos::Array<LO> > newLclInds2D;
4313 
4314  // If indices aren't allocated, that means the calling process
4315  // owns no entries in the graph. Thus, there is nothing to
4316  // convert, and it trivially succeeds locally.
4317  if (indicesAreAllocated ()) {
4318  if (isLocallyIndexed ()) {
4319  if (hasColMap ()) { // locally indexed, and currently has a column Map
4320  const map_type& oldColMap = * (getColMap ());
4321  if (pftype_ == StaticProfile) {
4322  // Allocate storage for the new local indices.
4323  const size_t allocSize = this->getNodeAllocationSize ();
4324  newLclInds1D = col_inds_type ("Tpetra::CrsGraph::ind", allocSize);
4325  // Attempt to convert the new indices locally.
4326  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4327  const RowInfo rowInfo = this->getRowInfo (lclRow);
4328  const size_t beg = rowInfo.offset1D;
4329  const size_t end = beg + rowInfo.numEntries;
4330  for (size_t k = beg; k < end; ++k) {
4331  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4332  // use a DualView instead.
4333  const LO oldLclCol = k_lclInds1D_(k);
4334  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4335  allCurColIndsValid = false;
4336  break; // Stop at the first invalid index
4337  }
4338  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4339 
4340  // The above conversion MUST succeed. Otherwise, the
4341  // current local index is invalid, which means that
4342  // the graph was constructed incorrectly.
4343  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4344  allCurColIndsValid = false;
4345  break; // Stop at the first invalid index
4346  }
4347  else {
4348  const LO newLclCol = newColMap->getLocalElement (gblCol);
4349  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4350  localSuffices = false;
4351  break; // Stop at the first invalid index
4352  }
4353  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4354  // use a DualView instead.
4355  newLclInds1D(k) = newLclCol;
4356  }
4357  } // for each entry in the current row
4358  } // for each locally owned row
4359  }
4360  else { // pftype_ == DynamicProfile
4361  // Allocate storage for the new local indices. We only
4362  // allocate the outer array here; we will allocate the
4363  // inner arrays below.
4364  newLclInds2D = Teuchos::arcp<Teuchos::Array<LO> > (lclNumRows);
4365 
4366  // Attempt to convert the new indices locally.
4367  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4368  const RowInfo rowInfo = this->getRowInfo (lclRow);
4369  newLclInds2D.resize (rowInfo.allocSize);
4370 
4371  Teuchos::ArrayView<const LO> oldLclRowView = getLocalView (rowInfo);
4372  Teuchos::ArrayView<LO> newLclRowView = (newLclInds2D[lclRow]) ();
4373 
4374  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4375  const LO oldLclCol = oldLclRowView[k];
4376  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4377  allCurColIndsValid = false;
4378  break; // Stop at the first invalid index
4379  }
4380  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4381 
4382  // The above conversion MUST succeed. Otherwise, the
4383  // local index is invalid and the graph is wrong.
4384  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4385  allCurColIndsValid = false;
4386  break; // Stop at the first invalid index
4387  }
4388  else {
4389  const LO newLclCol = newColMap->getLocalElement (gblCol);
4390  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4391  localSuffices = false;
4392  break; // Stop at the first invalid index.
4393  }
4394  newLclRowView[k] = newLclCol;
4395  }
4396  } // for each entry in the current row
4397  } // for each locally owned row
4398  } // pftype_
4399  }
4400  else { // locally indexed, but no column Map
4401  // This case is only possible if replaceColMap() was called
4402  // with a null argument on the calling process. It's
4403  // possible, but it means that this method can't possibly
4404  // succeed, since we have no way of knowing how to convert
4405  // the current local indices to global indices.
4406  allCurColIndsValid = false;
4407  }
4408  }
4409  else { // globally indexed
4410  // If the graph is globally indexed, we don't need to save
4411  // local indices, but we _do_ need to know whether the current
4412  // global indices are valid in the new column Map. We may
4413  // need to do a getRemoteIndexList call to find this out.
4414  //
4415  // In this case, it doesn't matter whether the graph currently
4416  // has a column Map. We don't need the old column Map to
4417  // convert from global indices to the _new_ column Map's local
4418  // indices. Furthermore, we can use the same code, whether
4419  // the graph is static or dynamic profile.
4420 
4421  // Test whether the current global indices are in the new
4422  // column Map on the calling process.
4423  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4424  const RowInfo rowInfo = this->getRowInfo (lclRow);
4425  Teuchos::ArrayView<const GO> oldGblRowView = getGlobalView (rowInfo);
4426  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4427  const GO gblCol = oldGblRowView[k];
4428  if (! newColMap->isNodeGlobalElement (gblCol)) {
4429  localSuffices = false;
4430  break; // Stop at the first invalid index
4431  }
4432  } // for each entry in the current row
4433  } // for each locally owned row
4434  } // locally or globally indexed
4435  } // whether indices are allocated
4436 
4437  // Do an all-reduce to check both possible error conditions.
4438  int lclSuccess[2];
4439  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
4440  lclSuccess[1] = localSuffices ? 1 : 0;
4441  int gblSuccess[2];
4442  gblSuccess[0] = 0;
4443  gblSuccess[1] = 0;
4444  RCP<const Teuchos::Comm<int> > comm =
4445  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
4446  if (! comm.is_null ()) {
4447  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
4448  }
4449 
4450  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4451  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
4452  " The most likely reason is that the graph is locally indexed, but the "
4453  "column Map is missing (null) on some processes, due to a previous call "
4454  "to replaceColMap().");
4455 
4456  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4457  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
4458  "contains column indices that are in the old column Map, but not in the "
4459  "new column Map (on that process). This method does NOT redistribute "
4460  "data; it does not claim to do the work of an Import or Export operation."
4461  " This means that for all processess, the calling process MUST own all "
4462  "column indices, in both the old column Map and the new column Map. In "
4463  "this case, you will need to do an Import or Export operation to "
4464  "redistribute data.");
4465 
4466  // Commit the results.
4467  if (isLocallyIndexed ()) {
4468  if (pftype_ == StaticProfile) {
4469  k_lclInds1D_ = newLclInds1D;
4470  } else { // dynamic profile
4471  lclInds2D_ = newLclInds2D;
4472  }
4473  // We've reindexed, so we don't know if the indices are sorted.
4474  //
4475  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
4476  // since we're already going through all the indices above. We
4477  // could also sort each row in place; that way, we would only
4478  // have to make one pass over the rows.
4479  indicesAreSorted_ = false;
4480  if (sortIndicesInEachRow) {
4481  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
4482  // order to call this method.
4483  //
4484  // FIXME (mfh 17 Sep 2014) This violates the strong exception
4485  // guarantee. It would be better to sort the new index arrays
4486  // before committing them.
4487  const bool sorted = false; // need to resort
4488  const bool merged = true; // no need to merge, since no dups
4489  this->sortAndMergeAllIndices (sorted, merged);
4490  }
4491  }
4492  colMap_ = newColMap;
4493 
4494  if (newImport.is_null ()) {
4495  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
4496  // check whether the input Import is null on any process.
4497  //
4498  // If the domain Map hasn't been set yet, we can't compute a new
4499  // Import object. Leave it what it is; it should be null, but
4500  // it doesn't matter. If the domain Map _has_ been set, then
4501  // compute a new Import object if necessary.
4502  if (! domainMap_.is_null ()) {
4503  if (! domainMap_->isSameAs (* newColMap)) {
4504  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4505  } else {
4506  importer_ = Teuchos::null; // don't need an Import
4507  }
4508  }
4509  } else {
4510  // The caller gave us an Import object. Assume that it's valid.
4511  importer_ = newImport;
4512  }
4513  }
4514 
4515 
4516  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4517  void
4519  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4520  const Teuchos::RCP<const import_type>& newImporter)
4521  {
4522  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4523  TEUCHOS_TEST_FOR_EXCEPTION(
4524  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4525  "this method unless the graph already has a column Map.");
4526  TEUCHOS_TEST_FOR_EXCEPTION(
4527  newDomainMap.is_null (), std::invalid_argument,
4528  prefix << "The new domain Map must be nonnull.");
4529 
4530 #ifdef HAVE_TPETRA_DEBUG
4531  if (newImporter.is_null ()) {
4532  // It's not a good idea to put expensive operations in a macro
4533  // clause, even if they are side effect - free, because macros
4534  // don't promise that they won't evaluate their arguments more
4535  // than once. It's polite for them to do so, but not required.
4536  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4537  TEUCHOS_TEST_FOR_EXCEPTION(
4538  colSameAsDom, std::invalid_argument, "If the new Import is null, "
4539  "then the new domain Map must be the same as the current column Map.");
4540  }
4541  else {
4542  const bool colSameAsTgt =
4543  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4544  const bool newDomSameAsSrc =
4545  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4546  TEUCHOS_TEST_FOR_EXCEPTION(
4547  ! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4548  "new Import is nonnull, then the current column Map must be the same "
4549  "as the new Import's target Map, and the new domain Map must be the "
4550  "same as the new Import's source Map.");
4551  }
4552 #endif // HAVE_TPETRA_DEBUG
4553 
4554  domainMap_ = newDomainMap;
4555  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4556  }
4557 
4558  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4562  {
4563  return lclGraph_;
4564  }
4565 
4566  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4567  void
4570  {
4571  using ::Tpetra::Details::ProfilingRegion;
4572  using Teuchos::ArrayView;
4573  using Teuchos::outArg;
4574  using Teuchos::reduceAll;
4575  typedef global_size_t GST;
4576  ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4577 
4578  // Short circuit
4579  if(haveGlobalConstants_) return;
4580 
4581 #ifdef HAVE_TPETRA_DEBUG
4582  TEUCHOS_TEST_FOR_EXCEPTION(! hasColMap(), std::logic_error, "Tpetra::"
4583  "CrsGraph::computeGlobalConstants: At this point, the graph should have "
4584  "a column Map, but it does not. Please report this bug to the Tpetra "
4585  "developers.");
4586 #endif // HAVE_TPETRA_DEBUG
4587 
4588  if (! haveLocalConstants_) {
4589  computeLocalConstants();
4590  haveLocalConstants_ = true;
4591  } // if my process doesn't have local constants
4592 
4593  // Compute global constants from local constants. Processes that
4594  // already have local constants still participate in the
4595  // all-reduces, using their previously computed values.
4596  if (haveGlobalConstants_ == false) {
4597  // Promote all the nodeNum* and nodeMaxNum* quantities from
4598  // size_t to global_size_t, when doing the all-reduces for
4599  // globalNum* / globalMaxNum* results.
4600  //
4601  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4602  // this in two all-reduces (one for the sum and the other for
4603  // the max), or use a custom MPI_Op that combines the sum and
4604  // the max. The latter might even be slower than two
4605  // all-reduces on modern network hardware. It would also be a
4606  // good idea to use nonblocking all-reduces (MPI 3), so that we
4607  // don't have to wait around for the first one to finish before
4608  // starting the second one.
4609  GST lcl[2], gbl[2];
4610  lcl[0] = static_cast<GST> (nodeNumEntries_);
4611  lcl[1] = static_cast<GST> (nodeNumDiags_);
4612  reduceAll<int,GST> (*getComm (), Teuchos::REDUCE_SUM,
4613  2, lcl, gbl);
4614  globalNumEntries_ = gbl[0];
4615  globalNumDiags_ = gbl[1];
4616  reduceAll<int,GST> (*getComm (), Teuchos::REDUCE_MAX,
4617  static_cast<GST> (nodeMaxNumRowEntries_),
4618  outArg (globalMaxNumRowEntries_));
4619  haveGlobalConstants_ = true;
4620  }
4621  }
4622 
4623 
4624  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4625  void
4628  {
4629  using ::Tpetra::Details::ProfilingRegion;
4630  using Teuchos::ArrayView;
4631  using Teuchos::outArg;
4632  using Teuchos::reduceAll;
4633  typedef LocalOrdinal LO;
4634  typedef GlobalOrdinal GO;
4635  ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4636 
4637  // Short circuit
4638  if(haveGlobalConstants_) return;
4639 
4640 #ifdef HAVE_TPETRA_DEBUG
4641  TEUCHOS_TEST_FOR_EXCEPTION(! hasColMap(), std::logic_error, "Tpetra::"
4642  "CrsGraph::computeLocalConstants: At this point, the graph should have "
4643  "a column Map, but it does not. Please report this bug to the Tpetra "
4644  "developers.");
4645 #endif // HAVE_TPETRA_DEBUG
4646 
4647  // If necessary, (re)compute the local constants: nodeNumDiags_,
4648  // lowerTriangular_, upperTriangular_, and nodeMaxNumRowEntries_.
4649  if (! haveLocalConstants_) {
4650  // We have actually already computed nodeNumEntries_.
4651  // nodeNumEntries_ gets updated by methods that insert or remove
4652  // indices (including setAllIndices and
4653  // expertStaticFillComplete). Before fillComplete, its count
4654  // may include duplicate column indices in the same row.
4655  // However, mergeRowIndices and
4656  // CrsMatrix::mergeRowIndicesAndValues both subtract off merged
4657  // indices in each row from the total count. Thus,
4658  // nodeNumEntries_ _should_ be accurate at this point, meaning
4659  // that we don't have to re-count it here.
4660 
4661  // Reset local properties
4662  upperTriangular_ = true;
4663  lowerTriangular_ = true;
4664  nodeMaxNumRowEntries_ = 0;
4665  nodeNumDiags_ = 0;
4666 
4667  // At this point, we know that we have both a row Map and a column Map.
4668  const map_type& rowMap = *rowMap_;
4669  const map_type& colMap = *colMap_;
4670 
4671  // Go through all the entries of the graph. Count the number of
4672  // diagonal elements we encounter, and figure out whether the
4673  // graph is lower or upper triangular. Diagonal elements are
4674  // determined using global indices, with respect to the whole
4675  // graph. However, lower or upper triangularity is a local
4676  // property, and is determined using local indices.
4677  //
4678  // At this point, indices have already been sorted in each row.
4679  // That makes finding out whether the graph is lower / upper
4680  // triangular easier.
4681  if (this->indicesAreAllocated () && this->hasRowInfo ()) {
4682  const LO numLocalRows = static_cast<LO> (this->getNodeNumRows ());
4683  for (LO localRow = 0; localRow < numLocalRows; ++localRow) {
4684  const GO globalRow = rowMap.getGlobalElement (localRow);
4685  // Find the local (column) index for the diagonal entry.
4686  // This process might not necessarily own _any_ entries in
4687  // the current row. If it doesn't, skip this row. It won't
4688  // affect any of the attributes (nodeNumDiagons_,
4689  // upperTriangular_, lowerTriangular_, or
4690  // nodeMaxNumRowEntries_) which this loop sets.
4691  const LO rlcid = colMap.getLocalElement (globalRow);
4692  // This process owns one or more entries in the current row.
4693  const RowInfo rowInfo = this->getRowInfo (localRow);
4694  ArrayView<const LO> rview = this->getLocalView (rowInfo);
4695  typename ArrayView<const LO>::iterator beg, end, cur;
4696  beg = rview.begin();
4697  end = beg + rowInfo.numEntries;
4698  if (beg != end) {
4699  for (cur = beg; cur != end; ++cur) {
4700  // is this the diagonal?
4701  if (rlcid == *cur) ++nodeNumDiags_;
4702  }
4703  // Local column indices are sorted in each row. That means
4704  // the smallest column index in this row (on this process)
4705  // is *beg, and the largest column index in this row (on
4706  // this process) is *(end - 1). We know that end - 1 is
4707  // valid because beg != end.
4708  const size_t smallestCol = static_cast<size_t> (*beg);
4709  const size_t largestCol = static_cast<size_t> (*(end - 1));
4710 
4711  if (smallestCol < static_cast<size_t> (localRow)) {
4712  upperTriangular_ = false;
4713  }
4714  if (static_cast<size_t> (localRow) < largestCol) {
4715  lowerTriangular_ = false;
4716  }
4717  }
4718  // Update the max number of entries over all rows.
4719  nodeMaxNumRowEntries_ = std::max (nodeMaxNumRowEntries_, rowInfo.numEntries);
4720  }
4721  }
4722  haveLocalConstants_ = true;
4723  } // if my process doesn't have local constants
4724  }
4725 
4726 
4727  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4728  void
4731  {
4732  using ::Tpetra::Details::ProfilingRegion;
4733  using Teuchos::arcp;
4734  using Teuchos::Array;
4735  typedef LocalOrdinal LO;
4736  typedef GlobalOrdinal GO;
4737  typedef device_type DT;
4738  typedef typename local_graph_type::row_map_type::non_const_value_type offset_type;
4739  typedef decltype (k_numRowEntries_) row_entries_type;
4740  typedef typename row_entries_type::non_const_value_type num_ent_type;
4741  typedef typename local_graph_type::entries_type::non_const_type
4742  lcl_col_inds_type;
4743  typedef Kokkos::View<GO*, typename lcl_col_inds_type::array_layout,
4744  device_type> gbl_col_inds_type;
4745  const char tfecfFuncName[] = "makeIndicesLocal: ";
4746  ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4747 
4748  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4749  (! this->hasColMap (), std::logic_error, "The graph does not have a "
4750  "column Map yet. This method should never be called in that case. "
4751  "Please report this bug to the Tpetra developers.");
4752  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4753  (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4754  "that it has a column Map, because hasColMap() returns true. However, "
4755  "the result of getColMap() is null. This should never happen. Please "
4756  "report this bug to the Tpetra developers.");
4757 
4758  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4759  const map_type& colMap = * (this->getColMap ());
4760 
4761  if (isGloballyIndexed () && lclNumRows != 0) {
4762  // This is a host View.
4763  typename row_entries_type::const_type h_numRowEnt = k_numRowEntries_;
4764 
4765  // allocate data for local indices
4766  if (getProfileType () == StaticProfile) {
4767  // If GO and LO are the same size, we can reuse the existing
4768  // array of 1-D index storage to convert column indices from
4769  // GO to LO. Otherwise, we'll just allocate a new buffer.
4770  constexpr bool LO_GO_same = std::is_same<LO, GO>::value;
4771  if (LO_GO_same) {
4772  // This prevents a build error (illegal assignment) if
4773  // LO_GO_same is _not_ true. Only the first branch
4774  // (returning k_gblInds1D_) should ever get taken.
4775  k_lclInds1D_ = Kokkos::Impl::if_c<LO_GO_same,
4777  lcl_col_inds_type>::select (k_gblInds1D_, k_lclInds1D_);
4778  }
4779  else {
4780  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4781  (k_rowPtrs_.dimension_0 () == 0, std::logic_error,
4782  "k_rowPtrs_.dimension_0() == 0. This should never happen at this "
4783  "point. Please report this bug to the Tpetra developers.");
4784 
4785  const auto numEnt = Details::getEntryOnHost (k_rowPtrs_, lclNumRows);
4786 
4787  // mfh 17 Dec 2016: We don't need initial zero-fill of
4788  // k_lclInds1D_, because we will fill it below anyway.
4789  // AllowPadding would only help for aligned access (e.g.,
4790  // for vectorization) if we also were to pad each row to the
4791  // same alignment, so we'll skip AllowPadding for now.
4792 
4793  // using Kokkos::AllowPadding;
4794  using Kokkos::view_alloc;
4795  using Kokkos::WithoutInitializing;
4796 
4797  // When giving the label as an argument to
4798  // Kokkos::view_alloc, the label must be a string and not a
4799  // char*, else the code won't compile. This is because
4800  // view_alloc also allows a raw pointer as its first
4801  // argument. See
4802  // https://github.com/kokkos/kokkos/issues/434. This is a
4803  // large allocation typically, so the overhead of creating
4804  // an std::string is minor.
4805  const std::string label ("Tpetra::CrsGraph::lclind");
4806  k_lclInds1D_ =
4807  lcl_col_inds_type (view_alloc (label, WithoutInitializing), numEnt);
4808  }
4809 
4810  auto lclColMap = colMap.getLocalMap ();
4811  // This is a "device mirror" of the host View h_numRowEnt.
4812  //
4813  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4814  // Device instance is to use its default constructor. See the
4815  // following Kokkos issue:
4816  //
4817  // https://github.com/kokkos/kokkos/issues/442
4818  auto k_numRowEnt = Kokkos::create_mirror_view (device_type (), h_numRowEnt);
4819 
4821  const size_t numBad =
4822  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (k_lclInds1D_,
4823  k_gblInds1D_,
4824  k_rowPtrs_,
4825  lclColMap,
4826  k_numRowEnt);
4827  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4828  (numBad != 0, std::runtime_error, "When converting column indices "
4829  "from global to local, we encountered " << numBad
4830  << "ind" << (numBad != static_cast<size_t> (1) ? "ices" : "ex")
4831  << " that do" << (numBad != static_cast<size_t> (1) ? "es" : "")
4832  << " not live in the column Map on this process.");
4833 
4834  // We've converted column indices from global to local, so we
4835  // can deallocate the global column indices (which we know are
4836  // in 1-D storage, because the graph has static profile).
4837  k_gblInds1D_ = gbl_col_inds_type ();
4838  }
4839  else { // the graph has dynamic profile (2-D index storage)
4840  lclInds2D_ = Teuchos::arcp<Teuchos::Array<LO> > (lclNumRows);
4841  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4842  if (! gblInds2D_[lclRow].empty ()) {
4843  const GO* const ginds = gblInds2D_[lclRow].getRawPtr ();
4844  // NOTE (mfh 26 Jun 2016) It's always legal to cast the
4845  // number of entries in a row to LO, as long as the row
4846  // doesn't have too many duplicate entries.
4847  const LO rna = static_cast<LO> (gblInds2D_[lclRow].size ());
4848  const LO numEnt = static_cast<LO> (h_numRowEnt(lclRow));
4849  lclInds2D_[lclRow].resize (rna);
4850  LO* const linds = lclInds2D_[lclRow].getRawPtr ();
4851  for (LO j = 0; j < numEnt; ++j) {
4852  const GO gid = ginds[j];
4853  const LO lid = colMap.getLocalElement (gid);
4854  linds[j] = lid;
4855 #ifdef HAVE_TPETRA_DEBUG
4856  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4857  (linds[j] == Teuchos::OrdinalTraits<LO>::invalid(),
4858  std::logic_error,
4859  "Global column ginds[j=" << j << "]=" << ginds[j]
4860  << " of local row " << lclRow << " is not in the column Map. "
4861  "This should never happen. Please report this bug to the "
4862  "Tpetra developers.");
4863 #endif // HAVE_TPETRA_DEBUG
4864  }
4865  }
4866  }
4867  gblInds2D_ = Teuchos::null;
4868  }
4869  } // globallyIndexed() && lclNumRows > 0
4870 
4871  lclGraph_ = local_graph_type (k_lclInds1D_, k_rowPtrs_);
4872  indicesAreLocal_ = true;
4873  indicesAreGlobal_ = false;
4874  checkInternalState ();
4875  }
4876 
4877 
4878  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4879  void
4882  {
4883  using ::Tpetra::Details::ProfilingRegion;
4884 #ifdef HAVE_TPETRA_DEBUG
4885  const char tfecfFuncName[] = "makeColMap: ";
4886 #endif // HAVE_TPETRA_DEBUG
4887  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
4888 
4889  // this->colMap_ should be null at this point, but we accept the
4890  // future possibility that it might not be (esp. if we decide
4891  // later to support graph structure changes after first
4892  // fillComplete, which CrsGraph does not currently (as of 12 Feb
4893  // 2017) support).
4894  Teuchos::RCP<const map_type> colMap = this->colMap_;
4895  const bool sortEachProcsGids =
4896  this->sortGhostsAssociatedWithEachProcessor_;
4897 
4898  // FIXME (mfh 12 Feb 2017) Details::makeColMap returns a
4899  // per-process error code. If an error does occur on a process,
4900  // Details::makeColMap does NOT promise that all processes will
4901  // notice that error. This is the caller's responsibility. For
4902  // now, we only propagate (to all processes) and report the error
4903  // in a debug build. In the future, we need to add the
4904  // local/global error handling scheme used in BlockCrsMatrix to
4905  // this class.
4906 #ifdef HAVE_TPETRA_DEBUG
4907  using Teuchos::outArg;
4908  using Teuchos::REDUCE_MIN;
4909  using Teuchos::reduceAll;
4910 
4911  std::ostringstream errStrm;
4912  const int lclErrCode =
4913  Details::makeColMap (colMap, this->getDomainMap (), *this,
4914  sortEachProcsGids, &errStrm);
4915  auto comm = this->getComm ();
4916  if (! comm.is_null ()) {
4917  const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4918  int gblSuccess = 0; // output argument
4919  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
4920  outArg (gblSuccess));
4921  if (gblSuccess != 1) {
4922  std::ostringstream os;
4923  Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
4924  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4925  (true, std::runtime_error,
4926  "makeColMap reports an error on at least one process."
4927  << std::endl << os.str ());
4928  }
4929  }
4930 #else // NOT HAVE_TPETRA_DEBUG
4931  (void) Details::makeColMap (colMap, this->getDomainMap (), *this,
4932  sortEachProcsGids, NULL);
4933 #endif // HAVE_TPETRA_DEBUG
4934  // See above. We want to admit the possibility of makeColMap
4935  // actually revising an existing column Map, even though that
4936  // doesn't currently (as of 10 May 2017) happen.
4937  this->colMap_ = colMap;
4938 
4939  checkInternalState ();
4940  }
4941 
4942 
4943  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4944  void
4946  sortAndMergeAllIndices (const bool sorted, const bool merged)
4947  {
4948  using ::Tpetra::Details::ProfilingRegion;
4949  typedef LocalOrdinal LO;
4950  typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
4951  host_execution_space;
4952  typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4953  const char tfecfFuncName[] = "sortAndMergeAllIndices: ";
4954  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::sortAndMergeAllIndices");
4955 
4956  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4957  (this->isGloballyIndexed (), std::logic_error,
4958  "This method may only be called after makeIndicesLocal." );
4959 
4960  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4961  (! merged && this->isStorageOptimized (), std::logic_error,
4962  "The graph is already storage optimized, so we shouldn't be merging any "
4963  "indices. Please report this bug to the Tpetra developers.");
4964 
4965  if (! sorted || ! merged) {
4966  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4967  size_t totalNumDups = 0;
4968  // FIXME (mfh 08 May 2017) This may assume CUDA UVM.
4969  Kokkos::parallel_reduce (range_type (0, lclNumRows),
4970  [this, sorted, merged] (const LO& lclRow, size_t& numDups) {
4971  const RowInfo rowInfo = this->getRowInfo (lclRow);
4972  if (! sorted) {
4973  this->sortRowIndices (rowInfo);
4974  }
4975  if (! merged) {
4976  numDups += this->mergeRowIndices (rowInfo);
4977  }
4978  }, totalNumDups);
4979  this->nodeNumEntries_ -= totalNumDups;
4980  this->indicesAreSorted_ = true; // we just sorted every row
4981  this->noRedundancies_ = true; // we just merged every row
4982  }
4983  }
4984 
4985 
4986  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4987  void
4990  {
4991  using ::Tpetra::Details::ProfilingRegion;
4992  using Teuchos::ParameterList;
4993  using Teuchos::RCP;
4994  using Teuchos::rcp;
4995  const char tfecfFuncName[] = "makeImportExport: ";
4996  ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
4997 
4998  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4999  (! this->hasColMap (), std::logic_error,
5000  "This method may not be called unless the graph has a column Map.");
5001  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
5002 
5003  // Don't do any checks to see if we need to create the Import, if
5004  // it exists already.
5005  //
5006  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
5007  // change CrsGraph in the future to allow changing the column
5008  // Map after fillComplete. For now, the column Map is fixed
5009  // after the first fillComplete call.
5010  if (importer_.is_null ()) {
5011  // Create the Import instance if necessary.
5012  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
5013  if (params.is_null () || ! params->isSublist ("Import")) {
5014  importer_ = rcp (new import_type (domainMap_, colMap_));
5015  } else {
5016  RCP<ParameterList> importSublist = sublist (params, "Import", true);
5017  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
5018  }
5019  }
5020  }
5021 
5022  // Don't do any checks to see if we need to create the Export, if
5023  // it exists already.
5024  if (exporter_.is_null ()) {
5025  // Create the Export instance if necessary.
5026  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
5027  if (params.is_null () || ! params->isSublist ("Export")) {
5028  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
5029  }
5030  else {
5031  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
5032  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
5033  }
5034  }
5035  }
5036  }
5037 
5038 
5039  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5040  std::string
5043  {
5044  std::ostringstream oss;
5045  oss << dist_object_type::description ();
5046  if (isFillComplete ()) {
5047  oss << "{status = fill complete"
5048  << ", global rows = " << getGlobalNumRows()
5049  << ", global cols = " << getGlobalNumCols()
5050  << ", global num entries = " << getGlobalNumEntries()
5051  << "}";
5052  }
5053  else {
5054  oss << "{status = fill not complete"
5055  << ", global rows = " << getGlobalNumRows()
5056  << "}";
5057  }
5058  return oss.str();
5059  }
5060 
5061 
5062  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5063  void
5065  describe (Teuchos::FancyOStream &out,
5066  const Teuchos::EVerbosityLevel verbLevel) const
5067  {
5068  const char tfecfFuncName[] = "describe()";
5069  using Teuchos::ArrayView;
5070  using Teuchos::Comm;
5071  using Teuchos::RCP;
5072  using Teuchos::VERB_DEFAULT;
5073  using Teuchos::VERB_NONE;
5074  using Teuchos::VERB_LOW;
5075  using Teuchos::VERB_MEDIUM;
5076  using Teuchos::VERB_HIGH;
5077  using Teuchos::VERB_EXTREME;
5078  using std::endl;
5079  using std::setw;
5080 
5081  Teuchos::EVerbosityLevel vl = verbLevel;
5082  if (vl == VERB_DEFAULT) vl = VERB_LOW;
5083  RCP<const Comm<int> > comm = this->getComm();
5084  const int myImageID = comm->getRank(),
5085  numImages = comm->getSize();
5086  size_t width = 1;
5087  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5088  ++width;
5089  }
5090  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
5091  Teuchos::OSTab tab (out);
5092  // none: print nothing
5093  // low: print O(1) info from node 0
5094  // medium: print O(P) info, num entries per node
5095  // high: print O(N) info, num entries per row
5096  // extreme: print O(NNZ) info: print graph indices
5097  //
5098  // for medium and higher, print constituent objects at specified verbLevel
5099  if (vl != VERB_NONE) {
5100  if (myImageID == 0) out << this->description() << std::endl;
5101  // O(1) globals, minus what was already printed by description()
5102  if (isFillComplete() && myImageID == 0) {
5103  out << "Global number of diagonals = " << globalNumDiags_ << std::endl;
5104  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
5105  }
5106  // constituent objects
5107  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5108  if (myImageID == 0) out << "\nRow map: " << std::endl;
5109  rowMap_->describe(out,vl);
5110  if (colMap_ != Teuchos::null) {
5111  if (myImageID == 0) out << "\nColumn map: " << std::endl;
5112  colMap_->describe(out,vl);
5113  }
5114  if (domainMap_ != Teuchos::null) {
5115  if (myImageID == 0) out << "\nDomain map: " << std::endl;
5116  domainMap_->describe(out,vl);
5117  }
5118  if (rangeMap_ != Teuchos::null) {
5119  if (myImageID == 0) out << "\nRange map: " << std::endl;
5120  rangeMap_->describe(out,vl);
5121  }
5122  }
5123  // O(P) data
5124  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5125  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5126  if (myImageID == imageCtr) {
5127  out << "Node ID = " << imageCtr << std::endl
5128  << "Node number of entries = " << nodeNumEntries_ << std::endl
5129  << "Node number of diagonals = " << nodeNumDiags_ << std::endl
5130  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
5131  if (! indicesAreAllocated ()) {
5132  out << "Indices are not allocated." << std::endl;
5133  }
5134  }
5135  comm->barrier();
5136  comm->barrier();
5137  comm->barrier();
5138  }
5139  }
5140  // O(N) and O(NNZ) data
5141  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
5142  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5143  ! hasRowInfo (), std::runtime_error, ": reduce verbosity level; "
5144  "graph row information was deleted at fillComplete().");
5145  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5146  if (myImageID == imageCtr) {
5147  out << std::setw(width) << "Node ID"
5148  << std::setw(width) << "Global Row"
5149  << std::setw(width) << "Num Entries";
5150  if (vl == VERB_EXTREME) {
5151  out << " Entries";
5152  }
5153  out << std::endl;
5154  const LocalOrdinal lclNumRows =
5155  static_cast<LocalOrdinal> (this->getNodeNumRows ());
5156  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
5157  const RowInfo rowinfo = this->getRowInfo (r);
5158  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
5159  out << std::setw(width) << myImageID
5160  << std::setw(width) << gid
5161  << std::setw(width) << rowinfo.numEntries;
5162  if (vl == VERB_EXTREME) {
5163  out << " ";
5164  if (isGloballyIndexed()) {
5165  ArrayView<const GlobalOrdinal> rowview = getGlobalView(rowinfo);
5166  for (size_t j=0; j < rowinfo.numEntries; ++j) out << rowview[j] << " ";
5167  }
5168  else if (isLocallyIndexed()) {
5169  ArrayView<const LocalOrdinal> rowview = getLocalView(rowinfo);
5170  for (size_t j=0; j < rowinfo.numEntries; ++j) out << colMap_->getGlobalElement(rowview[j]) << " ";
5171  }
5172  }
5173  out << std::endl;
5174  }
5175  }
5176  comm->barrier();
5177  comm->barrier();
5178  comm->barrier();
5179  }
5180  }
5181  }
5182  }
5183 
5184 
5185  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5186  bool
5189  {
5190  (void) source; // forestall "unused variable" compiler warnings
5191 
5192  // It's not clear what kind of compatibility checks on sizes can
5193  // be performed here. Epetra_CrsGraph doesn't check any sizes for
5194  // compatibility.
5195  return true;
5196  }
5197 
5198 
5199  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5200  void
5203  size_t numSameIDs,
5204  const Teuchos::ArrayView<const LocalOrdinal> &permuteToLIDs,
5205  const Teuchos::ArrayView<const LocalOrdinal> &permuteFromLIDs)
5206  {
5207  using Teuchos::Array;
5208  using Teuchos::ArrayView;
5209  typedef LocalOrdinal LO;
5210  typedef GlobalOrdinal GO;
5211  const char tfecfFuncName[] = "copyAndPermute";
5213  typedef RowGraph<LO, GO, node_type> row_graph_type;
5214 
5215  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5216  permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
5217  ": permuteToLIDs and permuteFromLIDs must have the same size.");
5218  // Make sure that the source object has the right type. We only
5219  // actually need it to be a RowGraph, with matching first three
5220  // template parameters. If it's a CrsGraph, we can use view mode
5221  // instead of copy mode to get each row's data.
5222  //
5223  // FIXME (mfh 07 Jul 2013) It should not be necessary for any of
5224  // the template parameters but GO to match. GO has to match
5225  // because the graph has to send indices as global ordinals, if
5226  // the source and target graphs do not have the same column Map.
5227  // If LO doesn't match, the graphs could communicate using global
5228  // indices. It could be possible that Node affects the graph's
5229  // storage format, but packAndPrepare should assume a common
5230  // communication format in any case.
5231  const row_graph_type* srcRowGraph = dynamic_cast<const row_graph_type*> (&source);
5232  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5233  srcRowGraph == NULL, std::invalid_argument,
5234  ": The source object must be a RowGraph with matching first three "
5235  "template parameters.");
5236 
5237  // If the source object is actually a CrsGraph, we can use view
5238  // mode instead of copy mode to access the entries in each row,
5239  // if the graph is not fill complete.
5240  const this_type* srcCrsGraph = dynamic_cast<const this_type*> (&source);
5241 
5242  const map_type& srcRowMap = * (srcRowGraph->getRowMap ());
5243  const map_type& tgtRowMap = * (this->getRowMap ());
5244  const bool src_filled = srcRowGraph->isFillComplete ();
5245  Array<GO> row_copy;
5246  LO myid = 0;
5247 
5248  //
5249  // "Copy" part of "copy and permute."
5250  //
5251  if (src_filled || srcCrsGraph == NULL) {
5252  // If the source graph is fill complete, we can't use view mode,
5253  // because the data might be stored in a different format not
5254  // compatible with the expectations of view mode. Also, if the
5255  // source graph is not a CrsGraph, we can't use view mode,
5256  // because RowGraph only provides copy mode access to the data.
5257  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5258  const GO gid = srcRowMap.getGlobalElement (myid);
5259  size_t row_length = srcRowGraph->getNumEntriesInGlobalRow (gid);
5260  row_copy.resize (row_length);
5261  size_t check_row_length = 0;
5262  srcRowGraph->getGlobalRowCopy (gid, row_copy (), check_row_length);
5263  this->insertGlobalIndices (gid, row_copy ());
5264  }
5265  } else {
5266  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5267  const GO gid = srcRowMap.getGlobalElement (myid);
5268  ArrayView<const GO> row;
5269  srcCrsGraph->getGlobalRowView (gid, row);
5270  this->insertGlobalIndices (gid, row);
5271  }
5272  }
5273 
5274  //
5275  // "Permute" part of "copy and permute."
5276  //
5277  if (src_filled || srcCrsGraph == NULL) {
5278  for (LO i = 0; i < permuteToLIDs.size (); ++i) {
5279  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs[i]);
5280  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs[i]);
5281  size_t row_length = srcRowGraph->getNumEntriesInGlobalRow (srcgid);
5282  row_copy.resize (row_length);
5283  size_t check_row_length = 0;
5284  srcRowGraph->getGlobalRowCopy (srcgid, row_copy (), check_row_length);
5285  this->insertGlobalIndices (mygid, row_copy ());
5286  }
5287  } else {
5288  for (LO i = 0; i < permuteToLIDs.size (); ++i) {
5289  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs[i]);
5290  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs[i]);
5291  ArrayView<const GO> row;
5292  srcCrsGraph->getGlobalRowView (srcgid, row);
5293  this->insertGlobalIndices (mygid, row);
5294  }
5295  }
5296  }
5297 
5298 
5299  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5300  void
5302  packAndPrepare (const SrcDistObject& source,
5303  const Teuchos::ArrayView<const LocalOrdinal> &exportLIDs,
5304  Teuchos::Array<GlobalOrdinal> &exports,
5305  const Teuchos::ArrayView<size_t> & numPacketsPerLID,
5306  size_t& constantNumPackets,
5307  Distributor &distor)
5308  {
5309  using Teuchos::Array;
5310  const char tfecfFuncName[] = "packAndPrepare";
5311  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5312  exportLIDs.size() != numPacketsPerLID.size(), std::runtime_error,
5313  ": exportLIDs and numPacketsPerLID must have the same size.");
5314  typedef RowGraph<LocalOrdinal, GlobalOrdinal, node_type> row_graph_type;
5315  const row_graph_type& srcGraph = dynamic_cast<const row_graph_type&> (source);
5316 
5317  // We don't check whether src_graph has had fillComplete called,
5318  // because it doesn't matter whether the *source* graph has been
5319  // fillComplete'd. The target graph can not be fillComplete'd yet.
5320  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5321  this->isFillComplete (), std::runtime_error,
5322  ": The target graph of an Import or Export must not be fill complete.");
5323 
5324  srcGraph.pack (exportLIDs, exports, numPacketsPerLID, constantNumPackets, distor);
5325  }
5326 
5327 
5328  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5329  void
5331  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5332  Teuchos::Array<GlobalOrdinal>& exports,
5333  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5334  size_t& constantNumPackets,
5335  Distributor& distor) const
5336  {
5337  using Teuchos::Array;
5338  typedef LocalOrdinal LO;
5339  typedef GlobalOrdinal GO;
5340  const char tfecfFuncName[] = "pack";
5341  (void) distor; // forestall "unused argument" compiler warning
5342 
5343  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5344  exportLIDs.size() != numPacketsPerLID.size(), std::runtime_error,
5345  ": exportLIDs and numPacketsPerLID must have the same size.");
5346 
5347  const map_type& srcMap = * (this->getMap ());
5348  constantNumPackets = 0;
5349 
5350  // Set numPacketsPerLID[i] to the number of entries owned by the
5351  // calling process in (local) row exportLIDs[i] of the graph, that
5352  // the caller wants us to send out. Compute the total number of
5353  // packets (that is, entries) owned by this process in all the
5354  // rows that the caller wants us to send out.
5355  size_t totalNumPackets = 0;
5356  Array<GO> row;
5357  for (LO i = 0; i < exportLIDs.size (); ++i) {
5358  const GO GID = srcMap.getGlobalElement (exportLIDs[i]);
5359  size_t row_length = this->getNumEntriesInGlobalRow (GID);
5360  numPacketsPerLID[i] = row_length;
5361  totalNumPackets += row_length;
5362  }
5363 
5364  exports.resize (totalNumPackets);
5365 
5366  // Loop again over the rows to export, and pack rows of indices
5367  // into the output buffer.
5368  size_t exportsOffset = 0;
5369  for (LO i = 0; i < exportLIDs.size (); ++i) {
5370  const GO GID = srcMap.getGlobalElement (exportLIDs[i]);
5371  size_t row_length = this->getNumEntriesInGlobalRow (GID);
5372  row.resize (row_length);
5373  size_t check_row_length = 0;
5374  this->getGlobalRowCopy (GID, row (), check_row_length);
5375  typename Array<GO>::const_iterator row_iter = row.begin();
5376  typename Array<GO>::const_iterator row_end = row.end();
5377  size_t j = 0;
5378  for (; row_iter != row_end; ++row_iter, ++j) {
5379  exports[exportsOffset+j] = *row_iter;
5380  }
5381  exportsOffset += row.size();
5382  }
5383  }
5384 
5385 
5386  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5387  void
5389  unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
5390  const Teuchos::ArrayView<const GlobalOrdinal> &imports,
5391  const Teuchos::ArrayView<size_t> &numPacketsPerLID,
5392  size_t constantNumPackets,
5393  Distributor& /* distor */,
5394  CombineMode /* CM */)
5395  {
5396  using Teuchos::ArrayView;
5397  typedef LocalOrdinal LO;
5398  typedef GlobalOrdinal GO;
5399 
5400  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
5401  // reasonable meaning, whether or not the matrix is fill complete.
5402  // It's just more work to implement.
5403 
5404  // We are not checking the value of the CombineMode input
5405  // argument. For CrsGraph, we only support import/export
5406  // operations if fillComplete has not yet been called. Any
5407  // incoming column-indices are inserted into the target graph. In
5408  // this context, CombineMode values of ADD vs INSERT are
5409  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
5410  // duplicate column-index is inserted, it will be compressed out
5411  // when fillComplete is called.
5412  //
5413  // Note: I think REPLACE means that an existing row is replaced by
5414  // the imported row, i.e., the existing indices are cleared. CGB,
5415  // 6/17/2010
5416 
5417  const char tfecfFuncName[] = "unpackAndCombine: ";
5418  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5419  importLIDs.size() != numPacketsPerLID.size(), std::runtime_error,
5420  "importLIDs and numPacketsPerLID must have the same size.");
5421  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5422  isFillComplete (), std::runtime_error,
5423  "Import or Export operations are not allowed on the destination "
5424  "CrsGraph if it is fill complete.");
5425  size_t importsOffset = 0;
5426 
5427  typedef typename ArrayView<const LO>::const_iterator iter_type;
5428  iter_type impLIDiter = importLIDs.begin();
5429  iter_type impLIDend = importLIDs.end();
5430 
5431  for (size_t i = 0; impLIDiter != impLIDend; ++impLIDiter, ++i) {
5432  LO row_length = numPacketsPerLID[i];
5433 
5434  const GO* const row_raw = (row_length == 0) ? NULL : &imports[importsOffset];
5435  ArrayView<const GlobalOrdinal> row (row_raw, row_length);
5436  insertGlobalIndicesFiltered (this->getMap ()->getGlobalElement (*impLIDiter), row);
5437  importsOffset += row_length;
5438  }
5439  }
5440 
5441 
5442  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5443  void
5445  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
5446  {
5447  using Teuchos::Comm;
5448  using Teuchos::null;
5449  using Teuchos::ParameterList;
5450  using Teuchos::RCP;
5451 
5452  // We'll set all the state "transactionally," so that this method
5453  // satisfies the strong exception guarantee. This object's state
5454  // won't be modified until the end of this method.
5455  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
5456  RCP<import_type> importer;
5457  RCP<export_type> exporter;
5458 
5459  rowMap = newMap;
5460  RCP<const Comm<int> > newComm =
5461  (newMap.is_null ()) ? null : newMap->getComm ();
5462 
5463  if (! domainMap_.is_null ()) {
5464  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
5465  // Common case: original domain and row Maps are identical.
5466  // In that case, we need only replace the original domain Map
5467  // with the new Map. This ensures that the new domain and row
5468  // Maps _stay_ identical.
5469  domainMap = newMap;
5470  } else {
5471  domainMap = domainMap_->replaceCommWithSubset (newComm);
5472  }
5473  }
5474  if (! rangeMap_.is_null ()) {
5475  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
5476  // Common case: original range and row Maps are identical. In
5477  // that case, we need only replace the original range Map with
5478  // the new Map. This ensures that the new range and row Maps
5479  // _stay_ identical.
5480  rangeMap = newMap;
5481  } else {
5482  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
5483  }
5484  }
5485  if (! colMap.is_null ()) {
5486  colMap = colMap_->replaceCommWithSubset (newComm);
5487  }
5488 
5489  // (Re)create the Export and / or Import if necessary.
5490  if (! newComm.is_null ()) {
5491  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
5492  //
5493  // The operations below are collective on the new communicator.
5494  //
5495  // (Re)create the Export object if necessary. If I haven't
5496  // called fillComplete yet, I don't have a rangeMap, so I must
5497  // first check if the _original_ rangeMap is not null. Ditto
5498  // for the Import object and the domain Map.
5499  if (! rangeMap_.is_null () &&
5500  rangeMap != rowMap &&
5501  ! rangeMap->isSameAs (*rowMap)) {
5502  if (params.is_null () || ! params->isSublist ("Export")) {
5503  exporter = rcp (new export_type (rowMap, rangeMap));
5504  }
5505  else {
5506  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
5507  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
5508  }
5509  }
5510  // (Re)create the Import object if necessary.
5511  if (! domainMap_.is_null () &&
5512  domainMap != colMap &&
5513  ! domainMap->isSameAs (*colMap)) {
5514  if (params.is_null () || ! params->isSublist ("Import")) {
5515  importer = rcp (new import_type (domainMap, colMap));
5516  } else {
5517  RCP<ParameterList> importSublist = sublist (params, "Import", true);
5518  importer = rcp (new import_type (domainMap, colMap, importSublist));
5519  }
5520  }
5521  } // if newComm is not null
5522 
5523  // Defer side effects until the end. If no destructors throw
5524  // exceptions (they shouldn't anyway), then this method satisfies
5525  // the strong exception guarantee.
5526  exporter_ = exporter;
5527  importer_ = importer;
5528  rowMap_ = rowMap;
5529  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
5530  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
5531  // the same object. We might want to get rid of this redundant
5532  // pointer sometime, but for now, we'll leave it alone and just
5533  // set map_ to the same object.
5534  this->map_ = rowMap;
5535  domainMap_ = domainMap;
5536  rangeMap_ = rangeMap;
5537  colMap_ = colMap;
5538  }
5539 
5540  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5541  bool
5543  hasRowInfo () const
5544  {
5545  if (this->indicesAreAllocated () &&
5546  this->getProfileType () == StaticProfile &&
5547  this->k_rowPtrs_.dimension_0 () == 0) {
5548  return false;
5549  } else {
5550  return true;
5551  }
5552  }
5553 
5554  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5555  void
5557  getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
5558  {
5559  typedef LocalOrdinal LO;
5560  typedef GlobalOrdinal GO;
5561  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
5562 
5563  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5564  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
5565  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
5566  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5567  (static_cast<LO> (offsets.dimension_0 ()) < lclNumRows,
5568  std::invalid_argument, "offsets.dimension_0() = " <<
5569  offsets.dimension_0 () << " < getNodeNumRows() = " << lclNumRows << ".");
5570 
5571  const map_type& rowMap = * (this->getRowMap ());
5572  const map_type& colMap = * (this->getColMap ());
5573 
5574 #ifdef HAVE_TPETRA_DEBUG
5575  bool allRowMapDiagEntriesInColMap = true;
5576  bool allDiagEntriesFound = true;
5577  bool allOffsetsCorrect = true;
5578  bool noOtherWeirdness = true;
5579  std::vector<std::pair<LO, size_t> > wrongOffsets;
5580 #endif // HAVE_TPETRA_DEBUG
5581 
5582  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
5583  // the subset of Map functionality that we need below.
5584  auto lclRowMap = rowMap.getLocalMap ();
5585  auto lclColMap = colMap.getLocalMap ();
5586 
5587  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
5588  // setup, at least on the host. For CUDA, we have to use LocalMap
5589  // (that comes from each of the two Maps).
5590 
5591  const bool sorted = this->isSorted ();
5592  if (isFillComplete ()) {
5593  auto lclGraph = this->getLocalGraph ();
5594  Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
5595  lclGraph.row_map, lclGraph.entries,
5596  sorted);
5597  }
5598  else {
5599  // NOTE (mfh 22 Feb 2017): We have to run this code on host,
5600  // since the graph is not fill complete. The previous version
5601  // of this code assumed UVM; this version does not.
5602  auto offsets_h = Kokkos::create_mirror_view (offsets);
5603 
5604  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
5605  // Find the diagonal entry. Since the row Map and column Map
5606  // may differ, we have to compare global row and column
5607  // indices, not local.
5608  const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
5609  const GO gblColInd = gblRowInd;
5610  const LO lclColInd = lclColMap.getLocalElement (gblColInd);
5611 
5612  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
5613 #ifdef HAVE_TPETRA_DEBUG
5614  allRowMapDiagEntriesInColMap = false;
5615 #endif // HAVE_TPETRA_DEBUG
5616  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
5617  }
5618  else {
5619  const RowInfo rowInfo = this->getRowInfo (lclRowInd);
5620  if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
5621  rowInfo.numEntries > 0) {
5622 
5623  auto colInds = this->getLocalKokkosRowView (rowInfo);
5624  const size_t hint = 0; // not needed for this algorithm
5625  const size_t offset =
5626  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
5627  lclColInd, hint, sorted);
5628  offsets_h(lclRowInd) = offset;
5629 
5630 #ifdef HAVE_TPETRA_DEBUG
5631  // Now that we have what we think is an offset, make sure
5632  // that it really does point to the diagonal entry. Offsets
5633  // are _relative_ to each row, not absolute (for the whole
5634  // (local) graph).
5635  Teuchos::ArrayView<const LO> lclColInds;
5636  try {
5637  this->getLocalRowView (lclRowInd, lclColInds);
5638  }
5639  catch (...) {
5640  noOtherWeirdness = false;
5641  }
5642  // Don't continue with error checking if the above failed.
5643  if (noOtherWeirdness) {
5644  const size_t numEnt = lclColInds.size ();
5645  if (offset >= numEnt) {
5646  // Offsets are relative to each row, so this means that
5647  // the offset is out of bounds.
5648  allOffsetsCorrect = false;
5649  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
5650  } else {
5651  const LO actualLclColInd = lclColInds[offset];
5652  const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
5653  if (actualGblColInd != gblColInd) {
5654  allOffsetsCorrect = false;
5655  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
5656  }
5657  }
5658  }
5659 #endif // HAVE_TPETRA_DEBUG
5660  }
5661  else { // either row is empty, or something went wrong w/ getRowInfo()
5662  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
5663 #ifdef HAVE_TPETRA_DEBUG
5664  allDiagEntriesFound = false;
5665 #endif // HAVE_TPETRA_DEBUG
5666  }
5667  } // whether lclColInd is a valid local column index
5668  } // for each local row
5669 
5670  Kokkos::deep_copy (offsets, offsets_h);
5671  } // whether the graph is fill complete
5672 
5673 #ifdef HAVE_TPETRA_DEBUG
5674  if (wrongOffsets.size () != 0) {
5675  std::ostringstream os;
5676  os << "Proc " << this->getComm ()->getRank () << ": Wrong offsets: [";
5677  for (size_t k = 0; k < wrongOffsets.size (); ++k) {
5678  os << "(" << wrongOffsets[k].first << ","
5679  << wrongOffsets[k].second << ")";
5680  if (k + 1 < wrongOffsets.size ()) {
5681  os << ", ";
5682  }
5683  }
5684  os << "]" << std::endl;
5685  std::cerr << os.str ();
5686  }
5687 #endif // HAVE_TPETRA_DEBUG
5688 
5689 #ifdef HAVE_TPETRA_DEBUG
5690  using Teuchos::reduceAll;
5691  using std::endl;
5692  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
5693  const bool localSuccess =
5694  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
5695  const int numResults = 5;
5696  int lclResults[5];
5697  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
5698  lclResults[1] = allDiagEntriesFound ? 1 : 0;
5699  lclResults[2] = allOffsetsCorrect ? 1 : 0;
5700  lclResults[3] = noOtherWeirdness ? 1 : 0;
5701  // min-all-reduce will compute least rank of all the processes
5702  // that didn't succeed.
5703  lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
5704 
5705  int gblResults[5];
5706  gblResults[0] = 0;
5707  gblResults[1] = 0;
5708  gblResults[2] = 0;
5709  gblResults[3] = 0;
5710  gblResults[4] = 0;
5711  reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
5712  numResults, lclResults, gblResults);
5713 
5714  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
5715  || gblResults[3] != 1) {
5716  std::ostringstream os; // build error message
5717  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
5718  "possibly among others): " << endl;
5719  if (gblResults[0] == 0) {
5720  os << " - The column Map does not contain at least one diagonal entry "
5721  "of the graph." << endl;
5722  }
5723  if (gblResults[1] == 0) {
5724  os << " - On one or more processes, some row does not contain a "
5725  "diagonal entry." << endl;
5726  }
5727  if (gblResults[2] == 0) {
5728  os << " - On one or more processes, some offsets are incorrect."
5729  << endl;
5730  }
5731  if (gblResults[3] == 0) {
5732  os << " - One or more processes had some other error."
5733  << endl;
5734  }
5735  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
5736  }
5737 #endif // HAVE_TPETRA_DEBUG
5738  }
5739 
5740  namespace { // (anonymous)
5741 
5742  // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
5743  // below). The point is to avoid the deep copy between the input
5744  // Teuchos::ArrayRCP and the internally used Kokkos::View. We
5745  // can't use UVM to avoid the deep copy with CUDA, because the
5746  // ArrayRCP is a host pointer, while the input to the graph's
5747  // getLocalDiagOffsets method is a device pointer. Assigning a
5748  // host pointer to a device pointer is incorrect unless the host
5749  // pointer points to host pinned memory. The goal is to get rid
5750  // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
5751  // copy for backwards compatibility.
5752  //
5753  // We have to use template magic because
5754  // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
5755  // if device_type::memory_space is not Kokkos::HostSpace (as is
5756  // the case with CUDA).
5757 
5758  template<class DeviceType,
5759  const bool memSpaceIsHostSpace =
5760  std::is_same<typename DeviceType::memory_space,
5761  Kokkos::HostSpace>::value>
5762  struct HelpGetLocalDiagOffsets {};
5763 
5764  template<class DeviceType>
5765  struct HelpGetLocalDiagOffsets<DeviceType, true> {
5766  typedef DeviceType device_type;
5767  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
5768  Kokkos::MemoryUnmanaged> device_offsets_type;
5769  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
5770  Kokkos::MemoryUnmanaged> host_offsets_type;
5771 
5772  static device_offsets_type
5773  getDeviceOffsets (const host_offsets_type& hostOffsets)
5774  {
5775  // Host and device are the same; no need to allocate a
5776  // temporary device View.
5777  return hostOffsets;
5778  }
5779 
5780  static void
5781  copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
5782  const device_offsets_type& /* deviceOffsets */)
5783  { /* copy back not needed; host and device are the same */ }
5784  };
5785 
5786  template<class DeviceType>
5787  struct HelpGetLocalDiagOffsets<DeviceType, false> {
5788  typedef DeviceType device_type;
5789  // We have to do a deep copy, since host memory space != device
5790  // memory space. Thus, the device View is managed (we need to
5791  // allocate a temporary device View).
5792  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
5793  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
5794  Kokkos::MemoryUnmanaged> host_offsets_type;
5795 
5796  static device_offsets_type
5797  getDeviceOffsets (const host_offsets_type& hostOffsets)
5798  {
5799  // Host memory space != device memory space, so we must
5800  // allocate a temporary device View for the graph.
5801  return device_offsets_type ("offsets", hostOffsets.dimension_0 ());
5802  }
5803 
5804  static void
5805  copyBackIfNeeded (const host_offsets_type& hostOffsets,
5806  const device_offsets_type& deviceOffsets)
5807  {
5808  Kokkos::deep_copy (hostOffsets, deviceOffsets);
5809  }
5810  };
5811  } // namespace (anonymous)
5812 
5813 
5814  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5815  void
5817  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
5818  {
5819  typedef LocalOrdinal LO;
5820  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
5821  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5822  (! this->hasColMap (), std::runtime_error,
5823  "The graph does not yet have a column Map.");
5824  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
5825  if (static_cast<LO> (offsets.size ()) != myNumRows) {
5826  // NOTE (mfh 21 Jan 2016) This means that the method does not
5827  // satisfy the strong exception guarantee (no side effects
5828  // unless successful).
5829  offsets.resize (myNumRows);
5830  }
5831 
5832  // mfh 21 Jan 2016: This method unfortunately takes a
5833  // Teuchos::ArrayRCP, which is host memory. The graph wants a
5834  // device pointer. We can't access host memory from the device;
5835  // that's the wrong direction for UVM. (It's the right direction
5836  // for inefficient host pinned memory, but we don't want to use
5837  // that here.) Thus, if device memory space != host memory space,
5838  // we allocate and use a temporary device View to get the offsets.
5839  // If the two spaces are equal, the template magic makes the deep
5840  // copy go away.
5841  typedef HelpGetLocalDiagOffsets<device_type> helper_type;
5842  typedef typename helper_type::host_offsets_type host_offsets_type;
5843  // Unmanaged host View that views the output array.
5844  host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
5845  // Allocate temp device View if host != device, else reuse host array.
5846  auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
5847  // NOT recursion; this calls the overload that takes a device View.
5848  this->getLocalDiagOffsets (deviceOffsets);
5849  helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
5850  }
5851 
5852  template <class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5853  bool
5856  return true;
5857  }
5858 
5859 } // namespace Tpetra
5860 
5861 //
5862 // Explicit instantiation macros
5863 //
5864 // Must be expanded from within the Tpetra namespace!
5865 //
5866 #define TPETRA_CRSGRAPH_GRAPH_INSTANT(LO,GO,NODE) template class CrsGraph< LO , GO , NODE >;
5867 
5868 // WARNING: These macros exist only for backwards compatibility.
5869 // We will remove them at some point.
5870 #define TPETRA_CRSGRAPH_SORTROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE)
5871 #define TPETRA_CRSGRAPH_MERGEROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE)
5872 #define TPETRA_CRSGRAPH_ALLOCATEVALUES1D_INSTANT(S,LO,GO,NODE)
5873 #define TPETRA_CRSGRAPH_ALLOCATEVALUES2D_INSTANT(S,LO,GO,NODE)
5874 
5875 #define TPETRA_CRSGRAPH_INSTANT(S,LO,GO,NODE) \
5876  TPETRA_CRSGRAPH_SORTROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE) \
5877  TPETRA_CRSGRAPH_MERGEROWINDICESANDVALUES_INSTANT(S,LO,GO,NODE) \
5878  TPETRA_CRSGRAPH_ALLOCATEVALUES1D_INSTANT(S,LO,GO,NODE) \
5879  TPETRA_CRSGRAPH_ALLOCATEVALUES2D_INSTANT(S,LO,GO,NODE)
5880 
5881 #endif // TPETRA_CRSGRAPH_DEF_HPP
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
GlobalOrdinal getIndexBase() const
Returns the index base for global indices for this graph.
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
node_type ::device_type device_type
This class&#39; Kokkos device type.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph&#39;s column Map.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object&#39;s Map.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns the communicator.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
Teuchos::RCP< node_type > getNode() const
Returns the underlying node.
Implementation details of Tpetra.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
size_t global_size_t
Global size_t object.
Kokkos::StaticCrsGraph< LO, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
Insert new values that don&#39;t currently exist.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map where each GID is owned by only one process...
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a (StaticProfile) CrsGraph&#39;s global column indices into local column indices.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
Abstract base class for objects that can be the source of an Import or Export operation.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType &count)
Compute offsets from a constant count.
node_type node_type
This class&#39; Kokkos Node type.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
device_type::execution_space execution_space
This class&#39; Kokkos execution space.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Kokkos::View< GO *, execution_space > t_GlobalOrdinal_1D
Type of the k_gblInds1D_ array of global column indices.
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.