Tpetra parallel linear algebra  Version of the Day
Tpetra_CrsMatrix_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSMATRIX_DEF_HPP
43 #define TPETRA_CRSMATRIX_DEF_HPP
44 
52 
53 #include "Tpetra_RowMatrix.hpp"
54 #include "Tpetra_Import_Util.hpp"
55 #include "Tpetra_Import_Util2.hpp"
58 #include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
59 #include "Tpetra_Details_gathervPrint.hpp"
61 //#include "Tpetra_Util.hpp" // comes in from Tpetra_CrsGraph_decl.hpp
62 #include "Teuchos_SerialDenseMatrix.hpp"
63 #include "Kokkos_Sparse_getDiagCopy.hpp"
64 #include "Tpetra_Details_copyConvert.hpp"
65 #include "Tpetra_Details_Environment.hpp"
69 #include <typeinfo>
70 #include <vector>
71 
72 namespace Tpetra {
73 //
74 // Users must never rely on anything in the Details namespace.
75 //
76 namespace Details {
77 
// Binary function object that returns the larger of the magnitudes
// of its two arguments, with magnitudes computed by
// Teuchos::ScalarTraits<Scalar>::magnitude.
87 template<class Scalar>
88 struct AbsMax {
// Return the maximum of magnitude(x) and magnitude(y), converted to
// Scalar.  The call operator reads no object state, so it is
// declared const; this lets callers invoke the functor through a
// const object or a const reference.
90  Scalar operator() (const Scalar& x, const Scalar& y) const {
91  typedef Teuchos::ScalarTraits<Scalar> STS;
92  return std::max (STS::magnitude (x), STS::magnitude (y));
93  }
94 };
95 
96 } // namespace Details
97 } // namespace Tpetra
98 
99 namespace Tpetra {
100 
// Constructor taking a row Map and a single size_t
// (maxNumEntriesPerRow) that is forwarded to the graph constructor.
//
// Builds a new crs_graph_type from (rowMap, maxNumEntriesPerRow,
// pftype, params), stores it in both myGraph_ and staticGraph_, and
// then calls resumeFill (params).  storageStatus_ starts as
// STORAGE_1D_UNPACKED when pftype is StaticProfile and as STORAGE_2D
// otherwise; fillComplete_ starts false.  A std::exception thrown by
// the graph constructor is re-reported as std::runtime_error with
// the original what() text appended.
//
// NOTE(review): frobNorm_ starts at -STM::one (), presumably a "not
// yet computed" marker -- confirm.  The embedded listing numbers
// skip 102 and 133, so this listing appears to omit lines -- confirm
// against the real file.
101  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
103  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
104  size_t maxNumEntriesPerRow,
105  ProfileType pftype,
106  const Teuchos::RCP<Teuchos::ParameterList>& params) :
107  dist_object_type (rowMap),
108  storageStatus_ (pftype == StaticProfile ?
109  Details::STORAGE_1D_UNPACKED :
110  Details::STORAGE_2D),
111  fillComplete_ (false),
112  frobNorm_ (-STM::one ())
113  {
114  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, size_t, "
115  "ProfileType[, RCP<ParameterList>]): ";
116  Teuchos::RCP<crs_graph_type> graph;
117  try {
118  graph = Teuchos::rcp (new crs_graph_type (rowMap, maxNumEntriesPerRow,
119  pftype, params));
120  }
121  catch (std::exception& e) {
122  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
123  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
124  "size_t, ProfileType[, RCP<ParameterList>]) threw an exception: "
125  << e.what ());
126  }
127  // myGraph_ not null means that the matrix owns the graph. That's
128  // different than the const CrsGraph constructor, where the matrix
129  // does _not_ own the graph.
130  myGraph_ = graph;
131  staticGraph_ = myGraph_;
132  resumeFill (params);
134  }
135 
// Constructor taking a row Map and a per-row array of size_t
// (NumEntriesPerRowToAlloc) that is forwarded to the graph
// constructor.
//
// Builds a new crs_graph_type from (rowMap, NumEntriesPerRowToAlloc,
// pftype, params), stores it in both myGraph_ and staticGraph_, and
// then calls resumeFill (params).  storageStatus_ starts as
// STORAGE_1D_UNPACKED when pftype is StaticProfile and as STORAGE_2D
// otherwise; fillComplete_ starts false.  A std::exception thrown by
// the graph constructor is re-reported as std::runtime_error with
// the original what() text appended.
//
// NOTE(review): the embedded listing numbers skip 137 and 168, so
// this listing appears to omit lines -- confirm against the real
// file.
136  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
138  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
139  const Teuchos::ArrayRCP<const size_t>& NumEntriesPerRowToAlloc,
140  ProfileType pftype,
141  const Teuchos::RCP<Teuchos::ParameterList>& params) :
142  dist_object_type (rowMap),
143  storageStatus_ (pftype == StaticProfile ?
144  Details::STORAGE_1D_UNPACKED :
145  Details::STORAGE_2D),
146  fillComplete_ (false),
147  frobNorm_ (-STM::one ())
148  {
149  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, "
150  "ArrayRCP<const size_t>, ProfileType[, RCP<ParameterList>]): ";
151  Teuchos::RCP<crs_graph_type> graph;
152  try {
153  graph = Teuchos::rcp (new crs_graph_type (rowMap, NumEntriesPerRowToAlloc,
154  pftype, params));
155  }
156  catch (std::exception &e) {
157  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
158  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
159  "ArrayRCP<const size_t>, ProfileType[, RCP<ParameterList>]) threw "
160  "an exception: " << e.what ());
161  }
162  // myGraph_ not null means that the matrix owns the graph. That's
163  // different than the const CrsGraph constructor, where the matrix
164  // does _not_ own the graph.
165  myGraph_ = graph;
166  staticGraph_ = graph;
167  resumeFill (params);
169  }
170 
// Constructor taking a row Map, a column Map, and a single size_t
// (maxNumEntriesPerRow) that is forwarded to the graph constructor.
//
// Builds a new crs_graph_type from (rowMap, colMap,
// maxNumEntriesPerRow, pftype, params), stores it in both myGraph_
// and staticGraph_, and then calls resumeFill (params).
// storageStatus_ starts as STORAGE_1D_UNPACKED when pftype is
// StaticProfile and as STORAGE_2D otherwise; fillComplete_ starts
// false.  In HAVE_TPETRA_DEBUG builds, the constructor first checks
// that staticGraph_ and myGraph_ are still null and reports
// std::logic_error if not.  A std::exception thrown by the graph
// constructor is re-reported as std::runtime_error with the original
// what() text appended.
//
// NOTE(review): the embedded listing numbers skip 172 and 218, so
// this listing appears to omit lines -- confirm against the real
// file.
171  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
173  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
174  const Teuchos::RCP<const map_type>& colMap,
175  size_t maxNumEntriesPerRow,
176  ProfileType pftype,
177  const Teuchos::RCP<Teuchos::ParameterList>& params) :
178  dist_object_type (rowMap),
179  storageStatus_ (pftype == StaticProfile ?
180  Details::STORAGE_1D_UNPACKED :
181  Details::STORAGE_2D),
182  fillComplete_ (false),
183  frobNorm_ (-STM::one ())
184  {
185  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, RCP<const Map>, "
186  "size_t, ProfileType[, RCP<ParameterList>]): ";
187 
188 #ifdef HAVE_TPETRA_DEBUG
189  // An artifact of debugging something a while back.
190  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
191  (! staticGraph_.is_null (), std::logic_error,
192  "staticGraph_ is not null at the beginning of the constructor. "
193  "Please report this bug to the Tpetra developers.");
194  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
195  (! myGraph_.is_null (), std::logic_error,
196  "myGraph_ is not null at the beginning of the constructor. "
197  "Please report this bug to the Tpetra developers.");
198 #endif // HAVE_TPETRA_DEBUG
199 
200  Teuchos::RCP<crs_graph_type> graph;
201  try {
202  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
203  maxNumEntriesPerRow,
204  pftype, params));
205  }
206  catch (std::exception &e) {
207  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
208  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
209  "RCP<const Map>, size_t, ProfileType[, RCP<ParameterList>]) threw an "
210  "exception: " << e.what ());
211  }
212  // myGraph_ not null means that the matrix owns the graph. That's
213  // different than the const CrsGraph constructor, where the matrix
214  // does _not_ own the graph.
215  myGraph_ = graph;
216  staticGraph_ = myGraph_;
217  resumeFill (params);
219  }
220 
// Constructor taking a row Map, a column Map, and a per-row array
// of size_t (numEntPerRow) that is forwarded to the graph
// constructor.
//
// Builds a new crs_graph_type from (rowMap, colMap, numEntPerRow,
// pftype, params), stores it in both myGraph_ and staticGraph_, and
// then calls resumeFill (params).  storageStatus_ starts as
// STORAGE_1D_UNPACKED when pftype is StaticProfile and as STORAGE_2D
// otherwise; fillComplete_ starts false.  A std::exception thrown by
// the graph constructor is re-reported as std::runtime_error with
// the original what() text appended.
//
// NOTE(review): the embedded listing numbers skip 222 and 254, so
// this listing appears to omit lines -- confirm against the real
// file.
221  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
223  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
224  const Teuchos::RCP<const map_type>& colMap,
225  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
226  ProfileType pftype,
227  const Teuchos::RCP<Teuchos::ParameterList>& params) :
228  dist_object_type (rowMap),
229  storageStatus_ (pftype == StaticProfile ?
230  Details::STORAGE_1D_UNPACKED :
231  Details::STORAGE_2D),
232  fillComplete_ (false),
233  frobNorm_ (-STM::one ())
234  {
235  const char tfecfFuncName[] = "CrsMatrix(RCP<const Map>, RCP<const Map>, "
236  "ArrayRCP<const size_t>, ProfileType[, RCP<ParameterList>]): ";
237  Teuchos::RCP<crs_graph_type> graph;
238  try {
239  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, numEntPerRow,
240  pftype, params));
241  }
242  catch (std::exception &e) {
243  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
244  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
245  "RCP<const Map>, ArrayRCP<const size_t>, ProfileType[, "
246  "RCP<ParameterList>]) threw an exception: " << e.what ());
247  }
248  // myGraph_ not null means that the matrix owns the graph. That's
249  // different than the const CrsGraph constructor, where the matrix
250  // does _not_ own the graph.
251  myGraph_ = graph;
252  staticGraph_ = graph;
253  resumeFill (params);
255  }
256 
// Constructor taking an existing const graph.
//
// Stores the input graph in staticGraph_ only (myGraph_ is not
// assigned in this block), starts with STORAGE_1D_PACKED storage and
// fillComplete_ == false, and requires the graph to be fill
// complete.  It then allocates a values array with one entry per
// entry of the graph's local graph, builds lclMatrix_ from it, and
// makes k_values1D_ refer to lclMatrix_.values.  The second
// (ParameterList) argument is unnamed and unused here.
//
// Reports std::runtime_error if the graph is null or not fill
// complete.
//
// NOTE(review): the initializer list calls graph->getRowMap ()
// before the graph.is_null () test in the body runs, so a null input
// is dereferenced before that test can report it -- confirm whether
// the null test is reachable.
// NOTE(review): the embedded listing numbers skip 258 and 298, so
// this listing appears to omit lines -- confirm against the real
// file.
257  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
259  CrsMatrix (const Teuchos::RCP<const crs_graph_type>& graph,
260  const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
261  dist_object_type (graph->getRowMap ()),
262  staticGraph_ (graph),
263  storageStatus_ (Details::STORAGE_1D_PACKED),
264  fillComplete_ (false),
265  frobNorm_ (-STM::one ())
266  {
267  typedef typename local_matrix_type::values_type values_type;
268  const char tfecfFuncName[] = "CrsMatrix(RCP<const CrsGraph>[, "
269  "RCP<ParameterList>]): ";
270  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
271  (graph.is_null (), std::runtime_error, "Input graph is null.");
272  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
273  (! graph->isFillComplete (), std::runtime_error, "Input graph is not "
274  "fill complete. You must call fillComplete on the graph before using "
275  "it to construct a CrsMatrix. Note that calling resumeFill on the "
276  "graph makes it not fill complete, even if you had previously called "
277  "fillComplete. In that case, you must call fillComplete on the graph "
278  "again.");
279 
280  // The graph is fill complete, so it is locally indexed and has a
281  // fixed structure. This means we can allocate the (1-D) array of
282  // values and build the local matrix right now. Note that the
283  // local matrix's number of columns comes from the column Map, not
284  // the domain Map.
285 
286  const size_t numCols = graph->getColMap ()->getNodeNumElements ();
287  auto lclGraph = graph->getLocalGraph ();
288  const size_t numEnt = lclGraph.entries.dimension_0 ();
289  values_type val ("Tpetra::CrsMatrix::val", numEnt);
290 
291  this->lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
292  numCols, val, lclGraph);
293  // FIXME (22 Jun 2016) I would very much like to get rid of
294  // k_values1D_ at some point. I find it confusing to have all
295  // these extra references lying around.
296  this->k_values1D_ = this->lclMatrix_.values;
297 
299  }
300 
// Constructor taking a row Map, a column Map, and the three
// compressed-sparse-row arrays (rowPointers, columnIndices, values)
// as Kokkos views.
//
// Steps, in order:
//   1. Report std::invalid_argument if values and columnIndices
//      differ in length.  In HAVE_TPETRA_DEBUG builds, also compare
//      the last entry of rowPointers against both lengths.
//   2. Build a crs_graph_type from (rowMap, colMap, rowPointers,
//      columnIndices, params); a std::exception from that
//      constructor is re-reported as std::runtime_error.
//   3. Check that the new graph's local graph has the expected
//      dimensions (std::logic_error otherwise).
//   4. Store the graph in both myGraph_ and staticGraph_.
//   5. Build lclMatrix_ from the input values and the local graph,
//      check its values length, and make k_values1D_ refer to
//      lclMatrix_.values.
//
// The matrix starts with STORAGE_1D_PACKED storage and
// fillComplete_ == false.
//
// NOTE(review): the embedded listing numbers skip 302 and 400, so
// this listing appears to omit lines -- confirm against the real
// file.
301  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
303  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
304  const Teuchos::RCP<const map_type>& colMap,
305  const typename local_matrix_type::row_map_type& rowPointers,
306  const typename local_graph_type::entries_type::non_const_type& columnIndices,
307  const typename local_matrix_type::values_type& values,
308  const Teuchos::RCP<Teuchos::ParameterList>& params) :
309  dist_object_type (rowMap),
310  storageStatus_ (Details::STORAGE_1D_PACKED),
311  fillComplete_ (false),
312  frobNorm_ (-STM::one ())
313  {
314  using Teuchos::RCP;
315  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
316  "RCP<const Map>, ptr, ind, val[, params]): ";
317  const char suffix[] = ". Please report this bug to the Tpetra developers.";
318 
319  // Check the user's input. Note that this might throw only on
320  // some processes but not others, causing deadlock. We prefer
321  // deadlock due to exceptions to segfaults, because users can
322  // catch exceptions.
323  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
324  (values.dimension_0 () != columnIndices.dimension_0 (),
325  std::invalid_argument, "Input arrays don't have matching dimensions. "
326  "values.dimension_0() = " << values.dimension_0 () << " != "
327  "columnIndices.dimension_0() = " << columnIndices.dimension_0 () << ".");
328 #ifdef HAVE_TPETRA_DEBUG
329  if (rowPointers.dimension_0 () != 0) {
330  const size_t numEnt =
331  Details::getEntryOnHost (rowPointers, rowPointers.dimension_0 () - 1);
332  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
333  (numEnt != static_cast<size_t> (columnIndices.dimension_0 ()) ||
334  numEnt != static_cast<size_t> (values.dimension_0 ()),
335  std::invalid_argument, "Last entry of rowPointers says that the matrix"
336  " has " << numEnt << " entr" << (numEnt != 1 ? "ies" : "y") << ", but "
337  "the dimensions of columnIndices and values don't match this. "
338  "columnIndices.dimension_0() = " << columnIndices.dimension_0 () <<
339  " and values.dimension_0() = " << values.dimension_0 () << ".");
340  }
341 #endif // HAVE_TPETRA_DEBUG
342 
343  RCP<crs_graph_type> graph;
344  try {
345  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, rowPointers,
346  columnIndices, params));
347  }
348  catch (std::exception& e) {
349  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
350  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
351  "RCP<const Map>, ptr, ind[, params]) threw an exception: "
352  << e.what ());
353  }
354  // The newly created CrsGraph _must_ have a local graph at this
355  // point. We don't really care whether CrsGraph's constructor
356  // deep-copies or shallow-copies the input, but the dimensions
357  // have to be right. That's how we tell whether the CrsGraph has
358  // a local graph.
359  auto lclGraph = graph->getLocalGraph ();
360  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
361  (lclGraph.row_map.dimension_0 () != rowPointers.dimension_0 () ||
362  lclGraph.entries.dimension_0 () != columnIndices.dimension_0 (),
363  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, "
364  "ind[, params]) did not set the local graph correctly." << suffix);
365  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
366  (lclGraph.entries.dimension_0 () != values.dimension_0 (),
367  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
368  "params]) did not set the local graph correctly. "
369  "lclGraph.entries.dimension_0() = " << lclGraph.entries.dimension_0 ()
370  << " != values.dimension_0() = " << values.dimension_0 () << suffix);
371 
372  // myGraph_ not null means that the matrix owns the graph. This
373  // is true because the column indices come in as nonconst,
374  // implying shared ownership.
375  myGraph_ = graph;
376  staticGraph_ = graph;
377 
378  // The graph may not be fill complete yet. However, it is locally
379  // indexed (since we have a column Map) and has a fixed structure
380  // (due to the input arrays). This means we can allocate the
381  // (1-D) array of values and build the local matrix right now.
382  // Note that the local matrix's number of columns comes from the
383  // column Map, not the domain Map.
384 
385  const size_t numCols = graph->getColMap ()->getNodeNumElements ();
386  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
387  numCols, values, lclGraph);
388  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
389  (lclMatrix_.values.dimension_0 () != values.dimension_0 (),
390  std::logic_error, "Local matrix's constructor did not set the values "
391  "correctly. lclMatrix_.values.dimension_0() = " <<
392  lclMatrix_.values.dimension_0 () << " != values.dimension_0() = " <<
393  values.dimension_0 () << suffix);
394 
395  // FIXME (22 Jun 2016) I would very much like to get rid of
396  // k_values1D_ at some point. I find it confusing to have all
397  // these extra references lying around.
398  this->k_values1D_ = this->lclMatrix_.values;
399 
401  }
402 
// Constructor taking a row Map, a column Map, and the three
// compressed-sparse-row arrays (ptr, ind, val) as Teuchos::ArrayRCP.
//
// Builds a crs_graph_type from (rowMap, colMap, ptr, ind, params)
// and stores it in both myGraph_ and staticGraph_; a std::exception
// from the graph constructor is re-reported as std::runtime_error.
// It then checks that the graph's local graph has dimensions
// matching ptr.size () and ind.size () (std::logic_error otherwise),
// converts val to a Kokkos view with getKokkosViewDeepCopy, builds
// lclMatrix_ from that view and the local graph, and makes
// k_values1D_ refer to lclMatrix_.values.
//
// The matrix starts with STORAGE_1D_PACKED storage and
// fillComplete_ == false.
//
// NOTE(review): no check of val.size () against ind.size () is
// visible in this block -- confirm whether one is needed.
// NOTE(review): the embedded listing numbers skip 404 and 469, so
// this listing appears to omit lines -- confirm against the real
// file.
403  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
405  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
406  const Teuchos::RCP<const map_type>& colMap,
407  const Teuchos::ArrayRCP<size_t>& ptr,
408  const Teuchos::ArrayRCP<LocalOrdinal>& ind,
409  const Teuchos::ArrayRCP<Scalar>& val,
410  const Teuchos::RCP<Teuchos::ParameterList>& params) :
411  dist_object_type (rowMap),
412  storageStatus_ (Details::STORAGE_1D_PACKED),
413  fillComplete_ (false),
414  frobNorm_ (-STM::one ())
415  {
416  using Kokkos::Compat::getKokkosViewDeepCopy;
417  using Teuchos::av_reinterpret_cast;
418  using Teuchos::RCP;
419  typedef typename local_matrix_type::values_type values_type;
420  typedef impl_scalar_type IST;
421  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
422  "RCP<const Map>, ptr, ind, val[, params]): ";
423 
424  RCP<crs_graph_type> graph;
425  try {
426  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap, ptr,
427  ind, params));
428  }
429  catch (std::exception& e) {
430  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
431  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
432  "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
433  "RCP<ParameterList>]) threw an exception: " << e.what ());
434  }
435  // myGraph_ not null means that the matrix owns the graph. This
436  // is true because the column indices come in as nonconst,
437  // implying shared ownership.
438  myGraph_ = graph;
439  staticGraph_ = graph;
440 
441  // The graph may not be fill complete yet. However, it is locally
442  // indexed (since we have a column Map) and has a fixed structure
443  // (due to the input arrays). This means we can allocate the
444  // (1-D) array of values and build the local matrix right now.
445  // Note that the local matrix's number of columns comes from the
446  // column Map, not the domain Map.
447 
448  // The graph _must_ have a local graph at this point. We don't
449  // really care whether CrsGraph's constructor deep-copies or
450  // shallow-copies the input, but the dimensions have to be right.
451  // That's how we tell whether the CrsGraph has a local graph.
452  auto lclGraph = staticGraph_->getLocalGraph ();
453  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
454  (static_cast<size_t> (lclGraph.row_map.dimension_0 ()) != static_cast<size_t> (ptr.size ()) ||
455  static_cast<size_t> (lclGraph.entries.dimension_0 ()) != static_cast<size_t> (ind.size ()),
456  std::logic_error, "CrsGraph's constructor (rowMap, colMap, ptr, "
457  "ind[, params]) did not set the local graph correctly. Please "
458  "report this bug to the Tpetra developers.");
459 
460  const size_t numCols = staticGraph_->getColMap ()->getNodeNumElements ();
461  values_type valIn = getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
462  this->lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
463  numCols, valIn, lclGraph);
464  // FIXME (22 Jun 2016) I would very much like to get rid of
465  // k_values1D_ at some point. I find it confusing to have all
466  // these extra references lying around.
467  this->k_values1D_ = this->lclMatrix_.values;
468 
470  }
471 
// Constructor taking a row Map, a column Map, and an already-built
// local matrix.
//
// Copies lclMatrix into lclMatrix_, makes k_values1D_ refer to
// lclMatrix.values, and starts with STORAGE_1D_PACKED storage and
// fillComplete_ == true.  It then builds a crs_graph_type from
// (rowMap, colMap, lclMatrix.graph, params), requires that graph to
// be fill complete (std::logic_error otherwise), and stores it in
// both myGraph_ and staticGraph_.  A std::exception from the graph
// constructor is re-reported as std::runtime_error.
//
// In HAVE_TPETRA_DEBUG builds, the final checks confirm that the
// matrix reports itself as fill complete and not fill active.  Their
// messages name the constructor (they previously said
// "fillComplete()", which is not the function being executed here).
//
// NOTE(review): the embedded listing numbers skip 473, 508 and 519,
// so this listing appears to omit lines -- confirm against the real
// file.
472  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
474  CrsMatrix (const Teuchos::RCP<const map_type>& rowMap,
475  const Teuchos::RCP<const map_type>& colMap,
476  const local_matrix_type& lclMatrix,
477  const Teuchos::RCP<Teuchos::ParameterList>& params) :
478  dist_object_type (rowMap),
479  lclMatrix_ (lclMatrix),
480  k_values1D_ (lclMatrix.values),
481  storageStatus_ (Details::STORAGE_1D_PACKED),
482  fillComplete_ (true),
483  frobNorm_ (-STM::one ())
484  {
485  const char tfecfFuncName[] = "Tpetra::CrsMatrix(RCP<const Map>, "
486  "RCP<const Map>, local_matrix_type[, RCP<ParameterList>]): ";
487  Teuchos::RCP<crs_graph_type> graph;
488  try {
489  graph = Teuchos::rcp (new crs_graph_type (rowMap, colMap,
490  lclMatrix.graph, params));
491  }
492  catch (std::exception& e) {
493  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
494  (true, std::runtime_error, "CrsGraph constructor (RCP<const Map>, "
495  "RCP<const Map>, local_graph_type[, RCP<ParameterList>]) threw an "
496  "exception: " << e.what ());
497  }
498  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
499  (!graph->isFillComplete (), std::logic_error, "CrsGraph constructor (RCP"
500  "<const Map>, RCP<const Map>, local_graph_type[, RCP<ParameterList>]) "
501  "did not produce a fill-complete graph. Please report this bug to the "
502  "Tpetra developers.");
503  // myGraph_ not null means that the matrix owns the graph. This
504  // is true because the column indices come in as nonconst through
505  // the matrix, implying shared ownership.
506  myGraph_ = graph;
507  staticGraph_ = graph;
509 
510  // Sanity checks at the end.
511 #ifdef HAVE_TPETRA_DEBUG
512  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive (), std::logic_error,
513  "We're at the end of the constructor, but isFillActive() is true. "
514  "Please report this bug to the Tpetra developers.");
515  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete (), std::logic_error,
516  "We're at the end of the constructor, but isFillComplete() is false. "
517  "Please report this bug to the Tpetra developers.");
518 #endif // HAVE_TPETRA_DEBUG
520  }
521 
// Member definition with an empty body.
// NOTE(review): the declarator (embedded listing lines 523-524) is
// missing from this listing; presumably this is the destructor --
// confirm against the real file.
522  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
525  {}
526 
// Returns the communicator reported by the matrix's graph
// (forwards to getCrsGraphRef ().getComm ()).
527  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
528  Teuchos::RCP<const Teuchos::Comm<int> >
530  getComm () const {
531  return getCrsGraphRef ().getComm ();
532  }
533 
// Returns the Node instance reported by the matrix's graph
// (forwards to getCrsGraphRef ().getNode ()).
534  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
535  Teuchos::RCP<Node>
537  getNode () const {
538  return getCrsGraphRef ().getNode ();
539  }
540 
// Returns the profile type reported by the matrix's graph
// (forwards to getCrsGraphRef ().getProfileType ()).
// NOTE(review): the return-type line is missing from this listing
// (embedded numbers skip 542-543).
541  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
544  getProfileType () const {
545  return this->getCrsGraphRef ().getProfileType ();
546  }
547 
// Returns the current value of the fillComplete_ flag.
548  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
549  bool
551  isFillComplete () const {
552  return fillComplete_;
553  }
554 
// Returns the negation of the fillComplete_ flag, so it is always
// the opposite of isFillComplete ().
555  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
556  bool
558  isFillActive () const {
559  return ! fillComplete_;
560  }
561 
// Returns the graph's isStorageOptimized () result.
// NOTE(review): the declarator (embedded listing lines 564-565) is
// missing from this listing; the function name is presumably the
// same as the forwarded call -- confirm.
562  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
563  bool
566  return this->getCrsGraphRef ().isStorageOptimized ();
567  }
568 
// Returns the graph's isLocallyIndexed () result.
// NOTE(review): the declarator (embedded listing lines 571-572) is
// missing from this listing; the function name is presumably the
// same as the forwarded call -- confirm.
569  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
570  bool
573  return getCrsGraphRef ().isLocallyIndexed ();
574  }
575 
// Returns the graph's isGloballyIndexed () result.
// NOTE(review): the declarator (embedded listing lines 578-579) is
// missing from this listing; the function name is presumably the
// same as the forwarded call -- confirm.
576  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
577  bool
580  return getCrsGraphRef ().isGloballyIndexed ();
581  }
582 
// Returns the graph's hasColMap () result.
583  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
584  bool
586  hasColMap () const {
587  return getCrsGraphRef ().hasColMap ();
588  }
589 
// Returns the graph's getGlobalNumEntries () result.
// NOTE(review): the return type and declarator (embedded listing
// lines 591-593) are missing from this listing; the function name
// is presumably the same as the forwarded call -- confirm.
590  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
594  return getCrsGraphRef ().getGlobalNumEntries ();
595  }
596 
// Returns the graph's getNodeNumEntries () result.
// NOTE(review): the declarator (embedded listing lines 599-600) is
// missing from this listing; the function name is presumably the
// same as the forwarded call -- confirm.
597  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
598  size_t
601  return getCrsGraphRef ().getNodeNumEntries ();
602  }
603 
// Returns the graph's getGlobalNumRows () result.
// NOTE(review): the return type and declarator (embedded listing
// lines 605-607) are missing from this listing; the function name
// is presumably the same as the forwarded call -- confirm.
604  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
608  return getCrsGraphRef ().getGlobalNumRows ();
609  }
610 
// Returns the graph's getGlobalNumCols () result.
// NOTE(review): the return type and declarator (embedded listing
// lines 612-614) are missing from this listing; the function name
// is presumably the same as the forwarded call -- confirm.
611  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
615  return getCrsGraphRef ().getGlobalNumCols ();
616  }
617 
// Returns the graph's getNodeNumRows () result.
618  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
619  size_t
621  getNodeNumRows () const {
622  return getCrsGraphRef ().getNodeNumRows ();
623  }
624 
// Returns the graph's getNodeNumCols () result.
625  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
626  size_t
628  getNodeNumCols () const {
629  return getCrsGraphRef ().getNodeNumCols ();
630  }
631 
// Returns the graph's getGlobalNumDiags () result.
// NOTE(review): the return type and declarator (embedded listing
// lines 633-635) are missing from this listing; the function name
// is presumably the same as the forwarded call -- confirm.
632  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
636  return getCrsGraphRef ().getGlobalNumDiags ();
637  }
638 
// Returns the graph's getNodeNumDiags () result.
// NOTE(review): the declarator (embedded listing lines 641-642) is
// missing from this listing; the function name is presumably the
// same as the forwarded call -- confirm.
639  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
640  size_t
643  return getCrsGraphRef ().getNodeNumDiags ();
644  }
645 
// Returns the graph's getNumEntriesInGlobalRow (globalRow) result;
// the globalRow argument is passed through unchanged.
646  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
647  size_t
649  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const {
650  return getCrsGraphRef ().getNumEntriesInGlobalRow (globalRow);
651  }
652 
// Returns the graph's getNumEntriesInLocalRow (localRow) result;
// the localRow argument is passed through unchanged.
653  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
654  size_t
656  getNumEntriesInLocalRow (LocalOrdinal localRow) const {
657  return getCrsGraphRef ().getNumEntriesInLocalRow (localRow);
658  }
659 
// Returns the graph's getGlobalMaxNumRowEntries () result.
// NOTE(review): the declarator (embedded listing lines 662-663) is
// missing from this listing; the function name is presumably the
// same as the forwarded call -- confirm.
660  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
661  size_t
664  return getCrsGraphRef ().getGlobalMaxNumRowEntries ();
665  }
666 
// Returns the graph's getNodeMaxNumRowEntries () result.
// NOTE(review): the declarator (embedded listing lines 669-670) is
// missing from this listing; the function name is presumably the
// same as the forwarded call -- confirm.
667  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
668  size_t
671  return getCrsGraphRef ().getNodeMaxNumRowEntries ();
672  }
673 
// Returns the index base of the matrix's row Map
// (forwards to getRowMap ()->getIndexBase ()).
674  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
675  GlobalOrdinal
677  getIndexBase () const {
678  return getRowMap ()->getIndexBase ();
679  }
680 
// Returns the row Map reported by the matrix's graph
// (forwards to getCrsGraphRef ().getRowMap ()).
681  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
682  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
684  getRowMap () const {
685  return getCrsGraphRef ().getRowMap ();
686  }
687 
// Returns the column Map reported by the matrix's graph
// (forwards to getCrsGraphRef ().getColMap ()).
688  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
689  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
691  getColMap () const {
692  return getCrsGraphRef ().getColMap ();
693  }
694 
// Returns the domain Map reported by the matrix's graph
// (forwards to getCrsGraphRef ().getDomainMap ()).
695  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
696  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
698  getDomainMap () const {
699  return getCrsGraphRef ().getDomainMap ();
700  }
701 
// Returns the range Map reported by the matrix's graph
// (forwards to getCrsGraphRef ().getRangeMap ()).
702  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
703  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
705  getRangeMap () const {
706  return getCrsGraphRef ().getRangeMap ();
707  }
708 
// Returns the matrix's graph as an RCP to const RowGraph.
// staticGraph_ is returned when it is non-null; otherwise myGraph_
// is returned (which may itself be null -- no check is made here).
709  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
710  Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
712  getGraph () const {
713  if (staticGraph_ != Teuchos::null) {
714  return staticGraph_;
715  }
716  return myGraph_;
717  }
718 
// Returns the matrix's graph as an RCP to const CrsGraph.
// staticGraph_ is returned when it is non-null; otherwise myGraph_
// is returned (which may itself be null -- no check is made here).
719  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
720  Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node, classic> >
722  getCrsGraph () const {
723  if (staticGraph_ != Teuchos::null) {
724  return staticGraph_;
725  }
726  return myGraph_;
727  }
728 
// Returns the matrix's graph by dereferencing the stored pointer:
// *staticGraph_ when staticGraph_ is non-null, otherwise *myGraph_.
//
// In HAVE_TPETRA_DEBUG builds, std::logic_error is reported when
// both pointers are null.  Outside debug builds there is no such
// check before myGraph_ is dereferenced.
// NOTE(review): the return-type line is missing from this listing
// (embedded numbers skip 730-731).
729  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
732  getCrsGraphRef () const {
733  if (! this->staticGraph_.is_null ()) {
734  return * (this->staticGraph_);
735  }
736  else {
737 #ifdef HAVE_TPETRA_DEBUG
738  const char tfecfFuncName[] = "getCrsGraphRef: ";
739  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
740  (this->myGraph_.is_null (), std::logic_error,
741  "Both staticGraph_ and myGraph_ are null. "
742  "Please report this bug to the Tpetra developers.");
743 #endif // HAVE_TPETRA_DEBUG
744  return * (this->myGraph_);
745  }
746  }
747 
// Returns the graph's isLowerTriangular () result.
// NOTE(review): the declarator (embedded listing lines 750-751) is
// missing from this listing; the function name is presumably the
// same as the forwarded call -- confirm.
748  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
749  bool
752  return getCrsGraphRef ().isLowerTriangular ();
753  }
754 
// Returns the graph's isUpperTriangular () result.
// NOTE(review): the declarator (embedded listing lines 757-758) is
// missing from this listing; the function name is presumably the
// same as the forwarded call -- confirm.
755  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
756  bool
759  return getCrsGraphRef ().isUpperTriangular ();
760  }
761 
// Returns true exactly when myGraph_ is null.  Comments in the
// constructors of this file describe a non-null myGraph_ as meaning
// that the matrix owns its graph.
762  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
763  bool
765  isStaticGraph () const {
766  return myGraph_.is_null ();
767  }
768 
// Boolean query that unconditionally returns true.
// NOTE(review): the declarator (embedded listing lines 771-772) is
// missing from this listing, so the function's name cannot be
// determined here -- confirm against the real file.
769  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
770  bool
773  return true;
774  }
775 
// Boolean query that unconditionally returns true.
// NOTE(review): the declarator (embedded listing lines 778-779) is
// missing from this listing, so the function's name cannot be
// determined here -- confirm against the real file.
776  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
777  bool
780  return true;
781  }
782 
783  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
784  void
786  allocateValues (ELocalGlobal lg, GraphAllocationStatus gas)
787  {
788  using ::Tpetra::Details::ProfilingRegion;
789  const char tfecfFuncName[] = "allocateValues: ";
790  ProfilingRegion regionAllocateValues ("Tpetra::CrsMatrix::allocateValues");
791 
792 #ifdef HAVE_TPETRA_DEBUG
793  const char suffix[] = " Please report this bug to the Tpetra developers.";
794 
795  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
796  (this->staticGraph_.is_null (), std::logic_error,
797  "staticGraph_ is null." << suffix);
798 
799  // If the graph indices are already allocated, then gas should be
800  // GraphAlreadyAllocated. Otherwise, gas should be
801  // GraphNotYetAllocated.
802  if ((gas == GraphAlreadyAllocated) != this->staticGraph_->indicesAreAllocated ()) {
803  const char err1[] = "The caller has asserted that the graph is ";
804  const char err2[] = "already allocated, but the static graph says "
805  "that its indices are ";
806  const char err3[] = "already allocated. Please report this bug to "
807  "the Tpetra developers.";
808  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
809  (gas == GraphAlreadyAllocated && ! this->staticGraph_->indicesAreAllocated (),
810  std::logic_error, err1 << err2 << "not " << err3);
811  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
812  (gas != GraphAlreadyAllocated && this->staticGraph_->indicesAreAllocated (),
813  std::logic_error, err1 << "not " << err2 << err3);
814  }
815 
816  // If the graph is unallocated, then it had better be a
817  // matrix-owned graph. ("Matrix-owned graph" means that the
818  // matrix gets to define the graph structure. If the CrsMatrix
819  // constructor that takes an RCP<const CrsGraph> was used, then
820  // the matrix does _not_ own the graph.)
821  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
822  (! this->staticGraph_->indicesAreAllocated () &&
823  this->myGraph_.is_null (), std::logic_error,
824  "The static graph says that its indices are not allocated, "
825  "but the graph is not owned by the matrix." << suffix);
826 #endif // HAVE_TPETRA_DEBUG
827 
828  if (gas == GraphNotYetAllocated) {
829 #ifdef HAVE_TPETRA_DEBUG
830  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
831  (this->myGraph_.is_null (), std::logic_error,
832  "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
833 #endif // HAVE_TPETRA_DEBUG
834  try {
835  this->myGraph_->allocateIndices (lg);
836  }
837  catch (std::exception& e) {
838  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
839  (true, std::runtime_error, "CrsGraph::allocateIndices "
840  "threw an exception: " << e.what ());
841  }
842  catch (...) {
843  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
844  (true, std::runtime_error, "CrsGraph::allocateIndices "
845  "threw an exception not a subclass of std::exception.");
846  }
847  }
848 
849  // Allocate matrix values.
850  if (this->getProfileType () == StaticProfile) {
851  // "Static profile" means that the number of matrix entries in
852  // each row was fixed at the time the CrsMatrix constructor was
853  // called. This lets us use 1-D storage for the matrix's
854  // values. ("1-D storage" means the same as that used by the
855  // three arrays in the classic compressed sparse row format.)
856 
857 #ifdef HAVE_TPETRA_DEBUG
858  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
859  (this->staticGraph_.is_null (), std::logic_error,
860  "this->getProfileType() == StaticProfile, but staticGraph_ is null."
861  << suffix);
862 #endif // HAVE_TPETRA_DEBUG
863 
864  const size_t lclNumRows = this->staticGraph_->getNodeNumRows ();
865  typename Graph::local_graph_type::row_map_type k_ptrs =
866  this->staticGraph_->k_rowPtrs_;
867  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
868  (k_ptrs.dimension_0 () != lclNumRows+1, std::logic_error,
869  "With StaticProfile, row offsets array has length "
870  << k_ptrs.dimension_0 () << " != (lclNumRows+1) = "
871  << (lclNumRows+1) << ".");
872 
873  const size_t lclTotalNumEntries =
874  Details::getEntryOnHost (k_ptrs, lclNumRows);
875 
876  // Allocate array of (packed???) matrix values.
877  typedef typename local_matrix_type::values_type values_type;
878  this->k_values1D_ =
879  values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
880  }
881  else {
882  // "Dynamic profile" means the number of matrix entries in each
883  // row is not fixed and may expand. Thus, we store the matrix's
884  // values in "2-D storage," meaning an array of arrays. The
885  // outer array has as many inner arrays as there are rows in the
886  // matrix, and each inner array stores the values in that row.
887  try {
888  this->values2D_ =
889  this->staticGraph_->template allocateValues2D<impl_scalar_type> ();
890  }
891  catch (std::exception& e) {
892  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
893  (true, std::runtime_error, "CrsGraph::allocateValues2D threw an "
894  "exception: " << e.what ());
895  }
896  catch (...) {
897  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
898  (true, std::runtime_error, "CrsGraph::allocateValues2D threw an "
899  "exception not a subclass of std::exception.");
900  }
901  }
902  }
903 
904  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
905  void
907  getAllValues (Teuchos::ArrayRCP<const size_t>& rowPointers,
908  Teuchos::ArrayRCP<const LocalOrdinal>& columnIndices,
909  Teuchos::ArrayRCP<const Scalar>& values) const
910  {
911  using Teuchos::RCP;
912  const char tfecfFuncName[] = "getAllValues: ";
913  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
914  columnIndices.size () != values.size (), std::runtime_error,
915  "Requires that columnIndices and values are the same size.");
916 
917  RCP<const crs_graph_type> relevantGraph = getCrsGraph ();
918  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
919  relevantGraph.is_null (), std::runtime_error,
920  "Requires that getCrsGraph() is not null.");
921  try {
922  rowPointers = relevantGraph->getNodeRowPtrs ();
923  }
924  catch (std::exception &e) {
925  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
926  true, std::runtime_error,
927  "Caught exception while calling graph->getNodeRowPtrs(): "
928  << e.what ());
929  }
930  try {
931  columnIndices = relevantGraph->getNodePackedIndices ();
932  }
933  catch (std::exception &e) {
934  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
935  true, std::runtime_error,
936  "Caught exception while calling graph->getNodePackedIndices(): "
937  << e.what ());
938  }
939  Teuchos::ArrayRCP<const impl_scalar_type> vals =
940  Kokkos::Compat::persistingView (k_values1D_);
941  values = Teuchos::arcp_reinterpret_cast<const Scalar> (vals);
942  }
943 
944  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
945  void
947  fillLocalGraphAndMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
948  {
950  using ::Tpetra::Details::ProfilingRegion;
951  using Kokkos::create_mirror_view;
952  using Teuchos::arcp_const_cast;
953  using Teuchos::Array;
954  using Teuchos::ArrayRCP;
955  using Teuchos::null;
956  using Teuchos::RCP;
957  using Teuchos::rcp;
958  typedef typename local_matrix_type::row_map_type row_map_type;
959  typedef typename Graph::local_graph_type::entries_type::non_const_type lclinds_1d_type;
960  typedef typename local_matrix_type::values_type values_type;
961  ProfilingRegion regionFLGAM ("Tpetra::CrsGraph::fillLocalGraphAndMatrix");
962 
963 #ifdef HAVE_TPETRA_DEBUG
964  const char tfecfFuncName[] = "fillLocalGraphAndMatrix (called from "
965  "fillComplete or expertStaticFillComplete): ";
966 #endif // HAVE_TPETRA_DEBUG
967 
968 #ifdef HAVE_TPETRA_DEBUG
969  // fillComplete() only calls fillLocalGraphAndMatrix() if the
970  // matrix owns the graph, which means myGraph_ is not null.
971  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
972  (myGraph_.is_null (), std::logic_error, "The nonconst graph (myGraph_) "
973  "is null. This means that the matrix has a const (a.k.a. \"static\") "
974  "graph. fillComplete or expertStaticFillComplete should never call "
975  "fillLocalGraphAndMatrix in that case. "
976  "Please report this bug to the Tpetra developers.");
977 #endif // HAVE_TPETRA_DEBUG
978 
979  const size_t lclNumRows = this->getNodeNumRows ();
980 
981  // This method's goal is to fill in the three arrays (compressed
982  // sparse row format) that define the sparse graph's and matrix's
983  // structure, and the sparse matrix's values.
984  //
985  // Use the nonconst version of row_map_type for k_ptrs,
986  // because row_map_type is const and we need to modify k_ptrs here.
987  typename row_map_type::non_const_type k_ptrs;
988  row_map_type k_ptrs_const;
989  lclinds_1d_type k_inds;
990  values_type k_vals;
991 
992  // Get references to the data in myGraph_, so we can modify them
993  // as well. Note that we only call fillLocalGraphAndMatrix() if
994  // the matrix owns the graph, which means myGraph_ is not null.
995  lclinds_1d_type k_lclInds1D_ = myGraph_->k_lclInds1D_;
996 
997  typedef decltype (myGraph_->k_numRowEntries_) row_entries_type;
998 
999  if (getProfileType () == DynamicProfile) {
1000  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
1001  //
1002  // DynamicProfile means that the matrix's column indices and
1003  // values are currently stored in a 2-D "unpacked" format, in
1004  // the arrays-of-arrays myGraph_->lclInds2D_ (for column
1005  // indices) and values2D_ (for values). We allocate 1-D storage
1006  // (k_inds resp. k_vals), and then copy from 2-D storage
1007  // (lclInds2D_ resp. values2D_) into 1-D storage (k_inds
1008  // resp. k_vals).
1009 
1010  // We're be packing on host. k_numRowEntries_ lives on host,
1011  // and computeOffsetsFromCounts accepts a host View for counts,
1012  // even if offsets is a device View. (Furthermore, the "host"
1013  // View may very well live in CudaUVMSpace, so doing this has no
1014  // penalty, other than requiring synchronization between Cuda
1015  // and host. UVM memory gets grumpy if both device and host
1016  // attempt to access it at the same time without an intervening
1017  // fence.)
1018  typename row_entries_type::const_type numRowEnt_h =
1019  myGraph_->k_numRowEntries_;
1020 #ifdef HAVE_TPETRA_DEBUG
1021  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1022  (static_cast<size_t> (numRowEnt_h.dimension_0 ()) != lclNumRows,
1023  std::logic_error, "(DynamicProfile branch) numRowEnt_h has the "
1024  "wrong length. numRowEnt_h.dimension_0() = "
1025  << numRowEnt_h.dimension_0 () << " != getNodeNumRows() = "
1026  << lclNumRows << ".");
1027 #endif // HAVE_TPETRA_DEBUG
1028 
1029  // We're packing on host (since we can't read Teuchos data
1030  // structures on device), so let's fill the packed row offsets
1031  // on host first.
1032  k_ptrs = typename row_map_type::non_const_type ("Tpetra::CrsGraph::ptr",
1033  lclNumRows+1);
1034  typename row_map_type::non_const_type::HostMirror h_ptrs =
1035  create_mirror_view (k_ptrs);
1036 
1037  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1038  // the array of valid entry counts per row.
1039  //
1040  // Return value is the total number of entries in the matrix on
1041  // the calling process. It's cheap to compute and useful as a
1042  // sanity check.
1043  const size_t lclTotalNumEntries =
1044  computeOffsetsFromCounts (h_ptrs, numRowEnt_h);
1045 #ifdef HAVE_TPETRA_DEBUG
1046  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1047  (static_cast<size_t> (h_ptrs.dimension_0 ()) != lclNumRows + 1,
1048  std::logic_error, "(DynamicProfile branch) After packing h_ptrs, "
1049  "h_ptrs.dimension_0() = " << h_ptrs.dimension_0 () << " != "
1050  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
1051  {
1052  const size_t h_ptrs_lastEnt = h_ptrs(lclNumRows); // it's a host View
1053  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1054  (h_ptrs_lastEnt != lclTotalNumEntries, std::logic_error,
1055  "(DynamicProfile branch) After packing h_ptrs, h_ptrs(lclNumRows="
1056  << lclNumRows << ") = " << h_ptrs_lastEnt << " != total number "
1057  "of entries on the calling process = " << lclTotalNumEntries << ".");
1058  }
1059 #endif // HAVE_TPETRA_DEBUG
1060 
1061  // Allocate the arrays of packed column indices and values.
1062  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
1063  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1064 
1065  // We need host views of the above, since 2-D storage lives on host.
1066  typename lclinds_1d_type::HostMirror h_inds = create_mirror_view (k_inds);
1067  typename values_type::HostMirror h_vals = create_mirror_view (k_vals);
1068 
1069  // Pack the column indices and values on the host.
1070  ArrayRCP<Array<LocalOrdinal> > lclInds2D = myGraph_->lclInds2D_;
1071  for (size_t row = 0; row < lclNumRows; ++row) {
1072  const size_t numEnt = numRowEnt_h(row);
1073  std::copy (lclInds2D[row].begin(),
1074  lclInds2D[row].begin() + numEnt,
1075  h_inds.ptr_on_device() + h_ptrs(row));
1076  std::copy (values2D_[row].begin(),
1077  values2D_[row].begin() + numEnt,
1078  h_vals.ptr_on_device() + h_ptrs(row));
1079  }
1080 
1081  // Copy the packed column indices and values to the device.
1082  Kokkos::deep_copy (k_inds, h_inds);
1083  Kokkos::deep_copy (k_vals, h_vals);
1084  // Copy the packed row offsets to the device too.
1085  // We didn't actually need them on device before.
1086  Kokkos::deep_copy (k_ptrs, h_ptrs);
1087  k_ptrs_const = k_ptrs; // const version of k_ptrs
1088 
1089 #ifdef HAVE_TPETRA_DEBUG
1090  // Sanity check of packed row offsets.
1091  if (k_ptrs.dimension_0 () != 0) {
1092  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
1093  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1094  (numOffsets != lclNumRows + 1, std::logic_error, "(DynamicProfile "
1095  "branch) After copying into k_ptrs, k_ptrs.dimension_0() = " <<
1096  numOffsets << " != (lclNumRows+1) = " << (lclNumRows+1) << ".");
1097 
1098  const auto valToCheck = Details::getEntryOnHost (k_ptrs, numOffsets-1);
1099  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1100  (static_cast<size_t> (valToCheck) != k_vals.dimension_0 (),
1101  std::logic_error, "(DynamicProfile branch) After packing, k_ptrs("
1102  << (numOffsets-1) << ") = " << valToCheck << " != "
1103  "k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1104  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1105  (static_cast<size_t> (valToCheck) != k_inds.dimension_0 (),
1106  std::logic_error, "(DynamicProfile branch) After packing, k_ptrs("
1107  << (numOffsets-1) << ") = " << valToCheck << " != "
1108  "k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
1109  }
1110 #endif // HAVE_TPETRA_DEBUG
1111  }
1112  else if (getProfileType () == StaticProfile) {
1113  // StaticProfile means that the matrix's column indices and
1114  // values are currently stored in a 1-D format, with row offsets
1115  // in k_rowPtrs_ and local column indices in k_lclInds1D_.
1116 
1117  // StaticProfile also means that the graph's array of row
1118  // offsets must already be allocated.
1119  typename Graph::local_graph_type::row_map_type curRowOffsets =
1120  myGraph_->k_rowPtrs_;
1121 
1122 #ifdef HAVE_TPETRA_DEBUG
1123  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1124  (curRowOffsets.dimension_0 () == 0, std::logic_error,
1125  "(StaticProfile branch) curRowOffsets.dimension_0() == 0.");
1126  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1127  (curRowOffsets.dimension_0 () != lclNumRows + 1, std::logic_error,
1128  "(StaticProfile branch) curRowOffsets.dimension_0() = "
1129  << curRowOffsets.dimension_0 () << " != lclNumRows + 1 = "
1130  << (lclNumRows + 1) << ".")
1131  {
1132  const size_t numOffsets = curRowOffsets.dimension_0 ();
1133  const auto valToCheck =
1134  Details::getEntryOnHost (curRowOffsets, numOffsets - 1);
1135  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1136  (numOffsets != 0 &&
1137  myGraph_->k_lclInds1D_.dimension_0 () != valToCheck,
1138  std::logic_error, "(StaticProfile branch) numOffsets = " <<
1139  numOffsets << " != 0 and myGraph_->k_lclInds1D_.dimension_0() = "
1140  << myGraph_->k_lclInds1D_.dimension_0 () << " != curRowOffsets("
1141  << numOffsets << ") = " << valToCheck << ".");
1142  }
1143 #endif // HAVE_TPETRA_DEBUG
1144 
1145  if (myGraph_->nodeNumEntries_ != myGraph_->getNodeAllocationSize ()) {
1146  // The matrix's current 1-D storage is "unpacked." This means
1147  // the row offsets may differ from what the final row offsets
1148  // should be. This could happen, for example, if the user
1149  // specified StaticProfile in the constructor and set an upper
1150  // bound on the number of entries per row, but didn't fill all
1151  // those entries.
1152 #ifdef HAVE_TPETRA_DEBUG
1153  if (curRowOffsets.dimension_0 () != 0) {
1154  const size_t numOffsets =
1155  static_cast<size_t> (curRowOffsets.dimension_0 ());
1156  const auto valToCheck =
1157  Details::getEntryOnHost (curRowOffsets, numOffsets-1);
1158  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1159  (static_cast<size_t> (valToCheck) !=
1160  static_cast<size_t> (k_values1D_.dimension_0 ()),
1161  std::logic_error, "(StaticProfile unpacked branch) Before "
1162  "allocating or packing, curRowOffsets(" << (numOffsets-1) << ") = "
1163  << valToCheck << " != k_values1D_.dimension_0()"
1164  " = " << k_values1D_.dimension_0 () << ".");
1165  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1166  (static_cast<size_t> (valToCheck) !=
1167  static_cast<size_t> (myGraph_->k_lclInds1D_.dimension_0 ()),
1168  std::logic_error, "(StaticProfile unpacked branch) Before "
1169  "allocating or packing, curRowOffsets(" << (numOffsets-1) << ") = "
1170  << valToCheck
1171  << " != myGraph_->k_lclInds1D_.dimension_0() = "
1172  << myGraph_->k_lclInds1D_.dimension_0 () << ".");
1173  }
1174 #endif // HAVE_TPETRA_DEBUG
1175 
1176  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1177  // the array of valid entry counts per row.
1178 
1179  // Total number of entries in the matrix on the calling
1180  // process. We will compute this in the loop below. It's
1181  // cheap to compute and useful as a sanity check.
1182  size_t lclTotalNumEntries = 0;
1183  // This will be a host view of packed row offsets.
1184  typename row_map_type::non_const_type::HostMirror h_ptrs;
1185  {
1186  // Allocate the packed row offsets array. We use a nonconst
1187  // temporary (packedRowOffsets) here, because k_ptrs is
1188  // const. We will assign packedRowOffsets to k_ptrs below.
1189  typename row_map_type::non_const_type
1190  packedRowOffsets ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
1191  typename row_entries_type::const_type numRowEnt_h =
1192  myGraph_->k_numRowEntries_;
1193  // We're computing offsets on device. This function can
1194  // handle numRowEnt_h being a host View.
1195  lclTotalNumEntries =
1196  computeOffsetsFromCounts (packedRowOffsets, numRowEnt_h);
1197  // packedRowOffsets is modifiable; k_ptrs isn't, so we have
1198  // to use packedRowOffsets in the loop above and assign here.
1199  k_ptrs = packedRowOffsets;
1200  k_ptrs_const = k_ptrs;
1201  }
1202 
1203 #ifdef HAVE_TPETRA_DEBUG
1204  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1205  (static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1,
1206  std::logic_error,
1207  "(StaticProfile unpacked branch) After packing k_ptrs, "
1208  "k_ptrs.dimension_0() = " << k_ptrs.dimension_0 () << " != "
1209  "lclNumRows+1 = " << (lclNumRows+1) << ".");
1210  {
1211  const auto valToCheck = Details::getEntryOnHost (k_ptrs, lclNumRows);
1212  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1213  (valToCheck != lclTotalNumEntries, std::logic_error,
1214  "(StaticProfile unpacked branch) After filling k_ptrs, "
1215  "k_ptrs(lclNumRows=" << lclNumRows << ") = " << valToCheck
1216  << " != total number of entries on the calling process = "
1217  << lclTotalNumEntries << ".");
1218  }
1219 #endif // HAVE_TPETRA_DEBUG
1220 
1221  // Allocate the arrays of packed column indices and values.
1222  k_inds = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
1223  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1224 
1225  // curRowOffsets (myGraph_->k_rowPtrs_) (???), k_lclInds1D_,
1226  // and k_values1D_ are currently unpacked. Pack them, using
1227  // the packed row offsets array k_ptrs that we created above.
1228  //
1229  // FIXME (mfh 06 Aug 2014) If "Optimize Storage" is false, we
1230  // need to keep around the unpacked row offsets, column
1231  // indices, and values arrays.
1232 
1233  // Pack the column indices from unpacked k_lclInds1D_ into
1234  // packed k_inds. We will replace k_lclInds1D_ below.
1235  typedef pack_functor<typename Graph::local_graph_type::entries_type::non_const_type,
1236  typename Graph::local_graph_type::row_map_type>
1237  inds_packer_type;
1238  inds_packer_type indsPacker (k_inds, myGraph_->k_lclInds1D_,
1239  k_ptrs, curRowOffsets);
1240  typedef typename decltype (k_inds)::execution_space exec_space;
1241  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
1242  Kokkos::parallel_for (range_type (0, lclNumRows), indsPacker);
1243 
1244  // Pack the values from unpacked k_values1D_ into packed
1245  // k_vals. We will replace k_values1D_ below.
1246  typedef pack_functor<values_type, row_map_type> vals_packer_type;
1247  vals_packer_type valsPacker (k_vals, this->k_values1D_,
1248  k_ptrs, curRowOffsets);
1249  Kokkos::parallel_for (range_type (0, lclNumRows), valsPacker);
1250 
1251 #ifdef HAVE_TPETRA_DEBUG
1252  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1253  (k_ptrs.dimension_0 () == 0, std::logic_error,
1254  "(StaticProfile \"Optimize Storage\" = "
1255  "true branch) After packing, k_ptrs.dimension_0() = 0. This "
1256  "probably means that k_rowPtrs_ was never allocated.");
1257  if (k_ptrs.dimension_0 () != 0) {
1258  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
1259  const auto valToCheck = Details::getEntryOnHost (k_ptrs, numOffsets - 1);
1260  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1261  (static_cast<size_t> (valToCheck) != k_vals.dimension_0 (),
1262  std::logic_error,
1263  "(StaticProfile \"Optimize Storage\"=true branch) After packing, "
1264  "k_ptrs(" << (numOffsets-1) << ") = " << valToCheck <<
1265  " != k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1266  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1267  (static_cast<size_t> (valToCheck) != k_inds.dimension_0 (),
1268  std::logic_error,
1269  "(StaticProfile \"Optimize Storage\"=true branch) After packing, "
1270  "k_ptrs(" << (numOffsets-1) << ") = " << valToCheck <<
1271  " != k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
1272  }
1273 #endif // HAVE_TPETRA_DEBUG
1274  }
1275  else { // We don't have to pack, so just set the pointers.
1276  k_ptrs_const = myGraph_->k_rowPtrs_;
1277  k_inds = myGraph_->k_lclInds1D_;
1278  k_vals = this->k_values1D_;
1279 
1280 #ifdef HAVE_TPETRA_DEBUG
1281  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1282  (k_ptrs_const.dimension_0 () == 0, std::logic_error,
1283  "(StaticProfile \"Optimize Storage\"=false branch) "
1284  "k_ptrs_const.dimension_0() = 0. This probably means that "
1285  "k_rowPtrs_ was never allocated.");
1286  if (k_ptrs_const.dimension_0 () != 0) {
1287  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ());
1288  const auto valToCheck = Details::getEntryOnHost (k_ptrs_const, numOffsets - 1);
1289  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1290  (static_cast<size_t> (valToCheck) != k_vals.dimension_0 (),
1291  std::logic_error,
1292  "(StaticProfile \"Optimize Storage\"=false branch) "
1293  "k_ptrs_const(" << (numOffsets-1) << ") = " << valToCheck
1294  << " != k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1295  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1296  (static_cast<size_t> (valToCheck) != k_inds.dimension_0 (),
1297  std::logic_error,
1298  "(StaticProfile \"Optimize Storage\" = false branch) "
1299  "k_ptrs_const(" << (numOffsets-1) << ") = " << valToCheck
1300  << " != k_inds.dimension_0() = " << k_inds.dimension_0 () << ".");
1301  }
1302 #endif // HAVE_TPETRA_DEBUG
1303  }
1304  }
1305 
1306 #ifdef HAVE_TPETRA_DEBUG
1307  // Extra sanity checks.
1308  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1309  (static_cast<size_t> (k_ptrs_const.dimension_0 ()) != lclNumRows + 1,
1310  std::logic_error, "After packing, k_ptrs_const.dimension_0() = " <<
1311  k_ptrs_const.dimension_0 () << " != lclNumRows+1 = " << (lclNumRows+1)
1312  << ".");
1313  if (k_ptrs_const.dimension_0 () != 0) {
1314  const size_t numOffsets = static_cast<size_t> (k_ptrs_const.dimension_0 ());
1315  const size_t k_ptrs_const_numOffsetsMinus1 =
1316  Details::getEntryOnHost (k_ptrs_const, numOffsets - 1);
1317  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1318  (k_ptrs_const_numOffsetsMinus1 != k_vals.dimension_0 (),
1319  std::logic_error, "After packing, k_ptrs_const(" << (numOffsets-1) <<
1320  ") = " << k_ptrs_const_numOffsetsMinus1 << " != k_vals.dimension_0()"
1321  " = " << k_vals.dimension_0 () << ".");
1322  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1323  (k_ptrs_const_numOffsetsMinus1 != k_inds.dimension_0 (),
1324  std::logic_error, "After packing, k_ptrs_const(" << (numOffsets-1) <<
1325  ") = " << k_ptrs_const_numOffsetsMinus1 << " != k_inds.dimension_0()"
1326  " = " << k_inds.dimension_0 () << ".");
1327  }
1328 #endif // HAVE_TPETRA_DEBUG
1329 
1330  // May we ditch the old allocations for the packed (and otherwise
1331  // "optimized") allocations, later in this routine? Optimize
1332  // storage if the graph is not static, or if the graph already has
1333  // optimized storage.
1334  const bool defaultOptStorage =
1335  ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1336  const bool requestOptimizedStorage =
1337  (! params.is_null () && params->get ("Optimize Storage", defaultOptStorage)) ||
1338  (params.is_null () && defaultOptStorage);
1339 
1340  // The graph has optimized storage when indices are allocated,
1341  // myGraph_->k_numRowEntries_ is empty, and there are more than
1342  // zero rows on this process. It's impossible for the graph to
1343  // have dynamic profile (getProfileType() == DynamicProfile) and
1344  // be optimized (isStorageOptimized()).
1345  if (requestOptimizedStorage) {
1346  // Free the old, unpacked, unoptimized allocations.
1347  // Change the graph from dynamic to static allocation profile
1348 
1349  // Free graph data structures that are only needed for 2-D or
1350  // unpacked 1-D storage.
1351  myGraph_->lclInds2D_ = null; // legacy KokkosClassic 2-D storage
1352  myGraph_->k_numRowEntries_ = row_entries_type ();
1353 
1354  // Free the matrix's 2-D storage.
1355  this->values2D_ = null;
1356 
1357  // Keep the new 1-D packed allocations.
1358  myGraph_->k_rowPtrs_ = k_ptrs_const;
1359  myGraph_->k_lclInds1D_ = k_inds;
1360  this->k_values1D_ = k_vals;
1361 
1362  // Whatever graph was before, it's StaticProfile now.
1363  myGraph_->pftype_ = StaticProfile;
1364  myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1365  this->storageStatus_ = Details::STORAGE_1D_PACKED;
1366  }
1367 
1368  // Make the local graph, using the arrays of row offsets and
1369  // column indices that we built above. The local graph should be
1370  // null, but we delete it first so that any memory can be freed
1371  // before we allocate the new one.
1372  //
1373  // FIXME (mfh 06,28 Aug 2014) It would make more sense for
1374  // Tpetra::CrsGraph to have a protected method that accepts k_inds
1375  // and k_ptrs, and creates the local graph lclGraph_.
1376  myGraph_->lclGraph_ =
1377  typename Graph::local_graph_type (k_inds, k_ptrs_const);
1378 
1379  // Make the local matrix, using the local graph and vals array.
1380  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
1381  getNodeNumCols (), k_vals,
1382  myGraph_->lclGraph_);
1383  }
1384 
1385  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1386  void
1388  fillLocalMatrix (const Teuchos::RCP<Teuchos::ParameterList>& params)
1389  {
1390  using ::Tpetra::Details::ProfilingRegion;
1391  using Kokkos::create_mirror_view;
1392  using Teuchos::ArrayRCP;
1393  using Teuchos::Array;
1394  using Teuchos::null;
1395  using Teuchos::RCP;
1396  using Teuchos::rcp;
1397  typedef LocalOrdinal LO;
1398  typedef typename Graph::local_graph_type::row_map_type row_map_type;
1399  typedef typename row_map_type::non_const_type non_const_row_map_type;
1400  typedef typename local_matrix_type::values_type values_type;
1401 #ifdef HAVE_TPETRA_DEBUG
1402  const char tfecfFuncName[] = "fillLocalMatrix (called from fillComplete): ";
1403 #endif // HAVE_TPETRA_DEBUG
1404  ProfilingRegion regionFLM ("Tpetra::CrsGraph::fillLocalMatrix");
1405 
1406  const size_t lclNumRows = getNodeNumRows();
1407  const map_type& rowMap = * (getRowMap ());
1408  RCP<node_type> node = rowMap.getNode ();
1409 
1410  // The goals of this routine are first, to allocate and fill
1411  // packed 1-D storage (see below for an explanation) in the vals
1412  // array, and second, to give vals to the local matrix and
1413  // finalize the local matrix. We only need k_ptrs, the packed 1-D
1414  // row offsets, within the scope of this routine, since we're only
1415  // filling the local matrix here (use fillLocalGraphAndMatrix() to
1416  // fill both the graph and the matrix at the same time).
1417 
1418  // get data from staticGraph_
1419  ArrayRCP<Array<LO> > lclInds2D = staticGraph_->lclInds2D_;
1420  size_t nodeNumEntries = staticGraph_->nodeNumEntries_;
1421  size_t nodeNumAllocated = staticGraph_->getNodeAllocationSize ();
1422  row_map_type k_rowPtrs_ = staticGraph_->lclGraph_.row_map;
1423 
1424  row_map_type k_ptrs; // "packed" row offsets array
1425  values_type k_vals; // "packed" values array
1426 
1427  // May we ditch the old allocations for the packed (and otherwise
1428  // "optimized") allocations, later in this routine? Request
1429  // optimized storage by default.
1430  bool requestOptimizedStorage = true;
1431  const bool default_OptimizeStorage =
1432  ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1433  if (! params.is_null () && ! params->get ("Optimize Storage", default_OptimizeStorage)) {
1434  requestOptimizedStorage = false;
1435  }
1436  // If we're not allowed to change a static graph, then we can't
1437  // change the storage of the matrix, either. This means that if
1438  // the graph's storage isn't already optimized, we can't optimize
1439  // the matrix's storage either. Check and give warning, as
1440  // appropriate.
1441  if (! staticGraph_->isStorageOptimized () && requestOptimizedStorage) {
1442  TPETRA_ABUSE_WARNING(true, std::runtime_error,
1443  "You requested optimized storage by setting the"
1444  "\"Optimize Storage\" flag to \"true\" in the parameter list, or by virtue"
1445  "of default behavior. However, the associated CrsGraph was filled separately"
1446  "and requested not to optimize storage. Therefore, the CrsMatrix cannot"
1447  "optimize storage.");
1448  requestOptimizedStorage = false;
1449  }
1450 
1451  typedef decltype (staticGraph_->k_numRowEntries_) row_entries_type;
1452 
1453  if (getProfileType() == DynamicProfile) {
1454  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
1455  //
1456  // DynamicProfile means that the matrix's values are currently
1457  // stored in a 2-D "unpacked" format, in the array-of-arrays
1458  // values2D_. We allocate 1-D storage and then copy from 2-D
1459  // storage in values2D_ into 1-D storage in k_vals. Since we're
1460  // only allocating the local matrix here, not the local graph,
1461  // we don't need to keep the row offsets array, but we do need
1462  // it here temporarily in order to convert to 1-D storage. (The
1463  // allocStorage() function needs it.) We'll free ptrs later in
1464  // this method.
1465  //
1466  // FIXME (mfh 08 Aug 2014) If we're in this method, then the
1467  // graph should already have packed 1-D storage. Why can't we
1468  // just use the graph's current row offsets array?
1469 
1470  // Pack the row offsets into k_ptrs, by doing a sum-scan of
1471  // the array of valid entry counts per row.
1472  //
1473  // Total number of entries in the matrix on the calling
1474  // process. We will compute this in the loop below. It's
1475  // cheap to compute and useful as a sanity check.
1476  size_t lclTotalNumEntries = 0;
1477  // This will be a host view of packed row offsets.
1478  typename non_const_row_map_type::HostMirror h_ptrs;
1479 
1480  typename row_entries_type::const_type numRowEnt_h =
1481  staticGraph_->k_numRowEntries_;
1482  {
1483  non_const_row_map_type packedRowOffsets ("Tpetra::CrsGraph::ptr",
1484  lclNumRows+1);
1485  // NOTE (mfh 27 Jun 2016) We need h_ptrs on host anyway, so
1486  // let's just compute offsets on host.
1487  h_ptrs = create_mirror_view (packedRowOffsets);
1489  lclTotalNumEntries = computeOffsetsFromCounts (h_ptrs, numRowEnt_h);
1490  Kokkos::deep_copy (packedRowOffsets, h_ptrs);
1491  k_ptrs = packedRowOffsets;
1492  }
1493 
1494 #ifdef HAVE_TPETRA_DEBUG
1495  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1496  (static_cast<size_t> (k_ptrs.dimension_0 ()) != lclNumRows + 1,
1497  std::logic_error, "In DynamicProfile branch, after packing k_ptrs, "
1498  "k_ptrs.dimension_0() = " << k_ptrs.dimension_0 () << " != "
1499  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
1500  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1501  (static_cast<size_t> (h_ptrs.dimension_0 ()) != lclNumRows + 1,
1502  std::logic_error, "In DynamicProfile branch, after packing h_ptrs, "
1503  "h_ptrs.dimension_0() = " << h_ptrs.dimension_0 () << " != "
1504  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
1505  {
1506  const auto valToCheck = Details::getEntryOnHost (k_ptrs, lclNumRows);
1507  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1508  (static_cast<size_t> (valToCheck) != lclTotalNumEntries,
1509  std::logic_error, "(DynamicProfile branch) After packing k_ptrs, "
1510  "k_ptrs(lclNumRows = " << lclNumRows << ") = " << valToCheck
1511  << " != total number of entries on the calling process = "
1512  << lclTotalNumEntries << ".");
1513  }
1514 #endif // HAVE_TPETRA_DEBUG
1515 
1516  // Allocate the array of packed values.
1517  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1518  // We need a host view of the above, since 2-D storage lives on host.
1519  typename values_type::HostMirror h_vals = create_mirror_view (k_vals);
1520  // Pack the values on the host.
1521  for (size_t lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1522  const size_t numEnt = numRowEnt_h(lclRow);
1523  std::copy (values2D_[lclRow].begin(),
1524  values2D_[lclRow].begin() + numEnt,
1525  h_vals.ptr_on_device() + h_ptrs(lclRow));
1526  }
1527  // Copy the packed values to the device.
1528  Kokkos::deep_copy (k_vals, h_vals);
1529 
1530 #ifdef HAVE_TPETRA_DEBUG
1531  // Sanity check of packed row offsets.
1532  if (k_ptrs.dimension_0 () != 0) {
1533  const size_t numOffsets = static_cast<size_t> (k_ptrs.dimension_0 ());
1534  const auto valToCheck =
1535  Details::getEntryOnHost (k_ptrs, numOffsets - 1);
1536  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1537  (static_cast<size_t> (valToCheck) != k_vals.dimension_0 (),
1538  std::logic_error, "(DynamicProfile branch) After packing, k_ptrs("
1539  << (numOffsets-1) << ") = " << valToCheck << " != "
1540  "k_vals.dimension_0() = " << k_vals.dimension_0 () << ".");
1541  }
1542 #endif // HAVE_TPETRA_DEBUG
1543  }
1544  else if (getProfileType () == StaticProfile) {
1545  // StaticProfile means that the matrix's values are currently
1546  // stored in a 1-D format. However, this format is "unpacked";
1547  // it doesn't necessarily have the same row offsets as indicated
1548  // by the ptrs array returned by allocRowPtrs. This could
1549  // happen, for example, if the user specified StaticProfile in
1550  // the constructor and fixed the number of matrix entries in
1551  // each row, but didn't fill all those entries.
1552  //
1553  // As above, we don't need to keep the "packed" row offsets
1554  // array ptrs here, but we do need it here temporarily, so we
1555  // have to allocate it. We'll free ptrs later in this method.
1556  //
1557  // Note that this routine checks whether storage has already
1558  // been packed. This is a common case for solution of nonlinear
1559  // PDEs using the finite element method, as long as the
1560  // structure of the sparse matrix does not change between linear
1561  // solves.
1562  if (nodeNumEntries != nodeNumAllocated) {
1563  // We have to pack the 1-D storage, since the user didn't fill
1564  // up all requested storage.
1565  non_const_row_map_type tmpk_ptrs ("Tpetra::CrsGraph::ptr",
1566  lclNumRows+1);
1567  // Total number of entries in the matrix on the calling
1568  // process. We will compute this in the loop below. It's
1569  // cheap to compute and useful as a sanity check.
1570  size_t lclTotalNumEntries = 0;
1571  k_ptrs = tmpk_ptrs;
1572  {
1573  typename row_entries_type::const_type numRowEnt_d =
1574  staticGraph_->k_numRowEntries_;
1576  // This function can handle the counts being a host View.
1577  lclTotalNumEntries = computeOffsetsFromCounts (tmpk_ptrs, numRowEnt_d);
1578  }
1579 
1580  // Allocate the "packed" values array.
1581  // It has exactly the right number of entries.
1582  k_vals = values_type ("Tpetra::CrsMatrix::val", lclTotalNumEntries);
1583 
1584  // Pack k_values1D_ into k_vals. We will replace k_values1D_ below.
1585  typedef pack_functor<values_type, row_map_type> packer_type;
1586  packer_type valsPacker (k_vals, k_values1D_, tmpk_ptrs, k_rowPtrs_);
1587 
1588  typedef typename decltype (k_vals)::execution_space exec_space;
1589  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
1590  Kokkos::parallel_for (range_type (0, lclNumRows), valsPacker);
1591  }
1592  else { // We don't have to pack, so just set the pointer.
1593  k_vals = k_values1D_;
1594  }
1595  }
1596 
1597  // May we ditch the old allocations for the packed one?
1598  if (requestOptimizedStorage) {
1599  // The user requested optimized storage, so we can dump the
1600  // unpacked 2-D and 1-D storage, and keep the packed storage.
1601  values2D_ = null;
1602  k_values1D_ = k_vals;
1603  this->storageStatus_ = Details::STORAGE_1D_PACKED;
1604  }
1605 
1606  // Build the local sparse matrix object. At this point, the local
1607  // matrix certainly has a column Map. Remember that the local
1608  // matrix's number of columns comes from the column Map, not the
1609  // domain Map.
1610  lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
1611  getColMap ()->getNodeNumElements (),
1612  k_vals,
1613  staticGraph_->getLocalGraph ());
1614  }
1615 
1616  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1617  void
1619  insertLocalValues (const LocalOrdinal localRow,
1620  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1621  const Teuchos::ArrayView<const Scalar>& values)
1622  {
1623  using Teuchos::Array;
1624  using Teuchos::ArrayView;
1625  using Teuchos::av_reinterpret_cast;
1626  using Teuchos::toString;
1627  using std::endl;
1628  const char tfecfFuncName[] = "insertLocalValues";
1629 
1630  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillActive (), std::runtime_error,
1631  ": Fill is not active. After calling fillComplete, you must call "
1632  "resumeFill before you may insert entries into the matrix again.");
1633  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isStaticGraph (), std::runtime_error,
1634  " cannot insert indices with static graph; use replaceLocalValues() instead.");
1635  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_->isGloballyIndexed(),
1636  std::runtime_error, ": graph indices are global; use insertGlobalValues().");
1637  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! hasColMap (), std::runtime_error,
1638  " cannot insert local indices without a column map.");
1639  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(values.size() != indices.size(),
1640  std::runtime_error, ": values.size() must equal indices.size().");
1641  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1642  ! getRowMap()->isNodeLocalElement(localRow), std::runtime_error,
1643  ": Local row index " << localRow << " does not belong to this process.");
1644 
1645  if (! myGraph_->indicesAreAllocated ()) {
1646  try {
1647  allocateValues (LocalIndices, GraphNotYetAllocated);
1648  }
1649  catch (std::exception& e) {
1650  TEUCHOS_TEST_FOR_EXCEPTION(
1651  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1652  "allocateValues(LocalIndices,GraphNotYetAllocated) threw an "
1653  "exception: " << e.what ());
1654  }
1655  }
1656 
1657  const size_t numEntriesToAdd = static_cast<size_t> (indices.size ());
1658 #ifdef HAVE_TPETRA_DEBUG
1659  // In a debug build, if the matrix has a column Map, test whether
1660  // any of the given column indices are not in the column Map.
1661  // Keep track of the invalid column indices so we can tell the
1662  // user about them.
1663  if (hasColMap ()) {
1664  const map_type& colMap = * (getColMap ());
1665  Array<LocalOrdinal> badColInds;
1666  bool allInColMap = true;
1667  for (size_t k = 0; k < numEntriesToAdd; ++k) {
1668  if (! colMap.isNodeLocalElement (indices[k])) {
1669  allInColMap = false;
1670  badColInds.push_back (indices[k]);
1671  }
1672  }
1673  if (! allInColMap) {
1674  std::ostringstream os;
1675  os << "Tpetra::CrsMatrix::insertLocalValues: You attempted to insert "
1676  "entries in owned row " << localRow << ", at the following column "
1677  "indices: " << toString (indices) << "." << endl;
1678  os << "Of those, the following indices are not in the column Map on "
1679  "this process: " << toString (badColInds) << "." << endl << "Since "
1680  "the matrix has a column Map already, it is invalid to insert "
1681  "entries at those locations.";
1682  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
1683  }
1684  }
1685 #endif // HAVE_TPETRA_DEBUG
1686 
1687 #ifdef HAVE_TPETRA_DEBUG
1688  RowInfo rowInfo;
1689  try {
1690  rowInfo = myGraph_->getRowInfo (localRow);
1691  } catch (std::exception& e) {
1692  TEUCHOS_TEST_FOR_EXCEPTION(
1693  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1694  "myGraph_->getRowInfo threw an exception: " << e.what ());
1695  }
1696 #else
1697  RowInfo rowInfo = myGraph_->getRowInfo (localRow);
1698 #endif // HAVE_TPETRA_DEBUG
1699 
1700  const size_t curNumEntries = rowInfo.numEntries;
1701  const size_t newNumEntries = curNumEntries + numEntriesToAdd;
1702  if (newNumEntries > rowInfo.allocSize) {
1703  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1704  getProfileType() == StaticProfile, std::runtime_error,
1705  ": new indices exceed statically allocated graph structure.");
1706 
1707  // Make space for the new matrix entries.
1708  try {
1709  rowInfo = myGraph_->template updateLocalAllocAndValues<impl_scalar_type> (rowInfo,
1710  newNumEntries,
1711  values2D_[localRow]);
1712  } catch (std::exception& e) {
1713  TEUCHOS_TEST_FOR_EXCEPTION(
1714  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1715  "myGraph_->updateGlobalAllocAndValues threw an exception: "
1716  << e.what ());
1717  }
1718  }
1719  typename Graph::SLocalGlobalViews indsView;
1720  indsView.linds = indices;
1721 
1722 #ifdef HAVE_TPETRA_DEBUG
1723  ArrayView<impl_scalar_type> valsView;
1724  try {
1725  valsView = this->getViewNonConst (rowInfo);
1726  } catch (std::exception& e) {
1727  TEUCHOS_TEST_FOR_EXCEPTION(
1728  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1729  "getViewNonConst threw an exception: " << e.what ());
1730  }
1731 #else
1732  ArrayView<impl_scalar_type> valsView = this->getViewNonConst (rowInfo);
1733 #endif // HAVE_TPETRA_DEBUG
1734 
1735  ArrayView<const impl_scalar_type> valsIn =
1736  av_reinterpret_cast<const impl_scalar_type> (values);
1737  try {
1738  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, indsView,
1739  valsView, valsIn,
1740  LocalIndices,
1741  LocalIndices);
1742  } catch (std::exception& e) {
1743  TEUCHOS_TEST_FOR_EXCEPTION(
1744  true, std::runtime_error, "Tpetra::CrsMatrix::insertLocalValues: "
1745  "myGraph_->insertIndicesAndValues threw an exception: "
1746  << e.what ());
1747  }
1748 
1749 #ifdef HAVE_TPETRA_DEBUG
1750  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
1751  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1752  chkNewNumEntries != newNumEntries, std::logic_error,
1753  ": The row should have " << newNumEntries << " entries after insert, but "
1754  "instead has " << chkNewNumEntries << ". Please report this bug to the "
1755  "Tpetra developers.");
1756  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isLocallyIndexed(), std::logic_error,
1757  ": At end of insertLocalValues(), this CrsMatrix is not locally indexed. "
1758  "Please report this bug to the Tpetra developers.");
1759 #endif // HAVE_TPETRA_DEBUG
1760  }
1761 
1762  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1763  void
1765  insertLocalValues (const LocalOrdinal localRow,
1766  const LocalOrdinal numEnt,
1767  const Scalar vals[],
1768  const LocalOrdinal cols[])
1769  {
1770  Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
1771  Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
1772  this->insertLocalValues (localRow, colsT, valsT);
1773  }
1774 
1775  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1776  void
1778  insertLocalValuesFiltered (const LocalOrdinal localRow,
1779  const Teuchos::ArrayView<const LocalOrdinal>& indices,
1780  const Teuchos::ArrayView<const Scalar>& values)
1781  {
1782  using Teuchos::Array;
1783  using Teuchos::ArrayView;
1784  using Teuchos::av_reinterpret_cast;
1785  const char tfecfFuncName[] = "insertLocalValues: ";
1786 
1787  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillActive (), std::runtime_error,
1788  "Requires that fill is active.");
1789  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isStaticGraph (), std::runtime_error,
1790  "Cannot insert indices with static graph; use replaceLocalValues() instead.");
1791  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(myGraph_->isGloballyIndexed(),
1792  std::runtime_error, "Graph indices are global; use insertGlobalValues().");
1793  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1794  ! hasColMap (), std::runtime_error, "The matrix has no column Map yet, "
1795  "so you cannot insert local indices. If you created the matrix without "
1796  "a column Map (or without a fill-complete graph), you must call "
1797  "fillComplete to create the column Map, before you may work with local "
1798  "indices.");
1799  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1800  values.size () != indices.size (), std::runtime_error, "values.size() = "
1801  << values.size () << " != indices.size() = " << indices.size ()<< ".");
1802  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1803  ! getRowMap()->isNodeLocalElement (localRow), std::runtime_error,
1804  "Local row index " << localRow << " does not belong to this process.");
1805  if (! myGraph_->indicesAreAllocated ()) {
1806  allocateValues (LocalIndices, GraphNotYetAllocated);
1807  }
1808  // Use the graph to filter incoming entries whose column indices
1809  // aren't in the column Map.
1810  Array<LocalOrdinal> f_inds (indices);
1811  ArrayView<const impl_scalar_type> valsIn =
1812  av_reinterpret_cast<const impl_scalar_type> (values);
1813  Array<impl_scalar_type> f_vals (valsIn);
1814  const size_t numFilteredEntries =
1815  myGraph_->template filterLocalIndicesAndValues<impl_scalar_type> (f_inds (),
1816  f_vals ());
1817  if (numFilteredEntries > 0) {
1818  RowInfo rowInfo = myGraph_->getRowInfo (localRow);
1819  const size_t curNumEntries = rowInfo.numEntries;
1820  const size_t newNumEntries = curNumEntries + numFilteredEntries;
1821  if (newNumEntries > rowInfo.allocSize) {
1822  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1823  getProfileType () == StaticProfile, std::runtime_error,
1824  ": new indices exceed statically allocated graph structure. "
1825  "newNumEntries (" << newNumEntries << " > rowInfo.allocSize ("
1826  << rowInfo.allocSize << ").");
1827  // Make space for the new matrix entries.
1828  rowInfo =
1829  myGraph_->template updateLocalAllocAndValues<impl_scalar_type> (rowInfo,
1830  newNumEntries,
1831  values2D_[localRow]);
1832  }
1833  typename Graph::SLocalGlobalViews inds_view;
1834  inds_view.linds = f_inds (0, numFilteredEntries);
1835  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, inds_view,
1836  this->getViewNonConst (rowInfo),
1837  f_vals, LocalIndices,
1838  LocalIndices);
1839 #ifdef HAVE_TPETRA_DEBUG
1840  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
1841  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
1842  std::logic_error, ": Internal logic error. Please contact Tpetra team.");
1843 #endif // HAVE_TPETRA_DEBUG
1844  }
1845 #ifdef HAVE_TPETRA_DEBUG
1846  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isLocallyIndexed(), std::logic_error,
1847  ": At end of insertLocalValues(), this CrsMatrix is not locally indexed. "
1848  "Please report this bug to the Tpetra developers.");
1849 #endif // HAVE_TPETRA_DEBUG
1850  }
1851 
1852 
1853  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
1854  void
1856  insertGlobalValues (const GlobalOrdinal globalRow,
1857  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1858  const Teuchos::ArrayView<const Scalar>& values)
1859  {
1860  using Teuchos::Array;
1861  using Teuchos::ArrayView;
1862  using Teuchos::av_reinterpret_cast;
1863  using Teuchos::toString;
1864  using std::endl;
1865  typedef LocalOrdinal LO;
1866  typedef GlobalOrdinal GO;
1867  typedef typename ArrayView<const GO>::size_type size_type;
1868  const char tfecfFuncName[] = "insertGlobalValues: ";
1869 
1870 #ifdef HAVE_TPETRA_DEBUG
1871  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1872  values.size () != indices.size (), std::runtime_error,
1873  "values.size() = " << values.size() << " != indices.size() = "
1874  << indices.size() << ".");
1875 #endif // HAVE_TPETRA_DEBUG
1876 
1877  const LO localRow = getRowMap ()->getLocalElement (globalRow);
1878 
1879  if (localRow == OTL::invalid ()) { // globalRow _not_ owned by calling process
1880  insertNonownedGlobalValues (globalRow, indices, values);
1881  }
1882  else { // globalRow _is_ owned by calling process
1883  if (this->isStaticGraph ()) {
1884  // Uh oh! Not allowed to insert into owned rows in that case.
1885  std::ostringstream err;
1886  const int myRank = getRowMap ()->getComm ()->getRank ();
1887  const int numProcs = getRowMap ()->getComm ()->getSize ();
1888 
1889  err << "The matrix was constructed with a constant (\"static\") graph, "
1890  "yet the given global row index " << globalRow << " is in the row "
1891  "Map on the calling process (with rank " << myRank << ", of " <<
1892  numProcs << " process(es)). In this case, you may not insert new "
1893  "entries into rows owned by the calling process.";
1894 
1895  if (! getRowMap ()->isNodeGlobalElement (globalRow)) {
1896  err << " Furthermore, GID->LID conversion with the row Map claims that "
1897  "the global row index is owned on the calling process, yet "
1898  "getRowMap()->isNodeGlobalElement(globalRow) returns false. That's"
1899  " weird! This might indicate a Map bug. Please report this to the"
1900  " Tpetra developers.";
1901  }
1902  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1903  this->isStaticGraph (), std::runtime_error, err.str ());
1904  }
1905 
1906  if (! myGraph_->indicesAreAllocated ()) {
1907  try {
1908  allocateValues (GlobalIndices, GraphNotYetAllocated);
1909  }
1910  catch (std::exception& e) {
1911  TEUCHOS_TEST_FOR_EXCEPTION(
1912  true, std::runtime_error, "Tpetra::CrsMatrix::insertGlobalValues: "
1913  "allocateValues(GlobalIndices,GraphNotYetAllocated) threw an "
1914  "exception: " << e.what ());
1915  }
1916  }
1917 
1918  const size_type numEntriesToInsert = indices.size ();
1919  // If the matrix has a column Map, check at this point whether
1920  // the column indices belong to the column Map.
1921  //
1922  // FIXME (mfh 16 May 2013) We may want to consider deferring the
1923  // test to the CrsGraph method, since it may have to do this
1924  // anyway.
1925  if (hasColMap ()) {
1926  const map_type& colMap = * (getColMap ());
1927  // In a debug build, keep track of the nonowned ("bad") column
1928  // indices, so that we can display them in the exception
1929  // message. In a release build, just ditch the loop early if
1930  // we encounter a nonowned column index.
1931 #ifdef HAVE_TPETRA_DEBUG
1932  Array<GO> badColInds;
1933 #endif // HAVE_TPETRA_DEBUG
1934  bool allInColMap = true;
1935  for (size_type k = 0; k < numEntriesToInsert; ++k) {
1936  if (! colMap.isNodeGlobalElement (indices[k])) {
1937  allInColMap = false;
1938 #ifdef HAVE_TPETRA_DEBUG
1939  badColInds.push_back (indices[k]);
1940 #else
1941  break;
1942 #endif // HAVE_TPETRA_DEBUG
1943  }
1944  }
1945  if (! allInColMap) {
1946  std::ostringstream os;
1947  os << "You attempted to insert entries in owned row " << globalRow
1948  << ", at the following column indices: " << toString (indices)
1949  << "." << endl;
1950 #ifdef HAVE_TPETRA_DEBUG
1951  os << "Of those, the following indices are not in the column Map on "
1952  "this process: " << toString (badColInds) << "." << endl << "Since "
1953  "the matrix has a column Map already, it is invalid to insert "
1954  "entries at those locations.";
1955 #else
1956  os << "At least one of those indices is not in the column Map on this "
1957  "process." << endl << "It is invalid to insert into columns not in "
1958  "the column Map on the process that owns the row.";
1959 #endif // HAVE_TPETRA_DEBUG
1960  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1961  ! allInColMap, std::invalid_argument, os.str ());
1962  }
1963  }
1964 
1965  typename Graph::SLocalGlobalViews inds_view;
1966  ArrayView<const impl_scalar_type> vals_view;
1967 
1968  inds_view.ginds = indices;
1969  vals_view = av_reinterpret_cast<const impl_scalar_type> (values);
1970 
1971 #ifdef HAVE_TPETRA_DEBUG
1972  RowInfo rowInfo;
1973  try {
1974  rowInfo = myGraph_->getRowInfo (localRow);
1975  } catch (std::exception& e) {
1976  TEUCHOS_TEST_FOR_EXCEPTION(
1977  true, std::runtime_error, "myGraph_->getRowInfo(localRow=" << localRow
1978  << ") threw an exception: " << e.what ());
1979  }
1980 #else
1981  RowInfo rowInfo = myGraph_->getRowInfo (localRow);
1982 #endif // HAVE_TPETRA_DEBUG
1983 
1984  const size_t curNumEntries = rowInfo.numEntries;
1985  const size_t newNumEntries =
1986  curNumEntries + static_cast<size_t> (numEntriesToInsert);
1987  if (newNumEntries > rowInfo.allocSize) {
1988  TEUCHOS_TEST_FOR_EXCEPTION(
1989  getProfileType () == StaticProfile && newNumEntries > rowInfo.allocSize,
1990  std::runtime_error, "Tpetra::CrsMatrix::insertGlobalValues: new "
1991  "indices exceed statically allocated graph structure. curNumEntries"
1992  " (" << curNumEntries << ") + numEntriesToInsert (" <<
1993  numEntriesToInsert << ") > allocSize (" << rowInfo.allocSize << ").");
1994 
1995  // Update allocation only as much as necessary
1996  try {
1997  rowInfo =
1998  myGraph_->template updateGlobalAllocAndValues<impl_scalar_type> (rowInfo,
1999  newNumEntries,
2000  values2D_[localRow]);
2001  } catch (std::exception& e) {
2002  TEUCHOS_TEST_FOR_EXCEPTION(
2003  true, std::runtime_error, "myGraph_->updateGlobalAllocAndValues"
2004  "(...) threw an exception: " << e.what ());
2005  }
2006  }
2007  try {
2008  if (isGloballyIndexed ()) {
2009  // lg=GlobalIndices, I=GlobalIndices means the method calls
2010  // getGlobalViewNonConst() and does direct copying, which
2011  // should be reasonably fast.
2012  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, inds_view,
2013  this->getViewNonConst (rowInfo),
2014  vals_view,
2015  GlobalIndices, GlobalIndices);
2016  }
2017  else {
2018  // lg=GlobalIndices, I=LocalIndices means the method calls
2019  // the Map's getLocalElement() method once per entry to
2020  // insert. This may be slow.
2021  myGraph_->template insertIndicesAndValues<impl_scalar_type> (rowInfo, inds_view,
2022  this->getViewNonConst (rowInfo),
2023  vals_view,
2024  GlobalIndices, LocalIndices);
2025  }
2026  }
2027  catch (std::exception& e) {
2028  TEUCHOS_TEST_FOR_EXCEPTION(
2029  true, std::runtime_error, "myGraph_->insertIndicesAndValues(...) "
2030  "threw an exception: " << e.what ());
2031  }
2032 
2033 #ifdef HAVE_TPETRA_DEBUG
2034  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (localRow);
2035  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
2036  std::logic_error, ": There should be a total of " << newNumEntries
2037  << " entries in the row, but the graph now reports " << chkNewNumEntries
2038  << " entries. Please report this bug to the Tpetra developers.");
2039 #endif // HAVE_TPETRA_DEBUG
2040  }
2041  }
2042 
2043 
2044  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2045  void
2047  insertGlobalValues (const GlobalOrdinal globalRow,
2048  const LocalOrdinal numEnt,
2049  const Scalar vals[],
2050  const GlobalOrdinal inds[])
2051  {
2052  Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2053  Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2054  this->insertGlobalValues (globalRow, indsT, valsT);
2055  }
2056 
2057 
2058  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2059  void
2061  insertGlobalValuesFiltered (const GlobalOrdinal globalRow,
2062  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2063  const Teuchos::ArrayView<const Scalar>& values)
2064  {
2065  using Teuchos::Array;
2066  using Teuchos::ArrayView;
2067  using Teuchos::av_reinterpret_cast;
2068  typedef LocalOrdinal LO;
2069  typedef GlobalOrdinal GO;
2070  typedef impl_scalar_type ST;
2071  const char tfecfFuncName[] = "insertGlobalValuesFiltered: ";
2072 
2073  // mfh 14 Dec 2012: Defer test for static graph until we know that
2074  // globalRow is in the row Map. If it's not in the row Map, it
2075  // doesn't matter whether or not the graph is static; the data
2076  // just get stashed for later use by globalAssemble().
2077  //
2078  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2079  // isStaticGraph(), std::runtime_error,
2080  // ": matrix was constructed with static graph. Cannot insert new entries.");
2081 #ifdef HAVE_TPETRA_DEBUG
2082  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2083  values.size () != indices.size (), std::runtime_error,
2084  "values.size() = " << values.size() << " != indices.size() = "
2085  << indices.size() << ".");
2086 #endif // HAVE_TPETRA_DEBUG
2087 
2088  ArrayView<const ST> valsIn = av_reinterpret_cast<const ST> (values);
2089  const LO lrow = getRowMap ()->getLocalElement (globalRow);
2090 
2091  if (lrow != Teuchos::OrdinalTraits<LO>::invalid ()) { // globalRow is in our row Map.
2092  // If the matrix has a static graph, this process is now allowed
2093  // to insert into rows it owns.
2094  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2095  this->isStaticGraph (), std::runtime_error,
2096  "The matrix was constructed with a static graph. In that case, "
2097  "it is forbidden to insert new entries into rows owned by the "
2098  "calling process.");
2099  if (! myGraph_->indicesAreAllocated ()) {
2100  allocateValues (GlobalIndices, GraphNotYetAllocated);
2101  }
2102  typename Graph::SLocalGlobalViews inds_view;
2103  ArrayView<const ST> vals_view;
2104 
2105  // We have to declare these Arrays here rather than in the
2106  // hasColMap() if branch, so that views to them will remain
2107  // valid for the whole scope.
2108  Array<GO> filtered_indices;
2109  Array<ST> filtered_values;
2110  if (hasColMap ()) { // We have a column Map.
2111  // Use column Map to filter the indices and corresponding
2112  // values, so that we only insert entries into columns we own.
2113  filtered_indices.assign (indices.begin (), indices.end ());
2114  filtered_values.assign (valsIn.begin (), valsIn.end ());
2115  const size_t numFilteredEntries =
2116  myGraph_->template filterGlobalIndicesAndValues<ST> (filtered_indices (),
2117  filtered_values ());
2118  inds_view.ginds = filtered_indices (0, numFilteredEntries);
2119  vals_view = filtered_values (0, numFilteredEntries);
2120  }
2121  else { // we don't have a column Map.
2122  inds_view.ginds = indices;
2123  vals_view = valsIn;
2124  }
2125  const size_t numFilteredEntries = vals_view.size ();
2126  // add the new indices and values
2127  if (numFilteredEntries > 0) {
2128  RowInfo rowInfo = myGraph_->getRowInfo (lrow);
2129  const size_t curNumEntries = rowInfo.numEntries;
2130  const size_t newNumEntries = curNumEntries + numFilteredEntries;
2131  if (newNumEntries > rowInfo.allocSize) {
2132  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2133  getProfileType () == StaticProfile, std::runtime_error,
2134  "New indices exceed statically allocated graph structure.");
2135 
2136  // Update allocation only as much as necessary
2137  rowInfo = myGraph_->template updateGlobalAllocAndValues<ST> (rowInfo,
2138  newNumEntries,
2139  values2D_[lrow]);
2140  }
2141  if (isGloballyIndexed ()) {
2142  // lg=GlobalIndices, I=GlobalIndices means the method calls
2143  // getGlobalViewNonConst() and does direct copying, which
2144  // should be reasonably fast.
2145  myGraph_->template insertIndicesAndValues<ST> (rowInfo, inds_view,
2146  this->getViewNonConst (rowInfo),
2147  vals_view,
2148  GlobalIndices, GlobalIndices);
2149  }
2150  else {
2151  // lg=GlobalIndices, I=LocalIndices means the method calls
2152  // the Map's getLocalElement() method once per entry to
2153  // insert. This may be slow.
2154  myGraph_->template insertIndicesAndValues<ST> (rowInfo, inds_view,
2155  this->getViewNonConst (rowInfo),
2156  vals_view,
2157  GlobalIndices, LocalIndices);
2158  }
2159 #ifdef HAVE_TPETRA_DEBUG
2160  {
2161  const size_t chkNewNumEntries = myGraph_->getNumEntriesInLocalRow (lrow);
2162  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(chkNewNumEntries != newNumEntries,
2163  std::logic_error, ": There should be a total of " << newNumEntries
2164  << " entries in the row, but the graph now reports " << chkNewNumEntries
2165  << " entries. Please report this bug to the Tpetra developers.");
2166  }
2167 #endif // HAVE_TPETRA_DEBUG
2168  }
2169  }
2170  else { // The calling process doesn't own the given row.
2171  insertNonownedGlobalValues (globalRow, indices, values);
2172  }
2173  }
2174 
2175 
2176  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2177  LocalOrdinal
2179  replaceLocalValues (const LocalOrdinal localRow,
2180  const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2181  const Teuchos::ArrayView<const Scalar>& vals) const
2182  {
2183  using Kokkos::MemoryUnmanaged;
2184  using Kokkos::View;
2185  typedef impl_scalar_type IST;
2186  typedef LocalOrdinal LO;
2187  typedef device_type DD;
2188  typedef typename View<LO*, DD>::HostMirror::device_type HD;
2189  // inputInds and inputVals come from the user, so they are host data.
2190  typedef View<const IST*, HD, MemoryUnmanaged> ISVT; // impl scalar view type
2191  typedef View<const LO*, HD, MemoryUnmanaged> LIVT; // lcl ind view type
2192 
2193  LIVT lclColsIn (lclCols.getRawPtr (), lclCols.size ());
2194  const IST* valsRaw = reinterpret_cast<const IST*> (vals.getRawPtr ());
2195  ISVT valsIn (valsRaw, vals.size ());
2196  return this->template replaceLocalValues<LIVT, ISVT> (localRow,
2197  lclColsIn,
2198  valsIn);
2199  }
2200 
2201  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2202  LocalOrdinal
2204  replaceLocalValues (const LocalOrdinal localRow,
2205  const LocalOrdinal numEnt,
2206  const Scalar inputVals[],
2207  const LocalOrdinal inputCols[]) const
2208  {
2209  using Kokkos::MemoryUnmanaged;
2210  using Kokkos::View;
2211  typedef impl_scalar_type IST;
2212  typedef LocalOrdinal LO;
2213  typedef device_type DD;
2214  typedef typename View<LO*, DD>::HostMirror::device_type HD;
2215  // inputInds and inputVals come from the user, so they are host data.
2216  typedef View<const LO*, HD, MemoryUnmanaged> LIVT; // lcl ind view type
2217  typedef View<const IST*, HD, MemoryUnmanaged> ISVT; // impl scalar view type
2218 
2219  LIVT indsK (inputCols, numEnt);
2220  ISVT valsK (reinterpret_cast<const IST*> (inputVals), numEnt);
2221  return this->template replaceLocalValues<LIVT, ISVT> (localRow,
2222  indsK,
2223  valsK);
2224  }
2225 
2226  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2227  LocalOrdinal
2229  replaceGlobalValues (const GlobalOrdinal globalRow,
2230  const Teuchos::ArrayView<const GlobalOrdinal>& inputInds,
2231  const Teuchos::ArrayView<const Scalar>& inputVals) const
2232  {
2233  using Kokkos::MemoryUnmanaged;
2234  using Kokkos::View;
2235  typedef impl_scalar_type IST;
2236  typedef GlobalOrdinal GO;
2237  typedef device_type DD;
2238  typedef typename View<GO*, DD>::HostMirror::device_type HD;
2239  // inputInds and inputVals come from the user, so they are host data.
2240  typedef View<const GO*, HD, MemoryUnmanaged> GIVT; // gbl ind view type
2241  typedef View<const IST*, HD, MemoryUnmanaged> ISVT; // impl scalar view type
2242 
2243  const IST* inputValsRaw =
2244  reinterpret_cast<const IST*> (inputVals.getRawPtr ());
2245  GIVT indsK (inputInds.getRawPtr (), inputInds.size ());
2246  ISVT valsK (inputValsRaw, inputVals.size ());
2247  return this->template replaceGlobalValues<GIVT, ISVT> (globalRow,
2248  indsK,
2249  valsK);
2250  }
2251 
2252 
2253  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2254  LocalOrdinal
2256  replaceGlobalValues (const GlobalOrdinal globalRow,
2257  const LocalOrdinal numEnt,
2258  const Scalar inputVals[],
2259  const GlobalOrdinal inputCols[]) const
2260  {
2261  using Kokkos::MemoryUnmanaged;
2262  using Kokkos::View;
2263  typedef impl_scalar_type IST;
2264  typedef GlobalOrdinal GO;
2265  typedef device_type DD;
2266  typedef typename View<GO*, DD>::HostMirror::device_type HD;
2267  // inputInds and inputVals come from the user, so they are host data.
2268  typedef View<const GO*, HD, MemoryUnmanaged> GIVT; // gbl ind view type
2269  typedef View<const IST*, HD, MemoryUnmanaged> ISVT; // impl scalar view type
2270 
2271  GIVT indsK (inputCols, numEnt);
2272  ISVT valsK (reinterpret_cast<const IST*> (inputVals), numEnt);
2273  return this->template replaceGlobalValues<GIVT, ISVT> (globalRow,
2274  indsK,
2275  valsK);
2276  }
2277 
2278 
2279  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2280  LocalOrdinal
2282  sumIntoGlobalValues (const GlobalOrdinal gblRow,
2283  const Teuchos::ArrayView<const GlobalOrdinal>& gblInputInds,
2284  const Teuchos::ArrayView<const Scalar>& inputVals,
2285  const bool atomic)
2286  {
2287  using Kokkos::MemoryUnmanaged;
2288  using Kokkos::View;
2289  typedef impl_scalar_type IST;
2290  typedef LocalOrdinal LO;
2291  typedef GlobalOrdinal GO;
2292  typedef device_type DD;
2293  typedef typename View<LO*, DD>::HostMirror::device_type HD;
2294 
2295  if (! this->isFillActive ()) {
2296  // Fill must be active in order to call this method.
2297  return Teuchos::OrdinalTraits<LO>::invalid ();
2298  }
2299 
2300  // mfh 26 Nov 2015: Avoid calling getRowMap() or getCrsGraph(),
2301  // because they touch RCP's reference count, which is not thread
2302  // safe. Dereferencing an RCP or calling op-> does not touch the
2303  // reference count.
2304  const LO lclRow = this->staticGraph_.is_null () ?
2305  this->myGraph_->rowMap_->getLocalElement (gblRow) :
2306  this->staticGraph_->rowMap_->getLocalElement (gblRow);
2307  //const LO lclRow = this->getRowMap ()->getLocalElement (gblRow);
2308 
2309  if (lclRow == Teuchos::OrdinalTraits<LO>::invalid ()) {
2310  // gblRow is not in the row Map, so stash the given entries
2311  // away in a separate data structure. globalAssemble() (called
2312  // during fillComplete()) will exchange that data and sum it in
2313  // using sumIntoGlobalValues().
2314  this->insertNonownedGlobalValues (gblRow, gblInputInds, inputVals);
2315  // FIXME (mfh 08 Jul 2014) It's not clear what to return here,
2316  // since we won't know whether the input indices were valid
2317  // until globalAssemble (called in fillComplete) is called.
2318  // That's why insertNonownedGlobalValues doesn't return
2319  // anything. Just for consistency, I'll return the number of
2320  // entries that the user gave us.
2321  return static_cast<LO> (gblInputInds.size ());
2322  }
2323  else if (this->staticGraph_.is_null ()) {
2324  return Teuchos::OrdinalTraits<LO>::invalid ();
2325  }
2326  else {
2327  const RowInfo rowInfo = this->staticGraph_->getRowInfo (lclRow);
2328  auto curVals = this->getRowViewNonConst (rowInfo);
2329  const IST* inputValsRaw = reinterpret_cast<const IST*> (inputVals.getRawPtr ());
2330  // 'inputVals' and 'gblInputInds' come from the user, so they are host data.
2331  View<const IST*, HD, MemoryUnmanaged> valsIn (inputValsRaw, inputVals.size ());
2332  View<const GO*, HD, MemoryUnmanaged> indsIn (gblInputInds.getRawPtr (),
2333  gblInputInds.size ());
2334  return staticGraph_->template sumIntoGlobalValues<IST, HD, DD> (rowInfo,
2335  curVals,
2336  indsIn,
2337  valsIn,
2338  atomic);
2339  }
2340  }
2341 
2342 
2343  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2344  LocalOrdinal
2346  sumIntoGlobalValues (const GlobalOrdinal gblRow,
2347  const LocalOrdinal numEnt,
2348  const Scalar inputVals[],
2349  const GlobalOrdinal gblInputInds[],
2350  const bool atomic)
2351  {
2352  using Kokkos::MemoryUnmanaged;
2353  using Kokkos::View;
2354  typedef impl_scalar_type IST;
2355  typedef LocalOrdinal LO;
2356  typedef GlobalOrdinal GO;
2357  typedef device_type DD;
2358  typedef typename View<LO*, DD>::HostMirror::device_type HD;
2359 
2360  if (! this->isFillActive ()) {
2361  // Fill must be active in order to call this method.
2362  return Teuchos::OrdinalTraits<LO>::invalid ();
2363  }
2364 
2365  // mfh 26 Nov 2015: Avoid calling getRowMap() or getCrsGraph(),
2366  // because they touch RCP's reference count, which is not thread
2367  // safe. Dereferencing an RCP or calling op-> does not touch the
2368  // reference count.
2369  const LO lclRow = this->staticGraph_.is_null () ?
2370  this->myGraph_->rowMap_->getLocalElement (gblRow) :
2371  this->staticGraph_->rowMap_->getLocalElement (gblRow);
2372  //const LO lclRow = this->getRowMap ()->getLocalElement (gblRow);
2373 
2374  if (lclRow == Teuchos::OrdinalTraits<LO>::invalid ()) {
2375  // mfh 23 Mar 2017: This branch is not thread safe in a debug
2376  // build, in part because it uses Teuchos::ArrayView.
2377  using Teuchos::ArrayView;
2378  ArrayView<const GO> gblInputInds_av (numEnt == 0 ? NULL : gblInputInds, numEnt);
2379  ArrayView<const Scalar> inputVals_av (numEnt == 0 ? NULL : inputVals, numEnt);
2380 
2381  // gblRow is not in the row Map, so stash the given entries away
2382  // in a separate data structure. globalAssemble() (called
2383  // during fillComplete()) will exchange that data and sum it in
2384  // using sumIntoGlobalValues().
2385  this->insertNonownedGlobalValues (gblRow, gblInputInds_av, inputVals_av);
2386  // FIXME (mfh 08 Jul 2014) It's not clear what to return here,
2387  // since we won't know whether the given indices were valid
2388  // until globalAssemble (called in fillComplete) is called.
2389  // That's why insertNonownedGlobalValues doesn't return
2390  // anything. Just for consistency, I'll return the number of
2391  // entries that the user gave us.
2392  return numEnt;
2393  }
2394  else if (this->staticGraph_.is_null ()) {
2395  return Teuchos::OrdinalTraits<LO>::invalid ();
2396  }
2397  else {
2398  const RowInfo rowInfo = this->staticGraph_->getRowInfo (lclRow);
2399  auto curVals = this->getRowViewNonConst (rowInfo);
2400  const IST* inputValsIST = reinterpret_cast<const IST*> (inputVals);
2401  View<const IST*, HD, MemoryUnmanaged> valsIn (numEnt == 0 ? NULL : inputValsIST, numEnt);
2402  View<const GO*, HD, MemoryUnmanaged> indsIn (numEnt == 0 ? NULL : gblInputInds, numEnt);
2403  return staticGraph_->template sumIntoGlobalValues<IST, HD, DD> (rowInfo,
2404  curVals,
2405  indsIn,
2406  valsIn,
2407  atomic);
2408  }
2409  }
2410 
2411 
2412  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2413  LocalOrdinal
2415  sumIntoLocalValues (const LocalOrdinal localRow,
2416  const Teuchos::ArrayView<const LocalOrdinal>& indices,
2417  const Teuchos::ArrayView<const Scalar>& values,
2418  const bool atomic) const
2419  {
2420  using Kokkos::MemoryUnmanaged;
2421  using Kokkos::View;
2422  typedef impl_scalar_type IST;
2423  typedef LocalOrdinal LO;
2424  typedef device_type DD;
2425  typedef typename View<LO*, DD>::HostMirror::device_type HD;
2426  typedef View<const IST*, HD, MemoryUnmanaged> IVT;
2427  typedef View<const LO*, HD, MemoryUnmanaged> IIT;
2428 
2429  const IST* valsRaw = reinterpret_cast<const IST*> (values.getRawPtr ());
2430  IVT valsIn (valsRaw, values.size ());
2431  IIT indsIn (indices.getRawPtr (), indices.size ());
2432  return this->template sumIntoLocalValues<IIT, IVT> (localRow, indsIn,
2433  valsIn, atomic);
2434  }
2435 
2436  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2437  LocalOrdinal
2439  sumIntoLocalValues (const LocalOrdinal localRow,
2440  const LocalOrdinal numEnt,
2441  const Scalar vals[],
2442  const LocalOrdinal cols[],
2443  const bool atomic) const
2444  {
2445  using Kokkos::MemoryUnmanaged;
2446  using Kokkos::View;
2447  typedef impl_scalar_type IST;
2448  typedef LocalOrdinal LO;
2449  typedef device_type DD;
2450  typedef typename View<LO*, DD>::HostMirror::device_type HD;
2451  typedef View<const IST*, HD, MemoryUnmanaged> IVT;
2452  typedef View<const LO*, HD, MemoryUnmanaged> IIT;
2453 
2454  const IST* valsRaw = reinterpret_cast<const IST*> (vals);
2455  IVT valsIn (valsRaw, numEnt);
2456  IIT indsIn (cols, numEnt);
2457  return this->template sumIntoLocalValues<IIT, IVT> (localRow, indsIn,
2458  valsIn, atomic);
2459  }
2460 
2461 
2462  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2463  Teuchos::ArrayView<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type>
2465  getView (RowInfo rowinfo) const
2466  {
2467  using Kokkos::MemoryUnmanaged;
2468  using Kokkos::View;
2469  using Teuchos::ArrayView;
2470  typedef impl_scalar_type ST;
2471  typedef std::pair<size_t, size_t> range_type;
2472 
2473  if (k_values1D_.dimension_0 () != 0 && rowinfo.allocSize > 0) {
2474 #ifdef HAVE_TPETRA_DEBUG
2475  TEUCHOS_TEST_FOR_EXCEPTION(
2476  rowinfo.offset1D + rowinfo.allocSize > k_values1D_.dimension_0 (),
2477  std::range_error, "Tpetra::CrsMatrix::getView: Invalid access "
2478  "to 1-D storage of values." << std::endl << "rowinfo.offset1D (" <<
2479  rowinfo.offset1D << ") + rowinfo.allocSize (" << rowinfo.allocSize <<
2480  ") > k_values1D_.dimension_0() (" << k_values1D_.dimension_0 () << ").");
2481 #endif // HAVE_TPETRA_DEBUG
2482  range_type range (rowinfo.offset1D, rowinfo.offset1D + rowinfo.allocSize);
2483  typedef View<const ST*, execution_space, MemoryUnmanaged> subview_type;
2484  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
2485  // directly, because that first creates a _managed_ subview,
2486  // then returns an unmanaged version of that. That touches the
2487  // reference count, which costs performance in a measurable way.
2488  // Instead, we create a temporary unmanaged view, then create
2489  // the subview from that.
2490  subview_type sv = Kokkos::subview (subview_type (k_values1D_), range);
2491  const ST* const sv_raw = (rowinfo.allocSize == 0) ? NULL : sv.ptr_on_device ();
2492  return ArrayView<const ST> (sv_raw, rowinfo.allocSize);
2493  }
2494  else if (values2D_ != Teuchos::null) {
2495  return values2D_[rowinfo.localRow] ();
2496  }
2497  else {
2498  return ArrayView<impl_scalar_type> ();
2499  }
2500  }
2501 
2502 
2503  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2504  LocalOrdinal
2507  LocalOrdinal& numEnt,
2508  const RowInfo& rowinfo) const
2509  {
2510  if (k_values1D_.dimension_0 () != 0 && rowinfo.allocSize > 0) {
2511 #ifdef HAVE_TPETRA_DEBUG
2512  if (rowinfo.offset1D + rowinfo.allocSize > k_values1D_.dimension_0 ()) {
2513  vals = NULL;
2514  numEnt = 0;
2515  return Teuchos::OrdinalTraits<LocalOrdinal>::invalid ();
2516  }
2517 #endif // HAVE_TPETRA_DEBUG
2518  vals = k_values1D_.ptr_on_device () + rowinfo.offset1D;
2519  numEnt = rowinfo.allocSize;
2520  }
2521  else if (! values2D_.is_null ()) {
2522 #ifdef HAVE_TPETRA_DEBUG
2523  if (rowinfo.localRow >= static_cast<size_t> (values2D_.size ())) {
2524  vals = NULL;
2525  numEnt = 0;
2526  return Teuchos::OrdinalTraits<LocalOrdinal>::invalid ();
2527  }
2528 #endif // HAVE_TPETRA_DEBUG
2529  // Use const reference so that we don't update ArrayRCP's
2530  // reference count, which is not thread safe.
2531  const auto& curRow = values2D_[rowinfo.localRow];
2532  vals = curRow.getRawPtr ();
2533  numEnt = curRow.size ();
2534  }
2535  else {
2536  vals = NULL;
2537  numEnt = 0;
2538  }
2539 
2540  return static_cast<LocalOrdinal> (0);
2541  }
2542 
2543  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2544  LocalOrdinal
2547  LocalOrdinal& numEnt,
2548  const RowInfo& rowinfo) const
2549  {
2550  const impl_scalar_type* valsConst;
2551  const LocalOrdinal err = this->getViewRawConst (valsConst, numEnt, rowinfo);
2552  vals = const_cast<impl_scalar_type*> (valsConst);
2553  return err;
2554  }
2555 
2556  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2557  Kokkos::View<const typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type*,
2559  Kokkos::MemoryUnmanaged>
2561  getRowView (const RowInfo& rowInfo) const
2562  {
2563  using Kokkos::MemoryUnmanaged;
2564  using Kokkos::View;
2565  typedef impl_scalar_type ST;
2566  typedef View<const ST*, execution_space, MemoryUnmanaged> subview_type;
2567  typedef std::pair<size_t, size_t> range_type;
2568 
2569  if (k_values1D_.dimension_0 () != 0 && rowInfo.allocSize > 0) {
2570 #ifdef HAVE_TPETRA_DEBUG
2571  TEUCHOS_TEST_FOR_EXCEPTION
2572  (rowInfo.offset1D + rowInfo.allocSize > this->k_values1D_.dimension_0 (),
2573  std::range_error, "Tpetra::CrsMatrix::getRowView: Invalid access "
2574  "to 1-D storage of values. rowInfo.offset1D ("
2575  << rowInfo.offset1D << ") + rowInfo.allocSize (" << rowInfo.allocSize
2576  << ") > this->k_values1D_.dimension_0() ("
2577  << this->k_values1D_.dimension_0 () << ").");
2578 #endif // HAVE_TPETRA_DEBUG
2579  range_type range (rowInfo.offset1D, rowInfo.offset1D + rowInfo.allocSize);
2580  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
2581  // directly, because that first creates a _managed_ subview,
2582  // then returns an unmanaged version of that. That touches the
2583  // reference count, which costs performance in a measurable way.
2584  // Instead, we create a temporary unmanaged view, then create
2585  // the subview from that.
2586  return Kokkos::subview (subview_type (this->k_values1D_), range);
2587  }
2588  else if (this->values2D_ != Teuchos::null) {
2589  // Use a reference, so that I don't touch the Teuchos::ArrayView
2590  // reference count in a debug build. (It has no reference count
2591  // in a release build.) This ensures thread safety.
2592  auto& rowView = this->values2D_[rowInfo.localRow];
2593  return subview_type (rowView.getRawPtr (), rowView.size ());
2594  }
2595  else {
2596  return subview_type ();
2597  }
2598  }
2599 
2600  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2601  Kokkos::View<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type*,
2602  typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::execution_space,
2603  Kokkos::MemoryUnmanaged>
2605  getRowViewNonConst (const RowInfo& rowInfo) const
2606  {
2607  using Kokkos::MemoryUnmanaged;
2608  using Kokkos::View;
2609  typedef impl_scalar_type ST;
2610  typedef View<ST*, execution_space, MemoryUnmanaged> subview_type;
2611  typedef std::pair<size_t, size_t> range_type;
2612 
2613  if (k_values1D_.dimension_0 () != 0 && rowInfo.allocSize > 0) {
2614 #ifdef HAVE_TPETRA_DEBUG
2615  TEUCHOS_TEST_FOR_EXCEPTION
2616  (rowInfo.offset1D + rowInfo.allocSize > this->k_values1D_.dimension_0 (),
2617  std::range_error, "Tpetra::CrsMatrix::getRowViewNonConst: Invalid "
2618  "access to 1-D storage of values. rowInfo.offset1D ("
2619  << rowInfo.offset1D << ") + rowInfo.allocSize (" << rowInfo.allocSize
2620  << ") > this->k_values1D_.dimension_0() ("
2621  << this->k_values1D_.dimension_0 () << ").");
2622 #endif // HAVE_TPETRA_DEBUG
2623  range_type range (rowInfo.offset1D, rowInfo.offset1D + rowInfo.allocSize);
2624  // mfh 23 Nov 2015: Don't just create a subview of k_values1D_
2625  // directly, because that first creates a _managed_ subview,
2626  // then returns an unmanaged version of that. That touches the
2627  // reference count, which costs performance in a measurable way.
2628  // Instead, we create a temporary unmanaged view, then create
2629  // the subview from that.
2630  return Kokkos::subview (subview_type (this->k_values1D_), range);
2631  }
2632  else if (this->values2D_ != Teuchos::null) {
2633  // Use a reference, so that I don't touch the Teuchos::ArrayView
2634  // reference count in a debug build. (It has no reference count
2635  // in a release build.) This ensures thread safety.
2636  auto& rowView = this->values2D_[rowInfo.localRow];
2637  return subview_type (rowView.getRawPtr (), rowView.size ());
2638  }
2639  else {
2640  return subview_type ();
2641  }
2642  }
2643 
2644  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2645  Teuchos::ArrayView<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic>::impl_scalar_type>
2647  getViewNonConst (const RowInfo& rowinfo) const
2648  {
2649  return Teuchos::av_const_cast<impl_scalar_type> (this->getView (rowinfo));
2650  }
2651 
2652  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2653  void
2655  getLocalRowCopy (LocalOrdinal localRow,
2656  const Teuchos::ArrayView<LocalOrdinal>& indices,
2657  const Teuchos::ArrayView<Scalar>& values,
2658  size_t& numEntries) const
2659  {
2660  using Teuchos::ArrayView;
2661  using Teuchos::av_reinterpret_cast;
2662  const char tfecfFuncName[] = "getLocalRowCopy: ";
2663 
2664  TEUCHOS_TEST_FOR_EXCEPTION(
2665  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2666  "Tpetra::CrsMatrix::getLocalRowCopy: The matrix is globally indexed and "
2667  "does not have a column Map yet. That means we don't have local indices "
2668  "for columns yet, so it doesn't make sense to call this method. If the "
2669  "matrix doesn't have a column Map yet, you should call fillComplete on "
2670  "it first.");
2671 #ifdef HAVE_TPETRA_DEBUG
2672  TEUCHOS_TEST_FOR_EXCEPTION(
2673  ! staticGraph_->hasRowInfo (), std::runtime_error,
2674  "Tpetra::CrsMatrix::getLocalRowCopy: The graph's row information was "
2675  "deleted at fillComplete().");
2676 #endif // HAVE_TPETRA_DEBUG
2677 
2678  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
2679  const size_t theNumEntries = rowinfo.numEntries;
2680  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2681  (static_cast<size_t> (indices.size ()) < theNumEntries ||
2682  static_cast<size_t> (values.size ()) < theNumEntries,
2683  std::runtime_error, "Row with local index " << localRow << " has " <<
2684  theNumEntries << " entry/ies, but indices.size() = " <<
2685  indices.size () << " and values.size() = " << values.size () << ".");
2686  numEntries = theNumEntries; // first side effect
2687 
2688  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2689  if (staticGraph_->isLocallyIndexed ()) {
2690  const LocalOrdinal* curLclInds;
2691  const impl_scalar_type* curVals;
2692  LocalOrdinal numSpots; // includes both current entries and extra space
2693 
2694  // If we got this far, rowinfo should be correct and should
2695  // refer to a valid local row. Thus, these error checks are
2696  // superfluous, but we retain them in a debug build.
2697 #ifdef HAVE_TPETRA_DEBUG
2698  int err =
2699  staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
2700  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2701  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2702  "staticGraph_->getLocalViewRawConst returned nonzero error code "
2703  << err << ".");
2704  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2705  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
2706  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
2707  << ".");
2708  const LocalOrdinal numSpotsBefore = numSpots;
2709  err = getViewRawConst (curVals, numSpots, rowinfo);
2710  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2711  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2712  "getViewRaw returned nonzero error code " << err << ".");
2713  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2714  (numSpotsBefore != numSpots, std::logic_error,
2715  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
2716  << numSpots << ".");
2717 #else
2718  (void) staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
2719  (void) getViewRawConst (curVals, numSpots, rowinfo);
2720 #endif // HAVE_TPETRA_DEBUG
2721 
2722  for (size_t j = 0; j < theNumEntries; ++j) {
2723  values[j] = curVals[j];
2724  indices[j] = curLclInds[j];
2725  }
2726  }
2727  else if (staticGraph_->isGloballyIndexed ()) {
2728  const map_type& colMap = * (staticGraph_->colMap_);
2729  const GlobalOrdinal* curGblInds;
2730  const impl_scalar_type* curVals;
2731  LocalOrdinal numSpots; // includes both current entries and extra space
2732 
2733  // If we got this far, rowinfo should be correct and should
2734  // refer to a valid local row. Thus, these error checks are
2735  // superfluous, but we retain them in a debug build.
2736 #ifdef HAVE_TPETRA_DEBUG
2737  int err =
2738  staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
2739  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2740  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2741  "staticGraph_->getGlobalViewRawConst returned nonzero error code "
2742  << err << ".");
2743  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2744  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
2745  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
2746  << ".");
2747  const LocalOrdinal numSpotsBefore = numSpots;
2748  err = getViewRawConst (curVals, numSpots, rowinfo);
2749  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2750  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2751  "getViewRawConst returned nonzero error code " << err << ".");
2752  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2753  (numSpotsBefore != numSpots, std::logic_error,
2754  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
2755  << numSpots << ".");
2756 #else
2757  (void) staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
2758  (void) getViewRawConst (curVals, numSpots, rowinfo);
2759 #endif //HAVE_TPETRA_DEBUG
2760 
2761  for (size_t j = 0; j < theNumEntries; ++j) {
2762  values[j] = curVals[j];
2763  indices[j] = colMap.getLocalElement (curGblInds[j]);
2764  }
2765  }
2766  }
2767  }
2768 
2769  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2770  void
2772  getGlobalRowCopy (GlobalOrdinal globalRow,
2773  const Teuchos::ArrayView<GlobalOrdinal>& indices,
2774  const Teuchos::ArrayView<Scalar>& values,
2775  size_t& numEntries) const
2776  {
2777  using Teuchos::ArrayView;
2778  using Teuchos::av_reinterpret_cast;
2779  const char tfecfFuncName[] = "getGlobalRowCopy: ";
2780 
2781  const RowInfo rowinfo =
2782  staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
2783  const size_t theNumEntries = rowinfo.numEntries;
2784  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2785  static_cast<size_t> (indices.size ()) < theNumEntries ||
2786  static_cast<size_t> (values.size ()) < theNumEntries,
2787  std::runtime_error, "Row with global index " << globalRow << " has "
2788  << theNumEntries << " entry/ies, but indices.size() = " <<
2789  indices.size () << " and values.size() = " << values.size () << ".");
2790  numEntries = theNumEntries; // first side effect
2791 
2792  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2793  if (staticGraph_->isLocallyIndexed ()) {
2794  const map_type& colMap = * (staticGraph_->colMap_);
2795  const LocalOrdinal* curLclInds;
2796  const impl_scalar_type* curVals;
2797  LocalOrdinal numSpots; // includes both current entries and extra space
2798 
2799  // If we got this far, rowinfo should be correct and should
2800  // refer to a valid local row. Thus, these error checks are
2801  // superfluous, but we retain them in a debug build.
2802 #ifdef HAVE_TPETRA_DEBUG
2803  int err =
2804  staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
2805  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2806  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2807  "staticGraph_->getLocalViewRawConst returned nonzero error code "
2808  << err << ".");
2809  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2810  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
2811  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
2812  << ".");
2813  const LocalOrdinal numSpotsBefore = numSpots;
2814  err = getViewRawConst (curVals, numSpots, rowinfo);
2815  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2816  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2817  "getViewRaw returned nonzero error code " << err << ".");
2818  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2819  (numSpotsBefore != numSpots, std::logic_error,
2820  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
2821  << numSpots << ".");
2822 #else
2823  (void) staticGraph_->getLocalViewRawConst (curLclInds, numSpots, rowinfo);
2824  (void) getViewRawConst (curVals, numSpots, rowinfo);
2825 #endif //HAVE_TPETRA_DEBUG
2826 
2827  for (size_t j = 0; j < theNumEntries; ++j) {
2828  values[j] = curVals[j];
2829  indices[j] = colMap.getGlobalElement (curLclInds[j]);
2830  }
2831  }
2832  else if (staticGraph_->isGloballyIndexed ()) {
2833  const GlobalOrdinal* curGblInds;
2834  const impl_scalar_type* curVals;
2835  LocalOrdinal numSpots; // includes both current entries and extra space
2836 
2837  // If we got this far, rowinfo should be correct and should
2838  // refer to a valid local row. Thus, these error checks are
2839  // superfluous, but we retain them in a debug build.
2840 #ifdef HAVE_TPETRA_DEBUG
2841  int err =
2842  staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
2843  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2844  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2845  "staticGraph_->getGlobalViewRawConst returned nonzero error code "
2846  << err << ".");
2847  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2848  (static_cast<size_t> (numSpots) < theNumEntries, std::logic_error,
2849  "numSpots = " << numSpots << " < theNumEntries = " << theNumEntries
2850  << ".");
2851  const LocalOrdinal numSpotsBefore = numSpots;
2852  err = getViewRawConst (curVals, numSpots, rowinfo);
2853  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2854  (err != static_cast<LocalOrdinal> (0), std::logic_error,
2855  "getViewRawConst returned nonzero error code " << err << ".");
2856  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2857  (numSpotsBefore != numSpots, std::logic_error,
2858  "numSpotsBefore = " << numSpotsBefore << " != numSpots = "
2859  << numSpots << ".");
2860 #else
2861  (void) staticGraph_->getGlobalViewRawConst (curGblInds, numSpots, rowinfo);
2862  (void) getViewRawConst (curVals, numSpots, rowinfo);
2863 #endif //HAVE_TPETRA_DEBUG
2864 
2865  for (size_t j = 0; j < theNumEntries; ++j) {
2866  values[j] = curVals[j];
2867  indices[j] = curGblInds[j];
2868  }
2869  }
2870  }
2871  }
2872 
2873  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2874  void
2876  getLocalRowView (LocalOrdinal localRow,
2877  Teuchos::ArrayView<const LocalOrdinal>& indices,
2878  Teuchos::ArrayView<const Scalar>& values) const
2879  {
2880  using Teuchos::ArrayView;
2881  using Teuchos::av_reinterpret_cast;
2882  typedef LocalOrdinal LO;
2883  const char tfecfFuncName[] = "getLocalRowView: ";
2884 
2885  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2886  isGloballyIndexed (), std::runtime_error, "The matrix currently stores "
2887  "its indices as global indices, so you cannot get a view with local "
2888  "column indices. If the matrix has a column Map, you may call "
2889  "getLocalRowCopy() to get local column indices; otherwise, you may get "
2890  "a view with global column indices by calling getGlobalRowCopy().");
2891  indices = Teuchos::null;
2892  values = Teuchos::null;
2893  const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
2894  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2895  rowinfo.numEntries > 0) {
2896  ArrayView<const LO> indTmp = staticGraph_->getLocalView (rowinfo);
2897  ArrayView<const Scalar> valTmp =
2898  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
2899  indices = indTmp (0, rowinfo.numEntries);
2900  values = valTmp (0, rowinfo.numEntries);
2901  }
2902 
2903 #ifdef HAVE_TPETRA_DEBUG
2904  const char suffix[] = ". This should never happen. Please report this "
2905  "bug to the Tpetra developers.";
2906  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2907  (static_cast<size_t> (indices.size ()) !=
2908  static_cast<size_t> (values.size ()), std::logic_error,
2909  "At the end of this method, for local row " << localRow << ", "
2910  "indices.size() = " << indices.size () << " != values.size () = "
2911  << values.size () << suffix);
2912  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2913  (static_cast<size_t> (indices.size ()) !=
2914  static_cast<size_t> (rowinfo.numEntries), std::logic_error,
2915  "At the end of this method, for local row " << localRow << ", "
2916  "indices.size() = " << indices.size () << " != rowinfo.numEntries = "
2917  << rowinfo.numEntries << suffix);
2918  const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
2919  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2920  (rowinfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
2921  "of this method, for local row " << localRow << ", rowinfo.numEntries = "
2922  << rowinfo.numEntries << " != getNumEntriesInLocalRow(localRow) = " <<
2923  expectedNumEntries << suffix);
2924 #endif // HAVE_TPETRA_DEBUG
2925  }
2926 
2927  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2928  LocalOrdinal
2930  getLocalRowView (const LocalOrdinal lclRow,
2931  LocalOrdinal& numEnt,
2932  const impl_scalar_type*& val,
2933  const LocalOrdinal*& ind) const
2934  {
2935  typedef LocalOrdinal LO;
2936 
2937  // Don't call getCrsGraph(), because that modfies an RCP reference
2938  // count, which is not thread safe. Checking whether an RCP is
2939  // null does NOT modify its reference count, and is therefore
2940  // thread safe. Note that isGloballyIndexed() calls
2941  // getCrsGraph(), so we have to go to the graph directly.
2942  if (staticGraph_.is_null () || staticGraph_->isGloballyIndexed ()) {
2943  return Tpetra::Details::OrdinalTraits<LO>::invalid ();
2944  }
2945  else {
2946  const RowInfo rowInfo = staticGraph_->getRowInfo (lclRow);
2947  if (rowInfo.localRow == Tpetra::Details::OrdinalTraits<size_t>::invalid ()) {
2948  numEnt = 0; // no valid entries in this row on the calling process
2949  val = NULL;
2950  ind = NULL;
2951  // First argument (lclRow) invalid, so make 1 the error code.
2952  return static_cast<LO> (1);
2953  }
2954  else {
2955  numEnt = static_cast<LO> (rowInfo.numEntries);
2956  auto lclColInds = staticGraph_->getLocalKokkosRowView (rowInfo);
2957  ind = lclColInds.ptr_on_device (); // FIXME (mfh 18 Jul 2016) UVM
2958  const LO err = this->getViewRawConst (val, numEnt, rowInfo);
2959  return err;
2960  }
2961  }
2962  }
2963 
2964  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2965  LocalOrdinal
2967  getLocalRowViewRaw (const LocalOrdinal lclRow,
2968  LocalOrdinal& numEnt,
2969  const LocalOrdinal*& lclColInds,
2970  const Scalar*& vals) const
2971  {
2972  const impl_scalar_type* vals_ist = NULL;
2973  const LocalOrdinal errCode =
2974  this->getLocalRowView (lclRow, numEnt, vals_ist, lclColInds);
2975  vals = reinterpret_cast<const Scalar*> (vals_ist);
2976  return errCode;
2977  }
2978 
// Get a view of one row's global column indices and values.
//
// \param globalRow [in]  Global index of the row to view.
// \param indices   [out] View of the row's global column indices.
// \param values    [out] View of the row's values (as Scalar).
//
// Throws std::runtime_error if the matrix is locally indexed.
//
// Both outputs are first reset to Teuchos::null.  They are only filled in
// when the row info reports a valid local row and at least one entry, so
// a row that is not found (or is empty) yields null views.
// In a debug build (HAVE_TPETRA_DEBUG) the method additionally verifies
// that the output sizes agree with each other, with rowinfo.numEntries,
// and with getNumEntriesInGlobalRow(globalRow), throwing std::logic_error
// otherwise.
2979  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
2980  void
2982  getGlobalRowView (GlobalOrdinal globalRow,
2983  Teuchos::ArrayView<const GlobalOrdinal>& indices,
2984  Teuchos::ArrayView<const Scalar>& values) const
2985  {
2986  using Teuchos::ArrayView;
2987  using Teuchos::av_reinterpret_cast;
2988  typedef GlobalOrdinal GO;
2989  const char tfecfFuncName[] = "getGlobalRowView: ";
2990 
2991  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2992  isLocallyIndexed (), std::runtime_error,
2993  "The matrix is locally indexed, so we cannot return a view of the row "
2994  "with global column indices. Use getGlobalRowCopy() instead.");
// Default result: empty views.
2995  indices = Teuchos::null;
2996  values = Teuchos::null;
2997  const RowInfo rowinfo =
2998  staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
2999  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3000  rowinfo.numEntries > 0) {
3001  ArrayView<const GO> indTmp = staticGraph_->getGlobalView (rowinfo);
3002  ArrayView<const Scalar> valTmp =
3003  av_reinterpret_cast<const Scalar> (this->getView (rowinfo));
// Trim both views to the first rowinfo.numEntries elements.
3004  indices = indTmp (0, rowinfo.numEntries);
3005  values = valTmp (0, rowinfo.numEntries);
3006  }
3007 
3008 #ifdef HAVE_TPETRA_DEBUG
3009  const char suffix[] = ". This should never happen. Please report this "
3010  "bug to the Tpetra developers.";
3011  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3012  (static_cast<size_t> (indices.size ()) !=
3013  static_cast<size_t> (values.size ()), std::logic_error,
3014  "At the end of this method, for global row " << globalRow << ", "
3015  "indices.size() = " << indices.size () << " != values.size () = "
3016  << values.size () << suffix);
3017  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3018  (static_cast<size_t> (indices.size ()) !=
3019  static_cast<size_t> (rowinfo.numEntries), std::logic_error,
3020  "At the end of this method, for global row " << globalRow << ", "
3021  "indices.size() = " << indices.size () << " != rowinfo.numEntries = "
3022  << rowinfo.numEntries << suffix);
3023  const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3024  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3025  (rowinfo.numEntries != expectedNumEntries, std::logic_error, "At the end "
3026  "of this method, for global row " << globalRow << ", rowinfo.numEntries "
3027  "= " << rowinfo.numEntries << " != getNumEntriesInGlobalRow(globalRow) ="
3028  " " << expectedNumEntries << suffix);
3029 #endif // HAVE_TPETRA_DEBUG
3030  }
3031 
// Multiply every stored entry of the matrix by alpha, in place.
//
// \param alpha [in] Scaling factor; converted once to impl_scalar_type.
//
// Throws std::runtime_error if fill is not active.
//
// Nothing is done when the graph's indices are not allocated, or when the
// calling process has zero rows or zero entries.  Otherwise the storage
// that is walked depends on the graph's profile type:
//   - StaticProfile:  the rows of lclMatrix_.
//   - DynamicProfile: the per-row arrays in values2D_, limited to the
//     first getNumEntriesInLocalRow(row) entries of each row.
// Any other profile type falls through without modifying anything.
3032  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3033  void
3035  scale (const Scalar& alpha)
3036  {
3037  typedef LocalOrdinal LO;
3038  typedef typename Teuchos::Array<Scalar>::size_type size_type;
3039  const char tfecfFuncName[] = "scale: ";
3040  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
3041 
3042  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3043  ! isFillActive (), std::runtime_error,
3044  "Fill must be active before you may call this method. "
3045  "Please call resumeFill() to make fill active.");
3046 
3047  const size_t nlrs = staticGraph_->getNodeNumRows ();
3048  const size_t numEntries = staticGraph_->getNodeNumEntries ();
3049  if (! staticGraph_->indicesAreAllocated () ||
3050  nlrs == 0 || numEntries == 0) {
3051  // do nothing
3052  }
3053  else {
3054  if (staticGraph_->getProfileType () == StaticProfile) {
// Row count comes from the local matrix here, not from nlrs.
3055  const LO lclNumRows = lclMatrix_.numRows ();
3056  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3057  auto row_i = lclMatrix_.row (lclRow);
3058  for (LO k = 0; k < row_i.length; ++k) {
3059  // FIXME (mfh 02 Jan 2015) This assumes CUDA UVM.
3060  row_i.value (k) *= theAlpha;
3061  }
3062  }
3063  }
3064  else if (staticGraph_->getProfileType () == DynamicProfile) {
3065  for (size_t row = 0; row < nlrs; ++row) {
// Only the valid entries of the row are scaled, not the whole array.
3066  const size_type numEnt = getNumEntriesInLocalRow (row);
3067  Teuchos::ArrayView<impl_scalar_type> rowVals = values2D_[row] ();
3068  for (size_type k = 0; k < numEnt; ++k) {
3069  rowVals[k] *= theAlpha;
3070  }
3071  }
3072  }
3073  }
3074  }
3075 
// Overwrite the matrix's stored values with alpha.
//
// \param alpha [in] Value to store; converted once to impl_scalar_type.
//
// Throws std::runtime_error if fill is not active.
//
// Nothing is done when the graph's indices are not allocated or when the
// calling process has zero entries.  Otherwise:
//   - StaticProfile:  the whole k_values1D_ View is filled via
//     Kokkos::deep_copy.
//   - DynamicProfile: each values2D_[row] array is filled from begin() to
//     end(), i.e. every allocated slot of the row rather than only the
//     valid entries.
// Any other profile type falls through without modifying anything.
3076  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3077  void
3079  setAllToScalar (const Scalar& alpha)
3080  {
3081  const char tfecfFuncName[] = "setAllToScalar: ";
3082  const impl_scalar_type theAlpha = static_cast<impl_scalar_type> (alpha);
3083  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3084  ! isFillActive (), std::runtime_error,
3085  "Fill must be active before you may call this method. "
3086  "Please call resumeFill() to make fill active.");
3087 
3088  // replace all values in the matrix
3089  // it is easiest to replace all allocated values, instead of replacing only the ones with valid entries
3090  // however, if there are no valid entries, we can short-circuit
3091  // furthermore, if the values aren't allocated, we can short-circuit (no entry have been inserted so far)
3092  const size_t nlrs = staticGraph_->getNodeNumRows();
3093  const size_t numEntries = staticGraph_->getNodeNumEntries();
3094  if (! staticGraph_->indicesAreAllocated () || numEntries == 0) {
3095  // do nothing
3096  }
3097  else {
3098  const ProfileType profType = staticGraph_->getProfileType ();
3099  if (profType == StaticProfile) {
3100  // FIXME (mfh 24 Dec 2014) Once CrsMatrix implements DualView
3101  // semantics, this would be the place to mark memory as
3102  // modified.
3103  Kokkos::deep_copy (k_values1D_, theAlpha);
3104  }
3105  else if (profType == DynamicProfile) {
3106  for (size_t row = 0; row < nlrs; ++row) {
3107  std::fill (values2D_[row].begin (), values2D_[row].end (), theAlpha);
3108  }
3109  }
3110  }
3111  }
3112 
// Set the matrix's complete local structure and values from three
// Kokkos Views (row offsets, column indices, values).
//
// \param rowPointers   [in] Row offsets; forwarded to the graph.
// \param columnIndices [in] Column indices; forwarded to the graph.
// \param values        [in] Values; becomes the local matrix's values.
//
// Throws:
//   - std::invalid_argument if columnIndices.size() != values.size().
//   - std::runtime_error if myGraph_ is null, or if
//     myGraph_->setAllIndices() throws a std::exception (the original
//     message is appended to the new one).
//   - std::logic_error if, after setAllIndices(), the graph's local graph
//     does not have the same dimensions as the inputs.
//
// On success, lclMatrix_ is rebuilt from 'values' and the graph's local
// graph, k_values1D_ is made to refer to lclMatrix_.values, and
// checkInternalState() is called.
//
// NOTE(review): myGraph_->getColMap() is dereferenced without a null
// check, so this presumably requires the graph to already have a column
// Map -- confirm against callers.
3113  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3114  void
3116  setAllValues (const typename local_matrix_type::row_map_type& rowPointers,
3117  const typename local_graph_type::entries_type::non_const_type& columnIndices,
3118  const typename local_matrix_type::values_type& values)
3119  {
3120  const char tfecfFuncName[] = "setAllValues: ";
3121  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3122  (columnIndices.size () != values.size (), std::invalid_argument,
3123  "columnIndices.size() = " << columnIndices.size () << " != values.size()"
3124  " = " << values.size () << ".");
3125  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3126  (myGraph_.is_null (), std::runtime_error, "myGraph_ must not be null.");
3127 
// Translate any std::exception from the graph into a std::runtime_error
// that carries this method's name as a prefix.
3128  try {
3129  myGraph_->setAllIndices (rowPointers, columnIndices);
3130  }
3131  catch (std::exception &e) {
3132  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3133  (true, std::runtime_error, "myGraph_->setAllIndices() threw an "
3134  "exception: " << e.what ());
3135  }
3136  // Make sure that myGraph_ now has a local graph. It may not be
3137  // fillComplete yet, so it's important to check. We don't care
3138  // whether setAllIndices() did a shallow copy or a deep copy, so a
3139  // good way to check is to compare dimensions.
3140  auto lclGraph = myGraph_->getLocalGraph ();
3141  const size_t numEnt = lclGraph.entries.dimension_0 ();
3142  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3143  (lclGraph.row_map.dimension_0 () != rowPointers.dimension_0 () ||
3144  numEnt != static_cast<size_t> (columnIndices.dimension_0 ()),
3145  std::logic_error, "myGraph_->setAllIndices() did not correctly create "
3146  "local graph. Please report this bug to the Tpetra developers.");
3147 
// The local matrix's column count is the column Map's local element count.
3148  const size_t numCols = myGraph_->getColMap ()->getNodeNumElements ();
3149  this->lclMatrix_ = local_matrix_type ("Tpetra::CrsMatrix::lclMatrix_",
3150  numCols, values, lclGraph);
3151  // FIXME (22 Jun 2016) I would very much like to get rid of
3152  // k_values1D_ at some point. I find it confusing to have all
3153  // these extra references lying around.
3154  this->k_values1D_ = this->lclMatrix_.values;
3155 
3156  checkInternalState ();
3157  }
3158 
// Teuchos::ArrayRCP overload of setAllValues().
//
// \param ptr [in] Row offsets, as size_t.
// \param ind [in] Local column indices.
// \param val [in] Values, as Scalar.
//
// All three inputs are copied into Kokkos Views, and the copies are then
// passed to the Kokkos::View overload of setAllValues():
//   - ptr is wrapped in an unmanaged host View and copied into a newly
//     allocated row_map_type::non_const_type View by
//     ::Tpetra::Details::copyOffsets.
//   - ind and val are copied with getKokkosViewDeepCopy<device_type>;
//     val is first reinterpreted as impl_scalar_type.
//
// Throws std::logic_error if the native offsets View and the size_t View
// end up with different lengths, plus whatever the View overload throws.
3159  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3160  void
3162  setAllValues (const Teuchos::ArrayRCP<size_t>& ptr,
3163  const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3164  const Teuchos::ArrayRCP<Scalar>& val)
3165  {
3166  using Kokkos::Compat::getKokkosViewDeepCopy;
3167  using Teuchos::ArrayRCP;
3168  using Teuchos::av_reinterpret_cast;
3169  typedef device_type DT;
3170  typedef impl_scalar_type IST;
3171  typedef typename local_matrix_type::row_map_type row_map_type;
3172  //typedef typename row_map_type::non_const_value_type row_offset_type;
3173  const char tfecfFuncName[] = "setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
3174 
3175  // The row offset type may depend on the execution space. It may
3176  // not necessarily be size_t. If it's not, we need to make a deep
3177  // copy. We need to make a deep copy anyway so that Kokkos can
3178  // own the memory. Regardless, ptrIn gets the copy.
3179  typename row_map_type::non_const_type ptrNative ("ptr", ptr.size ());
// Non-owning host view over the caller's offsets; used only as the
// source of the copy below.
3180  Kokkos::View<const size_t*,
3181  typename row_map_type::array_layout,
3182  Kokkos::HostSpace,
3183  Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
3184  ::Tpetra::Details::copyOffsets (ptrNative, ptrSizeT);
3185 
3186  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3187  (ptrNative.dimension_0 () != ptrSizeT.dimension_0 (),
3188  std::logic_error, "ptrNative.dimension_0() = " <<
3189  ptrNative.dimension_0 () << " != ptrSizeT.dimension_0() = "
3190  << ptrSizeT.dimension_0 () << ". Please report this bug to the "
3191  "Tpetra developers.");
3192 
3193  auto indIn = getKokkosViewDeepCopy<DT> (ind ());
3194  auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
3195  this->setAllValues (ptrNative, indIn, valIn);
3196  }
3197 
// Fill 'offsets' with the graph's local diagonal offsets.
//
// \param offsets [in/out] Host array that receives one offset per local
//   row.  It is resized to the local number of rows if it is shorter than
//   that; it is never shrunk.  Only the first getNodeNumRows() entries are
//   written.
//
// Throws std::runtime_error if the matrix has no graph (staticGraph_ is
// null).
//
// The offsets themselves are computed by
// staticGraph_->getLocalDiagOffsets().  If the device memory space is
// Kokkos::HostSpace, the graph writes straight into the caller's array
// through an unmanaged View; otherwise the graph fills a temporary device
// View, which is then deep-copied into the caller's array.
3198  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3199  void
3201  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
3202  {
3203  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
3204  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3205  (staticGraph_.is_null (), std::runtime_error, "The matrix has no graph.");
3206 
3207  // mfh 11 May 2016: We plan to deprecate the ArrayRCP version of
3208  // this method in CrsGraph too, so don't call it (otherwise build
3209  // warnings will show up and annoy users). Instead, copy results
3210  // in and out, if the memory space requires it.
3211 
3212  const size_t lclNumRows = staticGraph_->getNodeNumRows ();
// Grow only: an array that is already long enough is left at its size.
3213  if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
3214  offsets.resize (lclNumRows);
3215  }
3216 
3217  // The input ArrayRCP must always be a host pointer. Thus, if
3218  // device_type::memory_space is Kokkos::HostSpace, it's OK for us
3219  // to write to that allocation directly as a Kokkos::View.
3220  typedef typename device_type::memory_space memory_space;
// This is an ordinary run-time 'if' on a compile-time constant, so both
// branches are compiled for every device_type.
3221  if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3222  // It is always syntactically correct to assign a raw host
3223  // pointer to a device View, so this code will compile correctly
3224  // even if this branch never runs.
3225  typedef Kokkos::View<size_t*, device_type,
3226  Kokkos::MemoryUnmanaged> output_type;
3227  output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3228  staticGraph_->getLocalDiagOffsets (offsetsOut);
3229  }
3230  else {
3231  Kokkos::View<size_t*, device_type> offsetsTmp ("diagOffsets", lclNumRows);
3232  staticGraph_->getLocalDiagOffsets (offsetsTmp);
3233  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
3234  Kokkos::MemoryUnmanaged> output_type;
3235  output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3236  Kokkos::deep_copy (offsetsOut, offsetsTmp);
3237  }
3238  }
3239 
// One-argument getLocalDiagCopy: copy the matrix's diagonal into the
// Vector 'diag' without using precomputed diagonal offsets.
//
// NOTE(review): the declarator lines (embedded numbers 3242-3243) and the
// lines numbered 3290 and 3295 are absent from this listing; the method
// name is taken from tfecfFuncName below, and 'diag' is the parameter the
// body refers to.
//
// Throws std::runtime_error if the matrix has no graph, or if it has no
// column Map.  Returns without doing anything on processes where the row
// Map or its communicator is null.  In a debug build (HAVE_TPETRA_DEBUG),
// also throws std::runtime_error if diag's Map is not compatible with the
// row Map.
//
// If the matrix is fill complete, the diagonal is extracted from
// lclMatrix_ by getDiagCopyWithoutOffsets, using the local row and column
// Maps.  Otherwise the work is delegated to
// getLocalDiagCopyWithoutOffsetsNotFillComplete (diag, *this).  The return
// value of either helper is discarded.
3240  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3241  void
3244  {
3245  using Teuchos::ArrayRCP;
3246  using Teuchos::ArrayView;
3247  using Teuchos::av_reinterpret_cast;
3248  const char tfecfFuncName[] = "getLocalDiagCopy (1-arg): ";
3249  typedef local_ordinal_type LO;
3250 
3251 
3252  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3253  staticGraph_.is_null (), std::runtime_error,
3254  "This method requires that the matrix have a graph.");
3255  auto rowMapPtr = this->getRowMap ();
3256  if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
3257  // Processes on which the row Map or its communicator is null
3258  // don't participate. Users shouldn't even call this method on
3259  // those processes.
3260  return;
3261  }
3262  auto colMapPtr = this->getColMap ();
3263  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3264  (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
3265  "This method requires that the matrix have a column Map.");
3266  const map_type& rowMap = * rowMapPtr;
3267  const map_type& colMap = * colMapPtr;
3268  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
3269 
3270 #ifdef HAVE_TPETRA_DEBUG
3271  // isCompatible() requires an all-reduce, and thus this check
3272  // should only be done in debug mode.
3273  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3274  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
3275  "The input Vector's Map must be compatible with the CrsMatrix's row "
3276  "Map. You may check this by using Map's isCompatible method: "
3277  "diag.getMap ()->isCompatible (A.getRowMap ());");
3278 #endif // HAVE_TPETRA_DEBUG
3279 
3280  if (this->isFillComplete ()) {
// Mark diag as modified on device before writing through its device view.
3281  diag.template modify<device_type> ();
3282  const auto D_lcl = diag.template getLocalView<device_type> ();
3283  // 1-D subview of the first (and only) column of D_lcl.
3284  const auto D_lcl_1d =
3285  Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3286 
3287  const auto lclRowMap = rowMap.getLocalMap ();
3288  const auto lclColMap = colMap.getLocalMap ();
3289  const auto lclMatrix = this->lclMatrix_;
3291  (void) getDiagCopyWithoutOffsets (D_lcl_1d, lclRowMap,
3292  lclColMap, lclMatrix);
3293  }
3294  else {
3296  (void) getLocalDiagCopyWithoutOffsetsNotFillComplete (diag, *this);
3297  }
3298  }
3299 
// getLocalDiagCopy overload taking precomputed diagonal offsets as an
// unmanaged device Kokkos::View.
//
// NOTE(review): the first declarator lines (embedded numbers 3302-3303),
// which name the method and declare 'diag', are absent from this listing;
// the name is taken from tfecfFuncName below.
//
// \param offsets [in] Diagonal offsets, one per local row; passed through
//   unchanged to KokkosSparse::getDiagCopy.
//
// In a debug build (HAVE_TPETRA_DEBUG), throws std::runtime_error if
// diag's Map is not compatible with the matrix's row Map.
//
// diag is marked modified on device, and the diagonal is written into the
// first getNodeNumRows() rows of column 0 of its device local view.
3300  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3301  void
3304  const Kokkos::View<const size_t*, device_type,
3305  Kokkos::MemoryUnmanaged>& offsets) const
3306  {
3307  typedef LocalOrdinal LO;
3308 
3309 #ifdef HAVE_TPETRA_DEBUG
3310  const char tfecfFuncName[] = "getLocalDiagCopy: ";
3311  const map_type& rowMap = * (this->getRowMap ());
3312  // isCompatible() requires an all-reduce, and thus this check
3313  // should only be done in debug mode.
3314  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3315  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
3316  "The input Vector's Map must be compatible with (in the sense of Map::"
3317  "isCompatible) the CrsMatrix's row Map.");
3318 #endif // HAVE_TPETRA_DEBUG
3319 
3320  // For now, we fill the Vector on the host and sync to device.
3321  // Later, we may write a parallel kernel that works entirely on
3322  // device.
3323  //
3324  // NOTE (mfh 21 Jan 2016): The host kernel here assumes UVM. Once
3325  // we write a device kernel, it will not need to assume UVM.
3326 
3327  diag.template modify<device_type> ();
3328  auto D_lcl = diag.template getLocalView<device_type> ();
3329  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
3330  // Get 1-D subview of the first (and only) column of D_lcl.
3331  auto D_lcl_1d =
3332  Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3333 
3334  KokkosSparse::getDiagCopy (D_lcl_1d, offsets, this->lclMatrix_);
3335  }
3336 
// getLocalDiagCopy overload taking precomputed diagonal offsets as a host
// Teuchos::ArrayView.
//
// NOTE(review): the first declarator lines (embedded numbers 3339-3340)
// and the line numbered 3345 are absent from this listing.  The body uses
// 'diag', 'vec_type' and 'STS', which are presumably declared there or at
// class scope.
//
// \param offsets [in] Diagonal offsets indexed by local row.  An entry
//   equal to Tpetra::Details::OrdinalTraits<size_t>::invalid() means the
//   row has no diagonal entry.
//
// In a debug build (HAVE_TPETRA_DEBUG), throws std::runtime_error if
// diag's Map is not compatible with the matrix's row Map.
//
// The Vector is filled on the host: for each local row, the output is
// first set to STS::zero() and then, if the row's offset is valid,
// overwritten with the value at that offset in the row of lclMatrix_.
// Finally diag is synced to execution_space.
//
// NOTE(review): h_offsets is indexed for every local row without checking
// offsets.size(); this assumes the caller passes at least
// getNodeNumRows() offsets -- confirm.
3337  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3338  void
3341  const Teuchos::ArrayView<const size_t>& offsets) const
3342  {
3343  typedef LocalOrdinal LO;
3344  typedef impl_scalar_type IST;
3346  typedef typename vec_type::dual_view_type dual_view_type;
3347  typedef typename dual_view_type::host_mirror_space::execution_space host_execution_space;
3348 
3349 #ifdef HAVE_TPETRA_DEBUG
3350  const char tfecfFuncName[] = "getLocalDiagCopy: ";
3351  const map_type& rowMap = * (this->getRowMap ());
3352  // isCompatible() requires an all-reduce, and thus this check
3353  // should only be done in debug mode.
3354  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3355  ! diag.getMap ()->isCompatible (rowMap), std::runtime_error,
3356  "The input Vector's Map must be compatible with (in the sense of Map::"
3357  "isCompatible) the CrsMatrix's row Map.");
3358 #endif // HAVE_TPETRA_DEBUG
3359 
3360  // See #1510. In case diag has already been marked modified on
3361  // device, we need to clear that flag, since the code below works
3362  // on host.
3363  auto diag_dv = diag.getDualView ();
3364  diag_dv.modified_device () = 0;
3365 
3366  // For now, we fill the Vector on the host and sync to device.
3367  // Later, we may write a parallel kernel that works entirely on
3368  // device.
3369  diag.template modify<host_execution_space> ();
3370  auto lclVecHost = diag.template getLocalView<host_execution_space> ();
3371  // 1-D subview of the first (and only) column of lclVecHost.
3372  auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
3373 
// Non-owning host view over the caller's offsets array.
3374  Kokkos::View<const size_t*, Kokkos::HostSpace,
3375  Kokkos::MemoryTraits<Kokkos::Unmanaged> >
3376  h_offsets (offsets.getRawPtr (), offsets.size ());
3377  // Find the diagonal entries and put them in lclVecHost1d.
3378  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
3379  typedef Kokkos::RangePolicy<host_execution_space, LO> policy_type;
3380  const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
3381 
// The lambda captures by reference ([&]); it reads lclMatrix_ and
// h_offsets and writes one element of lclVecHost1d per row.
3382  Kokkos::parallel_for (policy_type (0, myNumRows), [&] (const LO& lclRow) {
3383  lclVecHost1d(lclRow) = STS::zero (); // default value if no diag entry
3384  if (h_offsets[lclRow] != INV) {
3385  auto curRow = lclMatrix_.rowConst (lclRow);
3386  lclVecHost1d(lclRow) = static_cast<IST> (curRow.value(h_offsets[lclRow]));
3387  }
3388  });
3389  diag.template sync<execution_space> (); // sync changes back to device
3390  }
3391 
3392 
// Scale the matrix on the left: every stored entry in local row i is
// multiplied by the i-th local entry of the scaling vector x.
//
// NOTE(review): the declarator lines (embedded numbers 3395-3396) and the
// line numbered 3404 (presumably the vec_type typedef) are absent from
// this listing; the method name is taken from tfecfFuncName below.
//
// Throws:
//   - std::runtime_error if the matrix is not fill complete.
//   - std::invalid_argument if x's Map is the same as neither the range
//     Map nor the row Map.
//
// How the scaling values are obtained:
//   - x on the range Map, graph has an Export: x is first redistributed
//     into a temporary vector on the row Map, using doImport with the
//     Export object and the INSERT combine mode.
//   - x on the range Map with no Export, or x on the row Map: x is used
//     directly.
//
// Unlike the other methods in this section, tfecfFuncName here carries no
// trailing ": "; the message literals below begin with ": " instead.
3393  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3394  void
3397  {
3398  using Teuchos::ArrayRCP;
3399  using Teuchos::ArrayView;
3400  using Teuchos::null;
3401  using Teuchos::RCP;
3402  using Teuchos::rcp;
3403  using Teuchos::rcpFromRef;
3405  const char tfecfFuncName[] = "leftScale";
3406 
3407  // FIXME (mfh 06 Aug 2014) This doesn't make sense. The matrix
3408  // should only be modified when it is not fill complete.
3409  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3410  ! isFillComplete (), std::runtime_error,
3411  ": matrix must be fill complete.");
// xp ends up pointing either at x itself (non-owning, via rcpFromRef) or
// at a temporary row-Map copy of it.
3412  RCP<const vec_type> xp;
3413 
3414  if (getRangeMap ()->isSameAs (* (x.getMap ()))){
3415  // Take from Epetra: If we have a non-trivial exporter, we must
3416  // import elements that are permuted or are on other processors.
3417  // (We will use the exporter to perform the import ("reverse
3418  // mode").)
3419  if (getCrsGraphRef ().getExporter () != Teuchos::null) {
3420  RCP<vec_type> tempVec = rcp (new vec_type (getRowMap ()));
3421  tempVec->doImport (x, * (getCrsGraphRef ().getExporter ()), INSERT);
3422  xp = tempVec;
3423  }
3424  else {
3425  xp = rcpFromRef (x);
3426  }
3427  }
3428  else if (getRowMap ()->isSameAs (* (x.getMap ()))) {
3429  xp = rcpFromRef (x);
3430  }
3431  else {
3432  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::invalid_argument, ": The "
3433  "input scaling vector x's Map must be the same as either the row Map or "
3434  "the range Map of the CrsMatrix.");
3435  }
3436  ArrayRCP<const Scalar> vectorVals = xp->getData (0);
3437  ArrayView<impl_scalar_type> rowValues = null;
3438 
3439  const LocalOrdinal lclNumRows =
3440  static_cast<LocalOrdinal> (this->getNodeNumRows ());
3441  for (LocalOrdinal i = 0; i < lclNumRows; ++i) {
3442  const RowInfo rowinfo = staticGraph_->getRowInfo (i);
3443  rowValues = this->getViewNonConst (rowinfo);
// One scale factor per row, applied to the row's valid entries only.
3444  const impl_scalar_type scaleValue = static_cast<impl_scalar_type> (vectorVals[i]);
3445  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
3446  rowValues[j] *= scaleValue;
3447  }
3448  }
3449  }
3450 
// Scale the matrix on the right: every stored entry is multiplied by the
// entry of the scaling vector x selected by that entry's local column
// index.
//
// NOTE(review): the declarator lines (embedded numbers 3453-3454) and the
// line numbered 3462 (presumably the vec_type typedef) are absent from
// this listing; the method name is taken from tfecfFuncName below.
//
// Throws std::runtime_error if the matrix is not fill complete, or if x's
// Map is the same as neither the domain Map nor the row Map.
//
// How the scaling values are obtained:
//   - x on the domain Map, graph has an Import: x is first imported into
//     a temporary vector on the column Map.
//   - x on the domain Map with no Import, or x on the row Map: x is used
//     directly.
//
// NOTE(review): the values are indexed as vectorVals[colInds[j]], i.e. by
// local column index, yet the fallback branch accepts a vector on the
// *row* Map.  That only lines up if row and column Maps index the same
// way locally; the intended test may be against the column Map.  The
// final error message also says "range Map" although the code tests the
// domain Map.  Confirm before relying on either.
3451  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3452  void
3455  {
3456  using Teuchos::ArrayRCP;
3457  using Teuchos::ArrayView;
3458  using Teuchos::null;
3459  using Teuchos::RCP;
3460  using Teuchos::rcp;
3461  using Teuchos::rcpFromRef;
3463  const char tfecfFuncName[] = "rightScale: ";
3464 
3465  // FIXME (mfh 06 Aug 2014) This doesn't make sense. The matrix
3466  // should only be modified when it is not fill complete.
3467  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3468  ! isFillComplete (), std::runtime_error, "Matrix must be fill complete.");
3469  RCP<const vec_type> xp;
3470  if (getDomainMap ()->isSameAs (* (x.getMap ()))) {
3471  // Take from Epetra: If we have a non-trivial exporter, we must
3472  // import elements that are permuted or are on other processors.
3473  // (We will use the exporter to perform the import.)
3474  if (getCrsGraphRef ().getImporter () != Teuchos::null) {
3475  RCP<vec_type> tempVec = rcp (new vec_type (getColMap ()));
3476  tempVec->doImport (x, * (getCrsGraphRef ().getImporter ()), INSERT);
3477  xp = tempVec;
3478  }
3479  else {
3480  xp = rcpFromRef (x);
3481  }
3482  }
3483  else if (getRowMap ()->isSameAs (* (x.getMap ()))) {
3484  xp = rcpFromRef (x);
3485  } else {
3486  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3487  true, std::runtime_error, "The vector x must have the same Map as "
3488  "either the row Map or the range Map.");
3489  }
3490 
3491  ArrayRCP<const Scalar> vectorVals = xp->getData (0);
3492  ArrayView<impl_scalar_type> rowValues = null;
3493 
3494  const LocalOrdinal lclNumRows =
3495  static_cast<LocalOrdinal> (this->getNodeNumRows ());
3496  for (LocalOrdinal i = 0; i < lclNumRows; ++i) {
3497  const RowInfo rowinfo = staticGraph_->getRowInfo (i);
3498  rowValues = this->getViewNonConst (rowinfo);
3499  ArrayView<const LocalOrdinal> colInds;
3500  getCrsGraphRef ().getLocalRowView (i, colInds);
// Each entry gets the scale factor belonging to its own local column.
3501  for (size_t j = 0; j < rowinfo.numEntries; ++j) {
3502  rowValues[j] *= static_cast<impl_scalar_type> (vectorVals[colInds[j]]);
3503  }
3504  }
3505  }
3506 
// Compute (or return the cached) Frobenius norm of the matrix: the square
// root of the sum, over all processes, of |value|^2 for every stored
// entry.
//
// NOTE(review): the return-type and declarator lines (embedded numbers
// 3508-3510) are absent from this listing; the value returned is of type
// mag_type.
//
// Caching: frobNorm_ equal to -STM::one() means "not cached".  On a cache
// miss, the local sum is computed and combined with a REDUCE_SUM
// all-reduce over getComm().  The all-reduce is therefore only executed
// on a cache miss.  The result is stored back into frobNorm_ only if the
// matrix is fill complete.
//
// Local sum: with optimized (packed) storage, one pass over the first
// getNodeNumEntries() elements of k_values1D_; otherwise a row-by-row
// pass over the valid entries of each row.
//
// NOTE(review): the row-by-row branch calls myGraph_->getRowInfo(),
// whereas the other methods in this section use staticGraph_.  If
// myGraph_ can be null while storage is not optimized, this dereference
// would fail -- confirm.
3507  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3511  {
3512  using Teuchos::ArrayView;
3513  using Teuchos::outArg;
3514  using Teuchos::REDUCE_SUM;
3515  using Teuchos::reduceAll;
3516  typedef typename Teuchos::ArrayRCP<const impl_scalar_type>::size_type size_type;
3517 
3518  // FIXME (mfh 05 Aug 2014) Write a thread-parallel kernel for the
3519  // local part of this computation. It could make sense to put
3520  // this operation in the Kokkos::CrsMatrix.
3521 
3522  // check the cache first
3523  mag_type frobNorm = frobNorm_;
3524  if (frobNorm == -STM::one ()) {
3525  mag_type mySum = STM::zero ();
3526  if (getNodeNumEntries() > 0) {
3527  if (isStorageOptimized ()) {
3528  // "Optimized" storage is packed storage. That means we can
3529  // iterate in one pass through the 1-D values array.
3530  const size_type numEntries =
3531  static_cast<size_type> (getNodeNumEntries ());
3532  for (size_type k = 0; k < numEntries; ++k) {
3533  // FIXME (mfh 05 Aug 2014) This assumes UVM.
3534  const impl_scalar_type val = k_values1D_(k);
3535  // Note (etp 06 Jan 2015) We need abs() here for composite types
3536  // (in general, if mag_type is on the left-hand-side, we need
3537  // abs() on the right-hand-side)
3538  const mag_type val_abs = STS::abs (val);
3539  mySum += val_abs * val_abs;
3540  }
3541  }
3542  else {
3543  const LocalOrdinal numRows =
3544  static_cast<LocalOrdinal> (this->getNodeNumRows ());
3545  for (LocalOrdinal r = 0; r < numRows; ++r) {
3546  const RowInfo rowInfo = myGraph_->getRowInfo (r);
3547  const size_type numEntries =
3548  static_cast<size_type> (rowInfo.numEntries);
// Restrict the row view to its valid entries before summing.
3549  ArrayView<const impl_scalar_type> A_r =
3550  this->getView (rowInfo).view (0, numEntries);
3551  for (size_type k = 0; k < numEntries; ++k) {
3552  const impl_scalar_type val = A_r[k];
3553  const mag_type val_abs = STS::abs (val);
3554  mySum += val_abs * val_abs;
3555  }
3556  }
3557  }
3558  }
// Processes with no local entries still take part, contributing zero.
3559  mag_type totalSum = STM::zero ();
3560  reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
3561  mySum, outArg (totalSum));
3562  frobNorm = STM::sqrt (totalSum);
3563  }
3564  if (isFillComplete ()) {
3565  // Only cache the result if the matrix is fill complete.
3566  // Otherwise, the values might still change. resumeFill clears
3567  // the cache.
3568  frobNorm_ = frobNorm;
3569  }
3570  return frobNorm;
3571  }
3572 
// Replace the matrix's column Map by forwarding to the graph it owns.
//
// \param newColMap [in] The new column Map; passed unchanged to
//   myGraph_->replaceColMap().
//
// Throws std::runtime_error if myGraph_ is null, which the message below
// describes as the matrix having a const graph.
3573  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3574  void
3576  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
3577  {
3578  const char tfecfFuncName[] = "replaceColMap: ";
3579  // FIXME (mfh 06 Aug 2014) What if the graph is locally indexed?
3580  // Then replacing the column Map might mean that we need to
3581  // reindex the column indices.
3582  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3583  myGraph_.is_null (), std::runtime_error,
3584  "This method does not work if the matrix has a const graph. The whole "
3585  "idea of a const graph is that you are not allowed to change it, but "
3586  "this method necessarily must modify the graph, since the graph owns "
3587  "the matrix's column Map.");
3588  myGraph_->replaceColMap (newColMap);
3589  }
3590 
// Reindex the column indices of the graph against a new column Map and,
// optionally, re-sort each row of the matrix.
//
// NOTE(review): the first declarator lines (embedded numbers 3593-3594),
// which name the method and declare the 'graph' parameter, are absent
// from this listing; the name is taken from tfecfFuncName below.
//
// \param newColMap   [in] Passed to the graph's reindexColumns().
// \param newImport   [in] Passed to the graph's reindexColumns().
// \param sortEachRow [in] If true, and the graph is locally indexed and
//   not already sorted after reindexing, sort every row's column indices
//   and carry the values along with them.
//
// The graph acted upon is *graph if graph is non-NULL, otherwise
// *myGraph_.  Throws std::invalid_argument if both are null.
3591  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3592  void
3595  const Teuchos::RCP<const map_type>& newColMap,
3596  const Teuchos::RCP<const import_type>& newImport,
3597  const bool sortEachRow)
3598  {
3599  const char tfecfFuncName[] = "reindexColumns: ";
3600  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3601  graph == NULL && myGraph_.is_null (), std::invalid_argument,
3602  "The input graph is NULL, but the matrix does not own its graph.");
3603 
3604  crs_graph_type& theGraph = (graph == NULL) ? *myGraph_ : *graph;
3605  const bool sortGraph = false; // we'll sort graph & matrix together below
3606  theGraph.reindexColumns (newColMap, newImport, sortGraph);
3607  if (sortEachRow && theGraph.isLocallyIndexed () && ! theGraph.isSorted ()) {
3608  const LocalOrdinal lclNumRows =
3609  static_cast<LocalOrdinal> (theGraph.getNodeNumRows ());
3610  for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3611  const RowInfo rowInfo = theGraph.getRowInfo (row);
3612  auto lclColInds = theGraph.getLocalKokkosRowViewNonConst (rowInfo);
3613  auto vals = this->getRowViewNonConst (rowInfo);
3614  // FIXME (mfh 09 May 2017) This assumes CUDA UVM, at least for
3615  // lclColInds, if not also for values.
// Sort the row's first numEntries column indices, passing the values
// array alongside so both are reordered together.
3616  sort2 (lclColInds.ptr_on_device (),
3617  lclColInds.ptr_on_device () + rowInfo.numEntries,
3618  vals.ptr_on_device ());
3619  }
// Record on the graph that its indices are now sorted.
3620  theGraph.indicesAreSorted_ = true;
3621  }
3622  }
3623 
// Replace the matrix's domain Map and Import object by forwarding to the
// graph it owns.
//
// \param newDomainMap [in] Passed unchanged to the graph.
// \param newImporter  [in] Passed unchanged to the graph (taken by
//   non-const reference to the RCP).
//
// Throws std::runtime_error if myGraph_ is null, which the message below
// describes as the matrix having a const graph.
3624  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3625  void
3627  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
3628  Teuchos::RCP<const import_type>& newImporter)
3629  {
3630  const char tfecfFuncName[] = "replaceDomainMapAndImporter: ";
3631  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3632  myGraph_.is_null (), std::runtime_error,
3633  "This method does not work if the matrix has a const graph. The whole "
3634  "idea of a const graph is that you are not allowed to change it, but this"
3635  " method necessarily must modify the graph, since the graph owns the "
3636  "matrix's domain Map and Import objects.");
3637  myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
3638  }
3639 
// Append (column index, value) pairs for 'globalRow' to nonlocals_.
//
// \param globalRow [in] Global row index; used as the key into nonlocals_.
// \param indices   [in] Global column indices to append.
// \param values    [in] Values to append, one per entry of 'indices'.
//
// New entries are appended after any entries already stored for the row;
// nothing is merged or deduplicated here.
//
// NOTE(review): values[k] is read for every k < indices.size() and the
// two sizes are not compared in this method; this assumes the caller
// passes at least as many values as indices -- confirm.
3640  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3641  void
3643  insertNonownedGlobalValues (const GlobalOrdinal globalRow,
3644  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
3645  const Teuchos::ArrayView<const Scalar>& values)
3646  {
3647  using Teuchos::Array;
3648  typedef GlobalOrdinal GO;
3649  typedef typename Array<GO>::size_type size_type;
3650 
3651  const size_type numToInsert = indices.size ();
3652  // Add the new data to the list of nonlocals.
3653  // This creates the arrays if they don't exist yet.
3654  std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
3655  Array<GO>& curRowInds = curRow.first;
3656  Array<Scalar>& curRowVals = curRow.second;
// Reserve room for the existing plus new entries up front, before the
// push_back loop.
3657  const size_type newCapacity = curRowInds.size () + numToInsert;
3658  curRowInds.reserve (newCapacity);
3659  curRowVals.reserve (newCapacity);
3660  for (size_type k = 0; k < numToInsert; ++k) {
3661  curRowInds.push_back (indices[k]);
3662  curRowVals.push_back (values[k]);
3663  }
3664  }
3665 
3666  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3667  void
3670  {
3671  using ::Tpetra::Details::ProfilingRegion;
3672  using Teuchos::Comm;
3673  using Teuchos::outArg;
3674  using Teuchos::RCP;
3675  using Teuchos::rcp;
3676  using Teuchos::REDUCE_MAX;
3677  using Teuchos::REDUCE_MIN;
3678  using Teuchos::reduceAll;
3680  //typedef LocalOrdinal LO;
3681  typedef GlobalOrdinal GO;
3682  typedef typename Teuchos::Array<GO>::size_type size_type;
3683  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
3684  ProfilingRegion regionGlobalAssemble ("Tpetra::CrsMatrix::globalAssemble");
3685 
3686  RCP<const Comm<int> > comm = getComm ();
3687 
3688  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3689  (! isFillActive (), std::runtime_error, "Fill must be active before "
3690  "you may call this method.");
3691 
3692  const size_t myNumNonlocalRows = nonlocals_.size ();
3693 
3694  // If no processes have nonlocal rows, then we don't have to do
3695  // anything. Checking this is probably cheaper than constructing
3696  // the Map of nonlocal rows (see below) and noticing that it has
3697  // zero global entries.
3698  {
3699  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
3700  int someoneHasNonlocalRows = 0;
3701  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
3702  outArg (someoneHasNonlocalRows));
3703  if (someoneHasNonlocalRows == 0) {
3704  return; // no process has nonlocal rows, so nothing to do
3705  }
3706  }
3707 
3708  // 1. Create a list of the "nonlocal" rows on each process. this
3709  // requires iterating over nonlocals_, so while we do this,
3710  // deduplicate the entries and get a count for each nonlocal
3711  // row on this process.
3712  // 2. Construct a new row Map corresponding to those rows. This
3713  // Map is likely overlapping. We know that the Map is not
3714  // empty on all processes, because the above all-reduce and
3715  // return exclude that case.
3716 
3717  RCP<const map_type> nonlocalRowMap;
3718  // Keep this for CrsGraph's constructor, so we can use StaticProfile.
3719  Teuchos::ArrayRCP<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
3720  {
3721  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
3722  size_type curPos = 0;
3723  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
3724  ++mapIter, ++curPos) {
3725  myNonlocalGblRows[curPos] = mapIter->first;
3726  // Get the values and column indices by reference, since we
3727  // intend to change them in place (that's what "erase" does).
3728  Teuchos::Array<GO>& gblCols = (mapIter->second).first;
3729  Teuchos::Array<Scalar>& vals = (mapIter->second).second;
3730 
3731  // Sort both arrays jointly, using the column indices as keys,
3732  // then merge them jointly. "Merge" here adds values
3733  // corresponding to the same column indices. The first 2 args
3734  // of merge2 are output arguments that work just like the
3735  // return value of std::unique.
3736  sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
3737  typename Teuchos::Array<GO>::iterator gblCols_newEnd;
3738  typename Teuchos::Array<Scalar>::iterator vals_newEnd;
3739  merge2 (gblCols_newEnd, vals_newEnd,
3740  gblCols.begin (), gblCols.end (),
3741  vals.begin (), vals.end ());
3742  gblCols.erase (gblCols_newEnd, gblCols.end ());
3743  vals.erase (vals_newEnd, vals.end ());
3744  numEntPerNonlocalRow[curPos] = gblCols.size ();
3745  }
3746 
3747  // Currently, Map requires that its indexBase be the global min
3748  // of all its global indices. Map won't compute this for us, so
3749  // we must do it. If our process has no nonlocal rows, set the
3750  // "min" to the max possible GO value. This ensures that if
3751  // some process has at least one nonlocal row, then it will pick
3752  // that up as the min. We know that at least one process has a
3753  // nonlocal row, since the all-reduce and return at the top of
3754  // this method excluded that case.
3755  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
3756  {
3757  auto iter = std::min_element (myNonlocalGblRows.begin (),
3758  myNonlocalGblRows.end ());
3759  if (iter != myNonlocalGblRows.end ()) {
3760  myMinNonlocalGblRow = *iter;
3761  }
3762  }
3763  GO gblMinNonlocalGblRow = 0;
3764  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
3765  outArg (gblMinNonlocalGblRow));
3766  const GO indexBase = gblMinNonlocalGblRow;
3767  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3768  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3769  }
3770 
3771  // 3. Use the values and column indices for each nonlocal row, as
3772  // stored in nonlocals_, to construct a CrsMatrix corresponding
3773  // to nonlocal rows. We may use StaticProfile, since we have
3774  // exact counts of the number of entries in each nonlocal row.
3775 
3776  RCP<crs_matrix_type> nonlocalMatrix =
3777  rcp (new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow,
3778  StaticProfile));
3779  {
3780  size_type curPos = 0;
3781  for (auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
3782  ++mapIter, ++curPos) {
3783  const GO gblRow = mapIter->first;
3784  // Get values & column indices by ref, just to avoid copy.
3785  Teuchos::Array<GO>& gblCols = (mapIter->second).first;
3786  Teuchos::Array<Scalar>& vals = (mapIter->second).second;
3787  //const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3788  nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
3789  }
3790  }
3791  // There's no need to fill-complete the nonlocals matrix.
3792  // We just use it as a temporary container for the Export.
3793 
3794  // 4. If the original row Map is one to one, then we can Export
3795  // directly from nonlocalMatrix into this. Otherwise, we have
3796  // to create a temporary matrix with a one-to-one row Map,
3797  // Export into that, then Import from the temporary matrix into
3798  // *this.
3799 
3800  auto origRowMap = this->getRowMap ();
3801  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3802 
3803  int isLocallyComplete = 1; // true by default
3804 
3805  if (origRowMapIsOneToOne) {
3806  export_type exportToOrig (nonlocalRowMap, origRowMap);
3807  if (! exportToOrig.isLocallyComplete ()) {
3808  isLocallyComplete = 0;
3809  }
3810  this->doExport (*nonlocalMatrix, exportToOrig, Tpetra::ADD);
3811  // We're done at this point!
3812  }
3813  else {
3814  // If you ask a Map whether it is one to one, it does some
3815  // communication and stashes intermediate results for later use
3816  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3817  // much more than the original cost of calling isOneToOne.
3818  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3819  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3820  if (! exportToOneToOne.isLocallyComplete ()) {
3821  isLocallyComplete = 0;
3822  }
3823 
3824  // Create a temporary matrix with the one-to-one row Map.
3825  //
3826  // TODO (mfh 09 Sep 2016, 12 Sep 2016) Estimate # entries in
3827  // each row, to avoid reallocation during the Export operation.
3828  crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
3829  // Export from matrix of nonlocals into the temp one-to-one matrix.
3830  oneToOneMatrix.doExport (*nonlocalMatrix, exportToOneToOne, Tpetra::ADD);
3831 
3832  // We don't need the matrix of nonlocals anymore, so get rid of
3833  // it, to keep the memory high-water mark down.
3834  nonlocalMatrix = Teuchos::null;
3835 
3836  // Import from the one-to-one matrix to the original matrix.
3837  import_type importToOrig (oneToOneRowMap, origRowMap);
3838  this->doImport (oneToOneMatrix, importToOrig, Tpetra::ADD);
3839  }
3840 
3841  // It's safe now to clear out nonlocals_, since we've already
3842  // committed side effects to *this. The standard idiom for
3843  // clearing a Container like std::map, is to swap it with an empty
3844  // Container and let the swapped Container fall out of scope.
3845  decltype (nonlocals_) newNonlocals;
3846  std::swap (nonlocals_, newNonlocals);
3847 
3848  // FIXME (mfh 12 Sep 2016) I don't like this all-reduce, and I
3849  // don't like throwing an exception here. A local return value
3850  // would likely be more useful to users. However, if users find
3851  // themselves exercising nonlocal inserts often, then they are
3852  // probably novice users who need the help. See GitHub Issues
3853  // #603 and #601 (esp. the latter) for discussion.
3854 
3855  int isGloballyComplete = 0; // output argument of reduceAll
3856  reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
3857  outArg (isGloballyComplete));
3858  TEUCHOS_TEST_FOR_EXCEPTION
3859  (isGloballyComplete != 1, std::runtime_error, "On at least one process, "
3860  "you called insertGlobalValues with a global row index which is not in "
3861  "the matrix's row Map on any process in its communicator.");
3862  }
3863 
// Reopen this matrix for modification.
//
// If the matrix does not have a static graph (! isStaticGraph()), the
// call is forwarded to myGraph_->resumeFill(params); a static (nonowned)
// graph is left untouched. Afterwards fillComplete_ is reset to false.
//
// \param params  Passed through unchanged to the graph's resumeFill().
//
// NOTE(review): the embedded line numbers skip 3866 and 3872, so two
// lines of this definition are not visible in this listing.
3864  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3865  void
3867  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3868  {
3869  if (! isStaticGraph ()) { // Don't resume fill of a nonowned graph.
3870  myGraph_->resumeFill (params);
3871  }
3873  fillComplete_ = false;
3874  }
3875 
// Intentionally empty method: the visible body contains only comments.
//
// NOTE(review): the line(s) carrying the method name (3878-3879) are not
// visible in this listing; from the comment below this is presumably
// computeGlobalConstants() -- confirm against the full file.
3876  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3877  void
3880  {
3881  // This method doesn't do anything. The analogous method in
3882  // CrsGraph does actually compute something.
3883  //
3884  // Oddly enough, clearGlobalConstants() clears frobNorm_ (by
3885  // setting it to -1), but computeGlobalConstants() does _not_
3886  // compute the Frobenius norm; this is done on demand in
3887  // getFrobeniusNorm(), and the result is cached there.
3888  }
3889 
// Report whether global constants are available by delegating to the
// graph: returns getCrsGraphRef().haveGlobalConstants().
//
// NOTE(review): lines 3892-3893 (method name and opening brace) are not
// visible in this listing; presumably this is haveGlobalConstants() --
// confirm against the full file.
3890  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3891  bool
3894  return getCrsGraphRef ().haveGlobalConstants ();
3895  }
3896 
// Invalidate the cached Frobenius norm by setting frobNorm_ to
// -STM::one(), the sentinel meaning "needs to be recomputed".
//
// NOTE(review): lines 3899-3900 (method name and opening brace) are not
// visible in this listing; from the comment below this is presumably
// clearGlobalConstants() -- confirm against the full file.
3897  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3898  void
3901  // We use -1 to indicate that the Frobenius norm needs to be
3902  // recomputed, since the values might change between now and the
3903  // next fillComplete call.
3904  //
3905  // Oddly enough, clearGlobalConstants() clears frobNorm_, but
3906  // computeGlobalConstants() does _not_ compute the Frobenius norm;
3907  // this is done on demand in getFrobeniusNorm(), and the result is
3908  // cached there.
3909  frobNorm_ = -STM::one ();
3910  }
3911 
// Convenience overload of fillComplete that picks the domain and range
// Maps itself and then calls the three-argument fillComplete.
//
// - If the matrix has a static graph and that graph is already fill
//   complete, the graph's own domain and range Maps are used.
// - Otherwise the matrix's row Map is used as both domain and range Map.
//
// \param params  Passed through unchanged to the three-argument overload.
//
// Raises std::logic_error (through the Teuchos test macro) if
// getCrsGraph() returns null.
3912  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3913  void
3915  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3916  {
3917  const char tfecfFuncName[] = "fillComplete(params): ";
3918 
3919  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3920  (this->getCrsGraph ().is_null (), std::logic_error,
3921  "getCrsGraph() returns null. This should not happen at this point. "
3922  "Please report this bug to the Tpetra developers.");
3923 
3924  if (this->isStaticGraph () && this->getCrsGraphRef ().isFillComplete ()) {
3925  // If this matrix's graph is fill complete and the user did not
3926  // supply a domain or range Map, use the graph's domain and
3927  // range Maps.
3928  this->fillComplete (this->getCrsGraphRef ().getDomainMap (),
3929  this->getCrsGraphRef ().getRangeMap (), params);
3930  } else {
3931  // No usable Maps from the graph: fall back to the row Map for both
3931  // the domain and the range.
3931  this->fillComplete (this->getRowMap (), this->getRowMap (), params);
3932  }
3933  }
3934 
// Finish assembly of the matrix with explicit domain and range Maps.
//
// Visible sequence of work:
//   1. Require that fill is active and the matrix is not already fill
//      complete (std::runtime_error otherwise).
//   2. Read options from params (if non-null):
//        "No Nonlocal Changes"          bool, default false
//        "sort column map ghost gids"   bool, default true
//        "Sort column Map ghost GIDs"   bool, consulted only if the
//                                       lower-case key is absent
//   3. Allocate values if the graph's indices are not yet allocated
//      (local indices when a column Map exists, else global indices).
//   4. Call globalAssemble() unless the caller asserted there were no
//      nonlocal changes or there is only one process. When skipped on a
//      single process with pending nonlocal entries, raise
//      std::runtime_error.
//   5. Static graph: in HAVE_TPETRA_DEBUG builds only, check that the
//      supplied Maps match the graph's (std::runtime_error otherwise).
//      Owned graph: set its domain/range Maps, make the column Map if
//      needed, make indices local, sort and merge, build Import/Export,
//      compute global constants and mark the graph fill complete.
//   6. Fill the local matrix (and the local graph when it is owned), set
//      fillComplete_ and run checkInternalState().
//
// \param domainMap  Domain Map to use (checked against, or given to, the graph).
// \param rangeMap   Range Map to use (checked against, or given to, the graph).
// \param params     Optional parameter list; may be null.
//
// NOTE(review): the embedded line numbers skip 3937 and 4075, so two
// lines of this definition are not visible in this listing.
3935  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
3936  void
3938  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3939  const Teuchos::RCP<const map_type>& rangeMap,
3940  const Teuchos::RCP<Teuchos::ParameterList>& params)
3941  {
3942  using ::Tpetra::Details::ProfilingRegion;
3943  using Teuchos::ArrayRCP;
3944  using Teuchos::RCP;
3945  using Teuchos::rcp;
3946  const char tfecfFuncName[] = "fillComplete";
3947  ProfilingRegion regionFillComplete ("Tpetra::CrsMatrix::fillComplete");
3948 
3949  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3950  (! isFillActive () || isFillComplete (), std::runtime_error,
3951  ": Matrix fill state must be active (isFillActive() "
3952  "must be true) before you may call fillComplete().");
3953  const int numProcs = getComm ()->getSize ();
3954 
3955  //
3956  // Read parameters from the input ParameterList.
3957  //
3958 
3959  // If true, the caller promises that no process did nonlocal
3960  // changes since the last call to fillComplete.
3961  bool assertNoNonlocalInserts = false;
3962  // If true, makeColMap sorts remote GIDs (within each remote
3963  // process' group).
3964  bool sortGhosts = true;
3965 
3966  if (! params.is_null ()) {
3967  assertNoNonlocalInserts = params->get ("No Nonlocal Changes",
3968  assertNoNonlocalInserts);
3969  if (params->isParameter ("sort column map ghost gids")) {
3970  sortGhosts = params->get ("sort column map ghost gids", sortGhosts);
3971  }
3972  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3973  sortGhosts = params->get ("Sort column Map ghost GIDs", sortGhosts);
3974  }
3975  }
3976  // We also don't need to do global assembly if there is only one
3977  // process in the communicator.
3978  const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3979  // This parameter only matters if this matrix owns its graph.
3980  if (! myGraph_.is_null ()) {
3981  myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
3982  }
3983 
3984  if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
3985  if (this->hasColMap ()) {
3986  // We have a column Map, so use local indices.
3987  allocateValues (LocalIndices, GraphNotYetAllocated);
3988  } else {
3989  // We don't have a column Map, so use global indices.
3990  allocateValues (GlobalIndices, GraphNotYetAllocated);
3991  }
3992  }
3993  // Global assemble, if we need to. This call only costs a single
3994  // all-reduce if we didn't need global assembly after all.
3995  if (needGlobalAssemble) {
3996  globalAssemble ();
3997  }
3998  else {
3999  // Global assembly was skipped; on a single process any pending
3999  // nonlocal entries can never be delivered, so report them.
3999  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4000  (numProcs == 1 && nonlocals_.size() > 0,
4001  std::runtime_error, ": cannot have nonlocal entries on a serial run. "
4002  "An invalid entry (i.e., with row index not in the row Map) must have "
4003  "been submitted to the CrsMatrix.");
4004  }
4005 
4006  if (isStaticGraph ()) {
4007  // FIXME (mfh 14 Nov 2016) In order to fix #843, I enable the
4008  // checks below only in debug mode. It would be nicer to do a
4009  // local check, then propagate the error state in a deferred
4010  // way, whenever communication happens. That would reduce the
4011  // cost of checking, to the point where it may make sense to
4012  // enable it even in release mode.
4013 #ifdef HAVE_TPETRA_DEBUG
4014  // FIXME (mfh 18 Jun 2014) This check for correctness of the
4015  // input Maps incurs a penalty of two all-reduces for the
4016  // otherwise optimal const graph case.
4017  //
4018  // We could turn these (max) 2 all-reduces into (max) 1, by
4019  // fusing them. We could do this by adding a "locallySameAs"
4020  // method to Map, which would return one of four states:
4021  //
4022  // a. Certainly globally the same
4023  // b. Certainly globally not the same
4024  // c. Locally the same
4025  // d. Locally not the same
4026  //
4027  // The first two states don't require further communication.
4028  // The latter two states require an all-reduce to communicate
4029  // globally, but we only need one all-reduce, since we only need
4030  // to check whether at least one of the Maps is wrong.
4031  const bool domainMapsMatch = staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4032  const bool rangeMapsMatch = staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4033 
4034  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4035  ! domainMapsMatch, std::runtime_error,
4036  ": The CrsMatrix's domain Map does not match the graph's domain Map. "
4037  "The graph cannot be changed because it was given to the CrsMatrix "
4038  "constructor as const. You can fix this by passing in the graph's "
4039  "domain Map and range Map to the matrix's fillComplete call.");
4040 
4041  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4042  ! rangeMapsMatch, std::runtime_error,
4043  ": The CrsMatrix's range Map does not match the graph's range Map. "
4044  "The graph cannot be changed because it was given to the CrsMatrix "
4045  "constructor as const. You can fix this by passing in the graph's "
4046  "domain Map and range Map to the matrix's fillComplete call.");
4047 #endif // HAVE_TPETRA_DEBUG
4048  }
4049  else {
4050  // Set the graph's domain and range Maps. This will clear the
4051  // Import if the domain Map has changed (is a different
4052  // pointer), and the Export if the range Map has changed (is a
4053  // different pointer).
4054  myGraph_->setDomainRangeMaps (domainMap, rangeMap);
4055 
4056  // Make the graph's column Map, if necessary.
4057  if (! myGraph_->hasColMap ()) {
4058  myGraph_->makeColMap ();
4059  }
4060 
4061  // Make indices local, if necessary. The method won't do
4062  // anything if the graph is already locally indexed.
4063  myGraph_->makeIndicesLocal ();
4064 
4065  // Sort each row's indices (with their values) and merge duplicate
4065  // entries, skipping whichever step the graph reports as already done.
4065  const bool sorted = myGraph_->isSorted ();
4066  const bool merged = myGraph_->isMerged ();
4067  this->sortAndMergeIndicesAndValues (sorted, merged);
4068 
4069  // Make the Import and Export, if they haven't been made already.
4070  myGraph_->makeImportExport ();
4071  myGraph_->computeGlobalConstants ();
4072  myGraph_->fillComplete_ = true;
4073  myGraph_->checkInternalState ();
4074  }
4076  // fill local objects; will fill and finalize local graph if appropriate
4077  if (myGraph_.is_null ()) {
4078  // The matrix does _not_ own the graph, and the graph's
4079  // structure is already fixed, so just fill the local matrix.
4080  fillLocalMatrix (params);
4081  } else {
4082  // The matrix _does_ own the graph, so fill the local graph at
4083  // the same time as the local matrix.
4084  fillLocalGraphAndMatrix (params);
4085  }
4086 
4087  // Once we've initialized the sparse kernels, we're done with the
4088  // local objects. We may now release them and their memory, since
4089  // they will persist in the local sparse ops if necessary. We
4090  // keep the local graph if the parameters tell us to do so.
4091 
4092  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
4093 
4094  fillComplete_ = true; // Now we're fill complete!
4095  checkInternalState ();
4096  }
4097 
4098  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4099  void
4101  expertStaticFillComplete (const Teuchos::RCP<const map_type> & domainMap,
4102  const Teuchos::RCP<const map_type> & rangeMap,
4103  const Teuchos::RCP<const import_type>& importer,
4104  const Teuchos::RCP<const export_type>& exporter,
4105  const Teuchos::RCP<Teuchos::ParameterList> &params)
4106  {
4107 #ifdef HAVE_TPETRA_MMM_TIMINGS
4108  std::string label;
4109  if(!params.is_null())
4110  label = params->get("Timer Label",label);
4111  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
4112  using Teuchos::TimeMonitor;
4113  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-Graph"))));
4114 #endif
4115 
4116 
4117  const char tfecfFuncName[] = "expertStaticFillComplete: ";
4118  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
4119  std::runtime_error, "Matrix fill state must be active (isFillActive() "
4120  "must be true) before calling fillComplete().");
4121  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4122  myGraph_.is_null (), std::logic_error, "myGraph_ is null. This is not allowed.");
4123 
4124 
4125  // We will presume globalAssemble is not needed, so we do the ESFC on the graph
4126  myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4127 
4128 #ifdef HAVE_TPETRA_MMM_TIMINGS
4129  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-cGC"))));
4130 #endif
4131  if(params.is_null() || params->get("compute global constants",true))
4133 
4134 #ifdef HAVE_TPETRA_MMM_TIMINGS
4135  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-fLGAM"))));
4136 #endif
4137 
4138  // Fill the local graph and matrix
4139  fillLocalGraphAndMatrix (params);
4140 
4141  // FIXME (mfh 28 Aug 2014) "Preserve Local Graph" bool parameter no longer used.
4142 
4143  // Now we're fill complete!
4144  fillComplete_ = true;
4145 
4146  // Sanity checks at the end.
4147 #ifdef HAVE_TPETRA_DEBUG
4148  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
4149  ": We're at the end of fillComplete(), but isFillActive() is true. "
4150  "Please report this bug to the Tpetra developers.");
4151  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
4152  ": We're at the end of fillComplete(), but isFillActive() is true. "
4153  "Please report this bug to the Tpetra developers.");
4154 #endif // HAVE_TPETRA_DEBUG
4155 
4156 #ifdef HAVE_TPETRA_MMM_TIMINGS
4157  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-M-cIS"))));
4158 #endif
4159 
4161  }
4162 
// Merge runs of equal, adjacent column indices within one row, summing
// the values that belong to each run, and compact the row in place.
//
// The row's values come from getRowViewNonConst(rowInfo) and its column
// indices from graph.getLocalKokkosRowViewNonConst(rowInfo). Only the
// first rowInfo.numEntries entries are examined. Because only neighbours
// are compared, all duplicates are removed only if equal indices are
// adjacent (presumably the row was sorted beforehand -- confirm at the
// call site).
//
// Side effect: graph.k_numRowEntries_(rowInfo.localRow) is set to the
// number of entries that remain after merging.
//
// \param rowInfo  Describes the row (localRow, numEntries, allocSize).
// \return The number of duplicate entries removed from the row
//         (rowInfo.numEntries minus the merged count).
//
// In HAVE_TPETRA_DEBUG builds, raises std::runtime_error if
// rowInfo.allocSize differs from the extent of either row view.
//
// NOTE(review): lines 4165-4166 (method name and the leading `graph`
// parameter) are not visible in this listing.
4163  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4164  size_t
4167  const RowInfo& rowInfo)
4168  {
4169 #ifdef HAVE_TPETRA_DEBUG
4170  const char tfecfFuncName[] = "mergeRowIndicesAndValues: ";
4171 #endif // HAVE_TPETRA_DEBUG
4172 
4173  auto rowValues = this->getRowViewNonConst (rowInfo);
4174  typedef typename std::decay<decltype (rowValues[0]) >::type value_type;
4175  value_type* rowValueIter = rowValues.data ();
4176  auto inds_view = graph.getLocalKokkosRowViewNonConst (rowInfo);
4177 
4178  // beg,end define a half-exclusive interval over which to iterate.
4179  LocalOrdinal* beg = inds_view.data ();
4180  LocalOrdinal* end = inds_view.data () + rowInfo.numEntries;
4181 
4182 #ifdef HAVE_TPETRA_DEBUG
4183  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4184  (rowInfo.allocSize != static_cast<size_t> (inds_view.dimension_0 ()) ||
4185  rowInfo.allocSize != static_cast<size_t> (rowValues.dimension_0 ()),
4186  std::runtime_error, "rowInfo.allocSize = " << rowInfo.allocSize
4187  << " != inds_view.dimension_0() = " << inds_view.dimension_0 ()
4188  << " || rowInfo.allocSize = " << rowInfo.allocSize
4189  << " != rowValues.dimension_0() = " << rowValues.dimension_0 () << ".");
4190 #endif // HAVE_TPETRA_DEBUG
4191 
4192  // newend / vend track the last entry kept so far (index and value);
4192  // cur / vcur scan the remaining entries. The loop works like
4192  // std::unique, except that values of merged entries are summed.
4192  LocalOrdinal* newend = beg;
4193  if (beg != end) {
4194  LocalOrdinal* cur = beg + 1;
4195  value_type* vcur = rowValueIter + 1;
4196  value_type* vend = rowValueIter;
4197  cur = beg+1; // redundant: cur was already initialized to beg + 1
4198  while (cur != end) {
4199  if (*cur != *newend) {
4200  // new entry; save it
4201  ++newend;
4202  ++vend;
4203  (*newend) = (*cur);
4204  (*vend) = (*vcur);
4205  }
4206  else {
4207  // old entry; merge it
4208  //(*vend) = f (*vend, *vcur);
4209  (*vend) += *vcur;
4210  }
4211  ++cur;
4212  ++vcur;
4213  }
4214  ++newend; // one past the last entry, per typical [beg,end) semantics
4215  }
4216  const size_t mergedEntries = newend - beg;
4217  graph.k_numRowEntries_(rowInfo.localRow) = mergedEntries;
4218  const size_t numDups = rowInfo.numEntries - mergedEntries;
4219  return numDups;
4220  }
4221 
// Sort and/or merge the entries of every local row of the owned graph,
// keeping the matrix values in step with the column indices.
//
// Does nothing when both flags are true. Otherwise, for each local row
// (in a Kokkos::parallel_reduce over the host execution space):
//   - if ! sorted, sort2() orders the row's column indices and applies
//     the same permutation to the row's values;
//   - if ! merged, mergeRowIndicesAndValues() combines duplicates and
//     the number of removed entries is accumulated.
// Afterwards the graph's bookkeeping is updated: indicesAreSorted_ is set
// when sorting ran; nodeNumEntries_ is reduced by the total number of
// duplicates and noRedundancies_ is set when merging ran.
//
// \param sorted  True if the rows are already sorted (skip sorting).
// \param merged  True if the rows are already merged (skip merging).
//
// When work is required, raises std::runtime_error if the graph is
// static, and std::logic_error if myGraph_ is null or the storage has
// already been optimized.
//
// NOTE(review): line 4224 of this definition is not visible in this
// listing.
4222  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4223  void
4225  sortAndMergeIndicesAndValues (const bool sorted, const bool merged)
4226  {
4227  using ::Tpetra::Details::ProfilingRegion;
4228  typedef LocalOrdinal LO;
4229  typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
4230  host_execution_space;
4231  typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4232  //typedef Kokkos::RangePolicy<Kokkos::Serial, LO> range_type;
4233  const char tfecfFuncName[] = "sortAndMergeIndicesAndValues: ";
4234  ProfilingRegion regionSAM ("Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
4235 
4236  if (! sorted || ! merged) {
4237  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4238  (this->isStaticGraph (), std::runtime_error, "Cannot sort or merge with "
4239  "\"static\" (const) graph, since the matrix does not own the graph.");
4240  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4241  (this->myGraph_.is_null (), std::logic_error, "myGraph_ is null, but "
4242  "this matrix claims ! isStaticGraph(). "
4243  "Please report this bug to the Tpetra developers.");
4244  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4245  (this->isStorageOptimized (), std::logic_error, "It is invalid to call "
4246  "this method if the graph's storage has already been optimized. "
4247  "Please report this bug to the Tpetra developers.");
4248 
4249  crs_graph_type& graph = * (this->myGraph_);
4250  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4251  size_t totalNumDups = 0;
4252  // FIXME (mfh 10 May 2017) This may assume CUDA UVM.
4253  Kokkos::parallel_reduce (range_type (0, lclNumRows),
4254  [this, &graph, sorted, merged] (const LO& lclRow, size_t& numDups) {
4255  const RowInfo rowInfo = graph.getRowInfo (lclRow);
4256  if (! sorted) {
4257  auto lclColInds = graph.getLocalKokkosRowViewNonConst (rowInfo);
4258  auto vals = this->getRowViewNonConst (rowInfo);
4259  // FIXME (mfh 09 May 2017) This assumes CUDA UVM, at least
4260  // for lclColInds, if not also for values.
4261  sort2 (lclColInds.ptr_on_device (),
4262  lclColInds.ptr_on_device () + rowInfo.numEntries,
4263  vals.ptr_on_device ());
4264  }
4265  if (! merged) {
4266  // Accumulate this row's duplicate count into the reduction result.
4266  numDups += this->mergeRowIndicesAndValues (graph, rowInfo);
4267  }
4268  }, totalNumDups);
4269  if (! sorted) {
4270  graph.indicesAreSorted_ = true; // we just sorted every row
4271  }
4272  if (! merged) {
4273  graph.nodeNumEntries_ -= totalNumDups;
4274  graph.noRedundancies_ = true; // we just merged every row
4275  }
4276  }
4277  }
4278 
// Non-transposed sparse matrix times multivector:
//   Y_in = beta*Y_in + alpha*A*X_in
// (every localApply call below uses Teuchos::NO_TRANS).
//
// Visible sequence of work:
//   1. alpha == 0 shortcut: zero, scale, or leave Y_in alone, then return.
//   2. If Y_in is replicated (not distributed, more than one process),
//      beta is set to zero on every rank but 0 so the final reduce() sums
//      correctly.
//   3. Obtain a column Map version of X_in: Import it when the graph has
//      an Import object; otherwise use X_in directly, or a constant
//      stride copy of it.
//   4. Apply the local operator, either into a row Map temporary that is
//      then Exported (ADD) into Y_in, or directly into Y_in (through a
//      temporary when Y_in has nonconstant stride or is the same object
//      as X).
//   5. If Y_in is replicated, reduce it across processes.
//
// \param alpha  Scaling factor applied to A*X_in.
// \param beta   Scaling factor applied to the incoming Y_in; taken by
//               value and may be overwritten locally (step 2).
//
// NOTE(review): lines 4281-4283 (method name and the X_in / Y_in
// parameters) and line 4287 are not visible in this listing; the name is
// presumably applyNonTranspose and 4287 presumably brings ProfilingRegion
// into scope -- confirm against the full file.
4279  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4280  void
4284  Scalar alpha,
4285  Scalar beta) const
4286  {
4288  using Teuchos::null;
4289  using Teuchos::RCP;
4290  using Teuchos::rcp;
4291  using Teuchos::rcp_const_cast;
4292  using Teuchos::rcpFromRef;
4293  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4294  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
4295 
4296  // mfh 05 Jun 2014: Special case for alpha == 0. I added this to
4297  // fix an Ifpack2 test (RILUKSingleProcessUnitTests), which was
4298  // failing only for the Kokkos refactor version of Tpetra. It's a
4299  // good idea regardless to have the bypass.
4300  if (alpha == ZERO) {
4301  if (beta == ZERO) {
4302  Y_in.putScalar (ZERO);
4303  } else if (beta != ONE) {
4304  Y_in.scale (beta);
4305  }
4306  return;
4307  }
4308 
4309  // It's possible that X is a view of Y or vice versa. We don't
4310  // allow this (apply() requires that X and Y not alias one
4311  // another), but it's helpful to detect and work around this case.
4312  // We don't try to detect the more subtle cases (e.g., one is a
4313  // subview of the other, but their initial pointers differ). We
4314  // only need to do this if this matrix's Import is trivial;
4315  // otherwise, we don't actually apply the operator from X into Y.
4316 
4317  RCP<const import_type> importer = this->getGraph ()->getImporter ();
4318  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4319 
4320  // If beta == 0, then the output MV will be overwritten; none of
4321  // its entries should be read. (Sparse BLAS semantics say that we
4322  // must ignore any Inf or NaN entries in Y_in, if beta is zero.)
4323  // This matters if we need to do an Export operation; see below.
4324  const bool Y_is_overwritten = (beta == ZERO);
4325 
4326  // We treat the case of a replicated MV output specially.
4327  const bool Y_is_replicated =
4328  (! Y_in.isDistributed () && this->getComm ()->getSize () != 1);
4329 
4330  // This is part of the special case for replicated MV output.
4331  // We'll let each process do its thing, but do an all-reduce at
4332  // the end to sum up the results. Setting beta=0 on all processes
4333  // but Proc 0 makes the math work out for the all-reduce. (This
4334  // assumes that the replicated data is correctly replicated, so
4335  // that the data are the same on all processes.)
4336  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4337  beta = ZERO;
4338  }
4339 
4340  // Temporary MV for Import operation. After the block of code
4341  // below, this will be an (Imported if necessary) column Map MV
4342  // ready to give to localMultiply().
4343  RCP<const MV> X_colMap;
4344  if (importer.is_null ()) {
4345  if (! X_in.isConstantStride ()) {
4346  // Not all sparse mat-vec kernels can handle an input MV with
4347  // nonconstant stride correctly, so we have to copy it in that
4348  // case into a constant stride MV. To make a constant stride
4349  // copy of X_in, we force creation of the column (== domain)
4350  // Map MV (if it hasn't already been created, else fetch the
4351  // cached copy). This avoids creating a new MV each time.
4352  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in, true);
4353  Tpetra::deep_copy (*X_colMapNonConst, X_in);
4354  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
4355  }
4356  else {
4357  // The domain and column Maps are the same, so do the local
4358  // multiply using the domain Map input MV X_in.
4359  X_colMap = rcpFromRef (X_in);
4360  }
4361  }
4362  else { // need to Import source (multi)vector
4363  ProfilingRegion regionImport ("Tpetra::CrsMatrix::apply: Import");
4364 
4365  // We're doing an Import anyway, which will copy the relevant
4366  // elements of the domain Map MV X_in into a separate column Map
4367  // MV. Thus, we don't have to worry whether X_in is constant
4368  // stride.
4369  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
4370 
4371  // Import from the domain Map MV to the column Map MV.
4372  X_colMapNonConst->doImport (X_in, *importer, INSERT);
4373  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
4374  }
4375 
4376  // Temporary MV for doExport (if needed), or for copying a
4377  // nonconstant stride output MV into a constant stride MV. This
4378  // is null if we don't need the temporary MV, that is, if the
4379  // Export is trivial (null).
4380  RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
4381 
4382  // If we have a nontrivial Export object, we must perform an
4383  // Export. In that case, the local multiply result will go into
4384  // the row Map multivector. We don't have to make a
4385  // constant-stride version of Y_in in this case, because we had to
4386  // make a constant stride Y_rowMap MV and do an Export anyway.
4387  if (! exporter.is_null ()) {
4388  this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, ZERO);
4389  {
4390  ProfilingRegion regionExport ("Tpetra::CrsMatrix::apply: Export");
4391 
4392  // If we're overwriting the output MV Y_in completely (beta ==
4393  // 0), then make sure that it is filled with zeros before we
4394  // do the Export. Otherwise, the ADD combine mode will use
4395  // data in Y_in, which is supposed to be zero.
4396  if (Y_is_overwritten) {
4397  Y_in.putScalar (ZERO);
4398  }
4399  else {
4400  // Scale output MV by beta, so that doExport sums in the
4401  // mat-vec contribution: Y_in = beta*Y_in + alpha*A*X_in.
4402  Y_in.scale (beta);
4403  }
4404  // Do the Export operation.
4405  Y_in.doExport (*Y_rowMap, *exporter, ADD);
4406  }
4407  }
4408  else { // Don't do an Export: row Map and range Map are the same.
4409  //
4410  // If Y_in does not have constant stride, or if the column Map
4411  // MV aliases Y_in, then we can't let the kernel write directly
4412  // to Y_in. Instead, we have to use the cached row (== range)
4413  // Map MV as temporary storage.
4414  //
4415  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
4416  // the user passed in the same MultiVector for both X and Y. It
4417  // won't detect whether one MultiVector views the other. We
4418  // should also check the MultiVectors' raw data pointers.
4419  if (! Y_in.isConstantStride () || X_colMap.getRawPtr () == &Y_in) {
4420  // Force creating the MV if it hasn't been created already.
4421  // This will reuse a previously created cached MV.
4422  Y_rowMap = getRowMapMultiVector (Y_in, true);
4423 
4424  // If beta == 0, we don't need to copy Y_in into Y_rowMap,
4425  // since we're overwriting it anyway.
4426  if (beta != ZERO) {
4427  Tpetra::deep_copy (*Y_rowMap, Y_in);
4428  }
4429  this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
4430  Tpetra::deep_copy (Y_in, *Y_rowMap);
4431  }
4432  else {
4433  this->localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
4434  }
4435  }
4436 
4437  // If the range Map is a locally replicated Map, sum up
4438  // contributions from each process. We set beta = 0 on all
4439  // processes but Proc 0 initially, so this will handle the scaling
4440  // factor beta correctly.
4441  if (Y_is_replicated) {
4442  ProfilingRegion regionReduce ("Tpetra::CrsMatrix::apply: Reduce Y");
4443  Y_in.reduce ();
4444  }
4445  }
4446 
4447  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4448  void
4452  const Teuchos::ETransp mode,
4453  Scalar alpha,
4454  Scalar beta) const
4455  {
4457  using Teuchos::null;
4458  using Teuchos::RCP;
4459  using Teuchos::rcp;
4460  using Teuchos::rcp_const_cast;
4461  using Teuchos::rcpFromRef;
4462  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4463 
4464  // Take shortcuts for alpha == 0.
4465  if (alpha == ZERO) {
4466  // Follow the Sparse BLAS convention by ignoring both the matrix
4467  // and X_in, in this case.
4468  if (beta == ZERO) {
4469  // Follow the Sparse BLAS convention by overwriting any Inf or
4470  // NaN values in Y_in, in this case.
4471  Y_in.putScalar (ZERO);
4472  }
4473  else {
4474  Y_in.scale (beta);
4475  }
4476  return;
4477  }
4478 
4479  const size_t numVectors = X_in.getNumVectors ();
4480 
4481  // We don't allow X_in and Y_in to alias one another. It's hard
4482  // to check this, because advanced users could create views from
4483  // raw pointers. However, if X_in and Y_in reference the same
4484  // object, we will do the user a favor by copying X into new
4485  // storage (with a warning). We only need to do this if we have
4486  // trivial importers; otherwise, we don't actually apply the
4487  // operator from X into Y.
4488  RCP<const import_type> importer = this->getGraph ()->getImporter ();
4489  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4490  // access X indirectly, in case we need to create temporary storage
4491  RCP<const MV> X;
4492 
4493  // some parameters for below
4494  const bool Y_is_replicated = ! Y_in.isDistributed ();
4495  const bool Y_is_overwritten = (beta == ZERO);
4496  if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4497  beta = ZERO;
4498  }
4499 
4500  // The kernels do not allow input or output with nonconstant stride.
4501  if (! X_in.isConstantStride () && importer.is_null ()) {
4502  X = rcp (new MV (X_in, Teuchos::Copy)); // Constant-stride copy of X_in
4503  } else {
4504  X = rcpFromRef (X_in); // Reference to X_in
4505  }
4506 
4507  // Set up temporary multivectors for Import and/or Export.
4508  if (importer != Teuchos::null) {
4509  if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
4510  importMV_ = null;
4511  }
4512  if (importMV_ == null) {
4513  importMV_ = rcp (new MV (this->getColMap (), numVectors));
4514  }
4515  }
4516  if (exporter != Teuchos::null) {
4517  if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
4518  exportMV_ = null;
4519  }
4520  if (exportMV_ == null) {
4521  exportMV_ = rcp (new MV (this->getRowMap (), numVectors));
4522  }
4523  }
4524 
4525  // If we have a non-trivial exporter, we must import elements that
4526  // are permuted or are on other processors.
4527  if (! exporter.is_null ()) {
4528  ProfilingRegion regionImport ("Tpetra::CrsMatrix::apply (transpose): Import");
4529  exportMV_->doImport (X_in, *exporter, INSERT);
4530  X = exportMV_; // multiply out of exportMV_
4531  }
4532 
4533  // If we have a non-trivial importer, we must export elements that
4534  // are permuted or belong to other processors. We will compute
4535  // solution into the to-be-exported MV; get a view.
4536  if (importer != Teuchos::null) {
4537  ProfilingRegion regionExport ("Tpetra::CrsMatrix::apply (transpose): Export");
4538 
4539  // FIXME (mfh 18 Apr 2015) Temporary fix suggested by Clark
4540  // Dohrmann on Fri 17 Apr 2015. At some point, we need to go
4541  // back and figure out why this helps. importMV_ SHOULD be
4542  // completely overwritten in the localMultiply() call below,
4543  // because beta == ZERO there.
4544  importMV_->putScalar (ZERO);
4545  // Do the local computation.
4546  this->localApply (*X, *importMV_, mode, alpha, ZERO);
4547  if (Y_is_overwritten) {
4548  Y_in.putScalar (ZERO);
4549  } else {
4550  Y_in.scale (beta);
4551  }
4552  Y_in.doExport (*importMV_, *importer, ADD);
4553  }
4554  // otherwise, multiply into Y
4555  else {
4556  // can't multiply in-situ; can't multiply into non-strided multivector
4557  //
4558  // FIXME (mfh 05 Jun 2014) This test for aliasing only tests if
4559  // the user passed in the same MultiVector for both X and Y. It
4560  // won't detect whether one MultiVector views the other. We
4561  // should also check the MultiVectors' raw data pointers.
4562  if (! Y_in.isConstantStride () || X.getRawPtr () == &Y_in) {
4563  // Make a deep copy of Y_in, into which to write the multiply result.
4564  MV Y (Y_in, Teuchos::Copy);
4565  this->localApply (*X, Y, mode, alpha, beta);
4566  Tpetra::deep_copy (Y_in, Y);
4567  } else {
4568  this->localApply (*X, Y_in, mode, alpha, beta);
4569  }
4570  }
4571 
4572  // If the range Map is a locally replicated map, sum the
4573  // contributions from each process. (That's why we set beta=0
4574  // above for all processes but Proc 0.)
4575  if (Y_is_replicated) {
4576  ProfilingRegion regionReduce ("Tpetra::CrsMatrix::apply (transpose): Reduce Y");
4577  Y_in.reduce ();
4578  }
4579  }
4580 
// localApply: on-process apply step. Opens a profiling region labelled
// "Tpetra::CrsMatrix::localApply" and forwards X, Y, mode, alpha and beta
// unchanged to localMultiply<Scalar, Scalar>.
4581  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4582  void
// NOTE(review): listing lines 4583-4585 (class qualifier, function name and
// the X / Y parameter declarations) are absent from this extract -- confirm
// the full signature against the real source file.
4586  const Teuchos::ETransp mode,
4587  const Scalar& alpha,
4588  const Scalar& beta) const
4589  {
// The region object is scoped to this function body, so it covers the
// localMultiply call below.
4591  ProfilingRegion regionLocalApply ("Tpetra::CrsMatrix::localApply");
4592  this->template localMultiply<Scalar, Scalar> (X, Y, mode, alpha, beta);
4593  }
4594 
// apply: entry point that dispatches on 'mode'.
//   - Throws std::runtime_error if the matrix is not fill complete.
//   - mode == Teuchos::NO_TRANS: calls applyNonTranspose (X, Y, alpha, beta).
//   - any other mode: if beta == 0, fills Y with zeros first, then calls
//     applyTranspose (X, Y, mode, alpha, beta).
4595  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4596  void
// NOTE(review): listing lines 4597-4599 (class qualifier, function name and
// the X / Y parameter declarations) are absent from this extract -- confirm
// the full signature against the real source file.
4600  Teuchos::ETransp mode,
4601  Scalar alpha,
4602  Scalar beta) const
4603  {
4605  const char fnName[] = "Tpetra::CrsMatrix::apply";
4606 
4607  TEUCHOS_TEST_FOR_EXCEPTION
4608  (! isFillComplete (), std::runtime_error,
4609  fnName << ": Cannot call apply() until fillComplete() "
4610  "has been called.");
4611 
4612  if (mode == Teuchos::NO_TRANS) {
4613  ProfilingRegion regionNonTranspose (fnName);
4614  this->applyNonTranspose (X, Y, alpha, beta);
4615  }
4616  else {
4617  ProfilingRegion regionTranspose ("Tpetra::CrsMatrix::apply (transpose)");
4618 
4619  // Thyra implicitly assumed that Y is set to zero (i.e., overwritten)
4620  // when beta == 0. That was not the case for the transpose apply in a
4621  // multithreaded environment, where a multiplication followed by
4622  // atomic adds is used and beta == 0 is effectively not special-cased.
4623  // Zeroing Y explicitly here also covers the case where Y holds NaN or Inf.
4624  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4625  if (beta == ZERO) {
4626  Y.putScalar (ZERO);
4627  }
4628  this->applyTranspose (X, Y, mode, alpha, beta);
4629  }
4630  }
4631 
// gaussSeidel: convenience overload without a row ordering. Forwards B, X, D,
// dampingFactor, direction and numSweeps to reorderedGaussSeidel, passing
// Teuchos::null in the row-indices position.
4632  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4633  void
// NOTE(review): listing lines 4634-4637 (class qualifier, function name and
// the B / X / D parameter declarations) are absent from this extract --
// confirm the full signature against the real source file.
4638  const Scalar& dampingFactor,
4639  const ESweepDirection direction,
4640  const int numSweeps) const
4641  {
4642  reorderedGaussSeidel (B, X, D, Teuchos::null, dampingFactor, direction, numSweeps);
4643  }
4644 
4645  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4646  void
4651  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
4652  const Scalar& dampingFactor,
4653  const ESweepDirection direction,
4654  const int numSweeps) const
4655  {
4656  using Teuchos::null;
4657  using Teuchos::RCP;
4658  using Teuchos::rcp;
4659  using Teuchos::rcp_const_cast;
4660  using Teuchos::rcpFromRef;
4661  typedef Scalar ST;
4662 
4663  TEUCHOS_TEST_FOR_EXCEPTION(
4664  isFillComplete() == false, std::runtime_error,
4665  "Tpetra::CrsMatrix::gaussSeidel: cannot call this method until "
4666  "fillComplete() has been called.");
4667  TEUCHOS_TEST_FOR_EXCEPTION(
4668  numSweeps < 0,
4669  std::invalid_argument,
4670  "Tpetra::CrsMatrix::gaussSeidel: The number of sweeps must be , "
4671  "nonnegative but you provided numSweeps = " << numSweeps << " < 0.");
4672 
4673  // Translate from global to local sweep direction.
4674  // While doing this, validate the input.
4675  KokkosClassic::ESweepDirection localDirection;
4676  if (direction == Forward) {
4677  localDirection = KokkosClassic::Forward;
4678  }
4679  else if (direction == Backward) {
4680  localDirection = KokkosClassic::Backward;
4681  }
4682  else if (direction == Symmetric) {
4683  // We'll control local sweep direction manually.
4684  localDirection = KokkosClassic::Forward;
4685  }
4686  else {
4687  TEUCHOS_TEST_FOR_EXCEPTION(true, std::invalid_argument,
4688  "Tpetra::CrsMatrix::gaussSeidel: The 'direction' enum does not have "
4689  "any of its valid values: Forward, Backward, or Symmetric.");
4690  }
4691 
4692  if (numSweeps == 0) {
4693  return; // Nothing to do.
4694  }
4695 
4696  // We don't need the Export object because this method assumes
4697  // that the row, domain, and range Maps are the same. We do need
4698  // the Import object, if there is one, though.
4699  RCP<const import_type> importer = this->getGraph()->getImporter();
4700  RCP<const export_type> exporter = this->getGraph()->getExporter();
4701  TEUCHOS_TEST_FOR_EXCEPTION(
4702  ! exporter.is_null (), std::runtime_error,
4703  "Tpetra's gaussSeidel implementation requires that the row, domain, "
4704  "and range Maps be the same. This cannot be the case, because the "
4705  "matrix has a nontrivial Export object.");
4706 
4707  RCP<const map_type> domainMap = this->getDomainMap ();
4708  RCP<const map_type> rangeMap = this->getRangeMap ();
4709  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
4710  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
4711 
4712 #ifdef HAVE_TEUCHOS_DEBUG
4713  {
4714  // The relation 'isSameAs' is transitive. It's also a
4715  // collective, so we don't have to do a "shared" test for
4716  // exception (i.e., a global reduction on the test value).
4717  TEUCHOS_TEST_FOR_EXCEPTION(
4718  ! X.getMap ()->isSameAs (*domainMap),
4719  std::runtime_error,
4720  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
4721  "multivector X be in the domain Map of the matrix.");
4722  TEUCHOS_TEST_FOR_EXCEPTION(
4723  ! B.getMap ()->isSameAs (*rangeMap),
4724  std::runtime_error,
4725  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
4726  "B be in the range Map of the matrix.");
4727  TEUCHOS_TEST_FOR_EXCEPTION(
4728  ! D.getMap ()->isSameAs (*rowMap),
4729  std::runtime_error,
4730  "Tpetra::CrsMatrix::gaussSeidel requires that the input "
4731  "D be in the row Map of the matrix.");
4732  TEUCHOS_TEST_FOR_EXCEPTION(
4733  ! rowMap->isSameAs (*rangeMap),
4734  std::runtime_error,
4735  "Tpetra::CrsMatrix::gaussSeidel requires that the row Map and the "
4736  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
4737  TEUCHOS_TEST_FOR_EXCEPTION(
4738  ! domainMap->isSameAs (*rangeMap),
4739  std::runtime_error,
4740  "Tpetra::CrsMatrix::gaussSeidel requires that the domain Map and "
4741  "the range Map of the matrix be the same.");
4742  }
4743 #else
4744  // Forestall any compiler warnings for unused variables.
4745  (void) rangeMap;
4746  (void) rowMap;
4747 #endif // HAVE_TEUCHOS_DEBUG
4748 
4749  // If B is not constant stride, copy it into a constant stride
4750  // multivector. We'l handle the right-hand side B first and deal
4751  // with X right before the sweeps, to improve locality of the
4752  // first sweep. (If the problem is small enough, then that will
4753  // hopefully keep more of the entries of X in cache. This
4754  // optimizes for the typical case of a small number of sweeps.)
4755  RCP<const MV> B_in;
4756  if (B.isConstantStride()) {
4757  B_in = rcpFromRef (B);
4758  }
4759  else {
4760  // The range Map and row Map are the same in this case, so we
4761  // can use the (possibly cached) row Map multivector to store a
4762  // constant stride copy of B. We don't have to copy back, since
4763  // Gauss-Seidel won't modify B.
4764  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
4765  deep_copy (*B_in_nonconst, B); // Copy from B into B_in(_nonconst).
4766  B_in = rcp_const_cast<const MV> (B_in_nonconst);
4767 
4769  ! B.isConstantStride (),
4770  std::runtime_error,
4771  "gaussSeidel: The current implementation of the Gauss-Seidel kernel "
4772  "requires that X and B both have constant stride. Since B does not "
4773  "have constant stride, we had to make a copy. This is a limitation of "
4774  "the current implementation and not your fault, but we still report it "
4775  "as an efficiency warning for your information.");
4776  }
4777 
4778  // If X is not constant stride, copy it into a constant stride
4779  // multivector. Also, make the column Map multivector X_colMap,
4780  // and its domain Map view X_domainMap. (X actually must be a
4781  // domain Map view of a column Map multivector; exploit this, if X
4782  // has constant stride.)
4783 
4784  RCP<MV> X_domainMap;
4785  RCP<MV> X_colMap;
4786  bool copiedInput = false;
4787 
4788  if (importer.is_null ()) { // Domain and column Maps are the same.
4789  if (X.isConstantStride ()) {
4790  X_domainMap = rcpFromRef (X);
4791  X_colMap = X_domainMap;
4792  copiedInput = false;
4793  }
4794  else {
4795  // Get a temporary column Map multivector, make a domain Map
4796  // view of it, and copy X into the domain Map view. We have
4797  // to copy here because we won't be doing Import operations.
4798  X_colMap = getColumnMapMultiVector (X, true);
4799  X_domainMap = X_colMap; // Domain and column Maps are the same.
4800  deep_copy (*X_domainMap, X); // Copy X into the domain Map view.
4801  copiedInput = true;
4803  ! X.isConstantStride (), std::runtime_error,
4804  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
4805  "Gauss-Seidel kernel requires that X and B both have constant "
4806  "stride. Since X does not have constant stride, we had to make a "
4807  "copy. This is a limitation of the current implementation and not "
4808  "your fault, but we still report it as an efficiency warning for "
4809  "your information.");
4810  }
4811  }
4812  else { // We will be doing Import operations in the sweeps.
4813  if (X.isConstantStride ()) {
4814  X_domainMap = rcpFromRef (X);
4815  // This kernel assumes that X is a domain Map view of a column
4816  // Map multivector. We will only check if this is valid if
4817  // the CMake configure Teuchos_ENABLE_DEBUG is ON.
4818  X_colMap = X_domainMap->offsetViewNonConst (colMap, 0);
4819 
4820  // FIXME (mfh 19 Mar 2013) Do we need to fill the remote
4821  // entries of X_colMap with zeros? Do we need to fill all of
4822  // X_domainMap initially with zeros? Ifpack
4823  // (Ifpack_PointRelaxation.cpp, line 906) creates an entirely
4824  // new MultiVector each time.
4825 
4826  // Do the first Import for the first sweep. This simplifies
4827  // the logic in the sweeps.
4828  X_colMap->doImport (X, *importer, INSERT);
4829  copiedInput = false;
4830  }
4831  else {
4832  // Get a temporary column Map multivector X_colMap, and make a
4833  // domain Map view X_domainMap of it. Instead of copying, we
4834  // do an Import from X into X_domainMap. This saves us a
4835  // copy, since the Import has to copy the data anyway.
4836  X_colMap = getColumnMapMultiVector (X, true);
4837  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
4838  X_colMap->doImport (X, *importer, INSERT);
4839  copiedInput = true;
4841  ! X.isConstantStride (), std::runtime_error,
4842  "Tpetra::CrsMatrix::gaussSeidel: The current implementation of the "
4843  "Gauss-Seidel kernel requires that X and B both have constant stride. "
4844  "Since X does not have constant stride, we had to make a copy. "
4845  "This is a limitation of the current implementation and not your fault, "
4846  "but we still report it as an efficiency warning for your information.");
4847  }
4848  }
4849 
4850  for (int sweep = 0; sweep < numSweeps; ++sweep) {
4851  if (! importer.is_null () && sweep > 0) {
4852  // We already did the first Import for the zeroth sweep.
4853  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4854  }
4855 
4856  // Do local Gauss-Seidel.
4857  if (direction != Symmetric) {
4858  if (rowIndices.is_null ()) {
4859  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4860  dampingFactor,
4861  localDirection);
4862  }
4863  else {
4864  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4865  D, rowIndices,
4866  dampingFactor,
4867  localDirection);
4868  }
4869  }
4870  else { // direction == Symmetric
4871  const bool doImportBetweenDirections = false;
4872  if (rowIndices.is_null ()) {
4873  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4874  dampingFactor,
4875  KokkosClassic::Forward);
4876  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
4877  // Gauss-Seidel" does _not_ do an Import between the forward
4878  // and backward sweeps. This makes sense, because Aztec
4879  // considers "symmetric Gauss-Seidel" a subdomain solver.
4880  if (doImportBetweenDirections) {
4881  // Communicate again before the Backward sweep.
4882  if (! importer.is_null ()) {
4883  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4884  }
4885  }
4886  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
4887  dampingFactor,
4888  KokkosClassic::Backward);
4889  }
4890  else {
4891  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4892  D, rowIndices,
4893  dampingFactor,
4894  KokkosClassic::Forward);
4895  if (doImportBetweenDirections) {
4896  // Communicate again before the Backward sweep.
4897  if (! importer.is_null ()) {
4898  X_colMap->doImport (*X_domainMap, *importer, INSERT);
4899  }
4900  }
4901  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
4902  D, rowIndices,
4903  dampingFactor,
4904  KokkosClassic::Backward);
4905  }
4906  }
4907  }
4908 
4909  if (copiedInput) {
4910  deep_copy (X, *X_domainMap); // Copy back from X_domainMap to X.
4911  }
4912  }
4913 
// gaussSeidelCopy: convenience overload without a row ordering. Forwards X,
// B, D, dampingFactor, direction, numSweeps and zeroInitialGuess to
// reorderedGaussSeidelCopy, passing Teuchos::null in the row-indices position.
4914  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4915  void
// NOTE(review): listing lines 4916-4919 (class qualifier, function name and
// the X / B / D parameter declarations) are absent from this extract --
// confirm the full signature against the real source file.
4920  const Scalar& dampingFactor,
4921  const ESweepDirection direction,
4922  const int numSweeps,
4923  const bool zeroInitialGuess) const
4924  {
4925  reorderedGaussSeidelCopy (X, B, D, Teuchos::null, dampingFactor, direction,
4926  numSweeps, zeroInitialGuess);
4927  }
4928 
4929  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
4930  void
4935  const Teuchos::ArrayView<LocalOrdinal>& rowIndices,
4936  const Scalar& dampingFactor,
4937  const ESweepDirection direction,
4938  const int numSweeps,
4939  const bool zeroInitialGuess) const
4940  {
4941  using Teuchos::null;
4942  using Teuchos::RCP;
4943  using Teuchos::rcp;
4944  using Teuchos::rcpFromRef;
4945  using Teuchos::rcp_const_cast;
4946  typedef Scalar ST;
4947  const char prefix[] = "Tpetra::CrsMatrix::(reordered)gaussSeidelCopy: ";
4948  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4949 
4950  TEUCHOS_TEST_FOR_EXCEPTION(
4951  ! isFillComplete (), std::runtime_error,
4952  prefix << "The matrix is not fill complete.");
4953  TEUCHOS_TEST_FOR_EXCEPTION(
4954  numSweeps < 0, std::invalid_argument,
4955  prefix << "The number of sweeps must be nonnegative, "
4956  "but you provided numSweeps = " << numSweeps << " < 0.");
4957 
4958  // Translate from global to local sweep direction.
4959  // While doing this, validate the input.
4960  KokkosClassic::ESweepDirection localDirection;
4961  if (direction == Forward) {
4962  localDirection = KokkosClassic::Forward;
4963  }
4964  else if (direction == Backward) {
4965  localDirection = KokkosClassic::Backward;
4966  }
4967  else if (direction == Symmetric) {
4968  // We'll control local sweep direction manually.
4969  localDirection = KokkosClassic::Forward;
4970  }
4971  else {
4972  TEUCHOS_TEST_FOR_EXCEPTION(
4973  true, std::invalid_argument,
4974  prefix << "The 'direction' enum does not have any of its valid "
4975  "values: Forward, Backward, or Symmetric.");
4976  }
4977 
4978  if (numSweeps == 0) {
4979  return;
4980  }
4981 
4982  RCP<const import_type> importer = this->getGraph ()->getImporter ();
4983  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4984  TEUCHOS_TEST_FOR_EXCEPTION(
4985  ! exporter.is_null (), std::runtime_error,
4986  "This method's implementation currently requires that the matrix's row, "
4987  "domain, and range Maps be the same. This cannot be the case, because "
4988  "the matrix has a nontrivial Export object.");
4989 
4990  RCP<const map_type> domainMap = this->getDomainMap ();
4991  RCP<const map_type> rangeMap = this->getRangeMap ();
4992  RCP<const map_type> rowMap = this->getGraph ()->getRowMap ();
4993  RCP<const map_type> colMap = this->getGraph ()->getColMap ();
4994 
4995 #ifdef HAVE_TEUCHOS_DEBUG
4996  {
4997  // The relation 'isSameAs' is transitive. It's also a
4998  // collective, so we don't have to do a "shared" test for
4999  // exception (i.e., a global reduction on the test value).
5000  TEUCHOS_TEST_FOR_EXCEPTION(
5001  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
5002  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
5003  "multivector X be in the domain Map of the matrix.");
5004  TEUCHOS_TEST_FOR_EXCEPTION(
5005  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
5006  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
5007  "B be in the range Map of the matrix.");
5008  TEUCHOS_TEST_FOR_EXCEPTION(
5009  ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
5010  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
5011  "D be in the row Map of the matrix.");
5012  TEUCHOS_TEST_FOR_EXCEPTION(
5013  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
5014  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
5015  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
5016  TEUCHOS_TEST_FOR_EXCEPTION(
5017  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
5018  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
5019  "the range Map of the matrix be the same.");
5020  }
5021 #else
5022  // Forestall any compiler warnings for unused variables.
5023  (void) rangeMap;
5024  (void) rowMap;
5025 #endif // HAVE_TEUCHOS_DEBUG
5026 
5027  // Fetch a (possibly cached) temporary column Map multivector
5028  // X_colMap, and a domain Map view X_domainMap of it. Both have
5029  // constant stride by construction. We know that the domain Map
5030  // must include the column Map, because our Gauss-Seidel kernel
5031  // requires that the row Map, domain Map, and range Map are all
5032  // the same, and that each process owns all of its own diagonal
5033  // entries of the matrix.
5034 
5035  RCP<MV> X_colMap;
5036  RCP<MV> X_domainMap;
5037  bool copyBackOutput = false;
5038  if (importer.is_null ()) {
5039  if (X.isConstantStride ()) {
5040  X_colMap = rcpFromRef (X);
5041  X_domainMap = rcpFromRef (X);
5042  // Column Map and domain Map are the same, so there are no
5043  // remote entries. Thus, if we are not setting the initial
5044  // guess to zero, we don't have to worry about setting remote
5045  // entries to zero, even though we are not doing an Import in
5046  // this case.
5047  if (zeroInitialGuess) {
5048  X_colMap->putScalar (ZERO);
5049  }
5050  // No need to copy back to X at end.
5051  }
5052  else { // We must copy X into a constant stride multivector.
5053  // Just use the cached column Map multivector for that.
5054  // force=true means fill with zeros, so no need to fill
5055  // remote entries (not in domain Map) with zeros.
5056  X_colMap = getColumnMapMultiVector (X, true);
5057  // X_domainMap is always a domain Map view of the column Map
5058  // multivector. In this case, the domain and column Maps are
5059  // the same, so X_domainMap _is_ X_colMap.
5060  X_domainMap = X_colMap;
5061  if (! zeroInitialGuess) { // Don't copy if zero initial guess
5062  try {
5063  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
5064  } catch (std::exception& e) {
5065  std::ostringstream os;
5066  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
5067  "deep_copy(*X_domainMap, X) threw an exception: "
5068  << e.what () << ".";
5069  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
5070  }
5071  }
5072  copyBackOutput = true; // Don't forget to copy back at end.
5074  ! X.isConstantStride (),
5075  std::runtime_error,
5076  "gaussSeidelCopy: The current implementation of the Gauss-Seidel "
5077  "kernel requires that X and B both have constant stride. Since X "
5078  "does not have constant stride, we had to make a copy. This is a "
5079  "limitation of the current implementation and not your fault, but we "
5080  "still report it as an efficiency warning for your information.");
5081  }
5082  }
5083  else { // Column Map and domain Map are _not_ the same.
5084  X_colMap = getColumnMapMultiVector (X);
5085  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
5086 
5087 #ifdef HAVE_TPETRA_DEBUG
5088  auto X_colMap_host_view =
5089  X_colMap->template getLocalView<Kokkos::HostSpace> ();
5090  auto X_domainMap_host_view =
5091  X_domainMap->template getLocalView<Kokkos::HostSpace> ();
5092 
5093  if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
5094  TEUCHOS_TEST_FOR_EXCEPTION
5095  (X_colMap_host_view.ptr_on_device () != X_domainMap_host_view.ptr_on_device (),
5096  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: Pointer to "
5097  "start of column Map view of X is not equal to pointer to start of "
5098  "(domain Map view of) X. This may mean that Tpetra::MultiVector::"
5099  "offsetViewNonConst is broken. "
5100  "Please report this bug to the Tpetra developers.");
5101  }
5102 
5103  TEUCHOS_TEST_FOR_EXCEPTION(
5104  X_colMap_host_view.dimension_0 () < X_domainMap_host_view.dimension_0 () ||
5105  X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
5106  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
5107  "X_colMap has fewer local rows than X_domainMap. "
5108  "X_colMap_host_view.dimension_0() = " << X_colMap_host_view.dimension_0 ()
5109  << ", X_domainMap_host_view.dimension_0() = "
5110  << X_domainMap_host_view.dimension_0 ()
5111  << ", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
5112  << ", and X_domainMap->getLocalLength() = "
5113  << X_domainMap->getLocalLength ()
5114  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
5115  "is broken. Please report this bug to the Tpetra developers.");
5116 
5117  TEUCHOS_TEST_FOR_EXCEPTION(
5118  X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
5119  std::logic_error, "Tpetra::CrsMatrix::gaussSeidelCopy: "
5120  "X_colMap has a different number of columns than X_domainMap. "
5121  "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
5122  << " != X_domainMap->getNumVectors() = "
5123  << X_domainMap->getNumVectors ()
5124  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
5125  "is broken. Please report this bug to the Tpetra developers.");
5126 #endif // HAVE_TPETRA_DEBUG
5127 
5128  if (zeroInitialGuess) {
5129  // No need for an Import, since we're filling with zeros.
5130  X_colMap->putScalar (ZERO);
5131  } else {
5132  // We could just copy X into X_domainMap. However, that
5133  // wastes a copy, because the Import also does a copy (plus
5134  // communication). Since the typical use case for
5135  // Gauss-Seidel is a small number of sweeps (2 is typical), we
5136  // don't want to waste that copy. Thus, we do the Import
5137  // here, and skip the first Import in the first sweep.
5138  // Importing directly from X effects the copy into X_domainMap
5139  // (which is a view of X_colMap).
5140  X_colMap->doImport (X, *importer, INSERT);
5141  }
5142  copyBackOutput = true; // Don't forget to copy back at end.
5143  } // if column and domain Maps are (not) the same
5144 
5145  // The Gauss-Seidel / SOR kernel expects multivectors of constant
5146  // stride. X_colMap is by construction, but B might not be. If
5147  // it's not, we have to make a copy.
5148  RCP<const MV> B_in;
5149  if (B.isConstantStride ()) {
5150  B_in = rcpFromRef (B);
5151  }
5152  else {
5153  // Range Map and row Map are the same in this case, so we can
5154  // use the cached row Map multivector to store a constant stride
5155  // copy of B.
5156  RCP<MV> B_in_nonconst = getRowMapMultiVector (B, true);
5157  try {
5158  deep_copy (*B_in_nonconst, B);
5159  } catch (std::exception& e) {
5160  std::ostringstream os;
5161  os << "Tpetra::CrsMatrix::reorderedGaussSeidelCopy: "
5162  "deep_copy(*B_in_nonconst, B) threw an exception: "
5163  << e.what () << ".";
5164  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
5165  }
5166  B_in = rcp_const_cast<const MV> (B_in_nonconst);
5167 
5169  ! B.isConstantStride (),
5170  std::runtime_error,
5171  "gaussSeidelCopy: The current implementation requires that B have "
5172  "constant stride. Since B does not have constant stride, we had to "
5173  "copy it into a separate constant-stride multivector. This is a "
5174  "limitation of the current implementation and not your fault, but we "
5175  "still report it as an efficiency warning for your information.");
5176  }
5177 
5178  for (int sweep = 0; sweep < numSweeps; ++sweep) {
5179  if (! importer.is_null () && sweep > 0) {
5180  // We already did the first Import for the zeroth sweep above,
5181  // if it was necessary.
5182  X_colMap->doImport (*X_domainMap, *importer, INSERT);
5183  }
5184 
5185  // Do local Gauss-Seidel.
5186  if (direction != Symmetric) {
5187  if (rowIndices.is_null ()) {
5188  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5189  dampingFactor,
5190  localDirection);
5191  }
5192  else {
5193  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5194  D, rowIndices,
5195  dampingFactor,
5196  localDirection);
5197  }
5198  }
5199  else { // direction == Symmetric
5200  if (rowIndices.is_null ()) {
5201  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5202  dampingFactor,
5203  KokkosClassic::Forward);
5204  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
5205  // Gauss-Seidel" does _not_ do an Import between the forward
5206  // and backward sweeps. This makes symmetric Gauss-Seidel a
5207  // symmetric preconditioner if the matrix A is symmetric. We
5208  // imitate Aztec's behavior here.
5209  this->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
5210  dampingFactor,
5211  KokkosClassic::Backward);
5212  }
5213  else {
5214  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5215  D, rowIndices,
5216  dampingFactor,
5217  KokkosClassic::Forward);
5218  this->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
5219  D, rowIndices,
5220  dampingFactor,
5221  KokkosClassic::Backward);
5222 
5223  }
5224  }
5225  }
5226 
5227  if (copyBackOutput) {
5228  try {
5229  deep_copy (X , *X_domainMap); // Copy result back into X.
5230  } catch (std::exception& e) {
5231  TEUCHOS_TEST_FOR_EXCEPTION(
5232  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
5233  "threw an exception: " << e.what ());
5234  }
5235  }
5236  }
5237 
// convert<T>: build a new CrsMatrix whose scalar type is T, constructed from
// this matrix's graph (getCrsGraph ()), with this matrix's local values
// copy-converted into it. The new matrix is fill-completed with this
// matrix's domain and range Maps before being returned.
//
// Throws std::runtime_error if this matrix is not fill complete, and
// std::logic_error if it is fill complete but does not have a static graph.
5238  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5239  template<class T>
5240  Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node, classic> >
// NOTE(review): listing line 5241 (presumably the class qualifier) is absent
// from this extract -- confirm against the real source file.
5242  convert () const
5243  {
5244  using Teuchos::RCP;
5245  typedef CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node,
5246  classic> output_matrix_type;
// tfecfFuncName is not referenced by name in this block; presumably the
// TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC macro picks it up -- confirm against
// the macro definition.
5247  const char tfecfFuncName[] = "convert: ";
5248 
5249  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5250  (! this->isFillComplete (), std::runtime_error, "This matrix (the source "
5251  "of the conversion) is not fill complete. You must first call "
5252  "fillComplete() (possibly with the domain and range Map) without an "
5253  "intervening call to resumeFill(), before you may call this method.");
5254  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5255  (! this->isStaticGraph (), std::logic_error, "This matrix (the source "
5256  "of the conversion) claims to be fill complete, but does not have a "
5257  "static (i.e., constant) graph. Please report this bug to the Tpetra "
5258  "developers.");
5259 
5260  RCP<output_matrix_type> newMatrix
5261  (new output_matrix_type (this->getCrsGraph ()));
5262  // Copy old values into new values. impl_scalar_type and T may
5263  // differ, so we can't use Kokkos::deep_copy.
5264  ::Tpetra::Details::copyConvert (newMatrix->lclMatrix_.values,
5265  this->lclMatrix_.values);
5266  // Since newMatrix has a static (const) graph, the graph already has
5267  // a column Map, and Import and Export objects already exist (if
5268  // applicable). Thus, calling fillComplete is cheap.
5269  newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
5270 
5271  return newMatrix;
5272  }
5273 
5274 
5275  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5276  void
5279  {
5280 #ifdef HAVE_TPETRA_DEBUG
5281  const char tfecfFuncName[] = "checkInternalState: ";
5282  const char err[] = "Internal state is not consistent. "
5283  "Please report this bug to the Tpetra developers.";
5284 
5285  // This version of the graph (RCP<const crs_graph_type>) must
5286  // always be nonnull.
5287  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5288  staticGraph_.is_null (),
5289  std::logic_error, err);
5290  // myGraph == null means that the matrix has a const ("static")
5291  // graph. Otherwise, the matrix has a dynamic graph (it owns its
5292  // graph).
5293  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5294  ! myGraph_.is_null () && myGraph_ != staticGraph_,
5295  std::logic_error, err);
5296  // if matrix is fill complete, then graph must be fill complete
5297  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5298  isFillComplete () && ! staticGraph_->isFillComplete (),
5299  std::logic_error, err << " Specifically, the matrix is fill complete, "
5300  "but its graph is NOT fill complete.");
5301  // if matrix is storage optimized, it should have a 1D allocation
5302  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5303  isStorageOptimized () && ! values2D_.is_null (),
5304  std::logic_error, err);
5305  // if matrix/graph are static profile, then 2D allocation should not be present
5306  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5307  getProfileType() == StaticProfile && values2D_ != Teuchos::null,
5308  std::logic_error, err);
5309  // if matrix/graph are dynamic profile, then 1D allocation should not be present
5310  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5311  getProfileType() == DynamicProfile && k_values1D_.dimension_0 () > 0,
5312  std::logic_error, err);
5313  // if values are allocated and they are non-zero in number, then
5314  // one of the allocations should be present
5315  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5316  staticGraph_->indicesAreAllocated () &&
5317  staticGraph_->getNodeAllocationSize() > 0 &&
5318  staticGraph_->getNodeNumRows() > 0
5319  && values2D_.is_null () &&
5320  k_values1D_.dimension_0 () == 0,
5321  std::logic_error, err);
5322  // we cannot have both a 1D and 2D allocation
5323  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5324  k_values1D_.dimension_0 () > 0 && values2D_ != Teuchos::null,
5325  std::logic_error, err << " Specifically, k_values1D_ is allocated (has "
5326  "size " << k_values1D_.dimension_0 () << " > 0) and values2D_ is also "
5327  "allocated. CrsMatrix is not suppose to have both a 1-D and a 2-D "
5328  "allocation at the same time.");
5329 #endif
5330  }
5331 
/// Build a one-line, human-readable summary of this matrix.
///
/// NOTE(review): listing lines 5334-5335 (the qualified function name)
/// are not visible here; presumably this is description() -- confirm
/// against the class declaration.
///
/// The summary contains the object label (only when it is nonempty), the
/// fill-complete state and the global dimensions.  The global number of
/// entries is included only when the matrix is fill complete.
///
/// \return The summary as a std::string.
5332  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5333  std::string
5336  {
5337  std::ostringstream os;
5338 
5339  os << "Tpetra::CrsMatrix (Kokkos refactor): {";
5340  if (this->getObjectLabel () != "") {
5341  os << "Label: \"" << this->getObjectLabel () << "\", ";
5342  }
5343  if (isFillComplete ()) {
5344  os << "isFillComplete: true"
5345  << ", global dimensions: [" << getGlobalNumRows () << ", "
5346  << getGlobalNumCols () << "]"
5347  << ", global number of entries: " << getGlobalNumEntries ()
5348  << "}";
5349  }
5350  else {
// Not fill complete: the global entry count is not printed in this case.
5351  os << "isFillComplete: false"
5352  << ", global dimensions: [" << getGlobalNumRows () << ", "
5353  << getGlobalNumCols () << "]}";
5354  }
5355  return os.str ();
5356  }
5357 
/// Print a description of this matrix to \c out at the requested verbosity.
///
/// \param out [out] Stream to write to.
/// \param verbLevel [in] Requested verbosity.  VERB_DEFAULT is treated
///   as VERB_LOW.
///
/// What gets printed, by level:
///  - VERB_NONE: nothing.
///  - VERB_LOW: rank 0 prints the label (if nonempty), the template
///    parameters, the fill-complete state and global dimensions (plus
///    global entry statistics when fill complete).
///  - VERB_MEDIUM: additionally the row, column, domain and range Maps,
///    and per-process entry counts.
///  - VERB_HIGH: additionally one line per local row with its entry count.
///  - VERB_EXTREME: additionally the (global column index, value) pairs of
///    each row.
///
/// At VERB_MEDIUM and above the per-process loops call comm->barrier() on
/// every rank, so all processes in the matrix's communicator must call
/// this method together.
///
/// NOTE(review): listing line 5360 (presumably the class qualifier) is
/// not visible here.
5358  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5359  void
5361  describe (Teuchos::FancyOStream &out,
5362  const Teuchos::EVerbosityLevel verbLevel) const
5363  {
5364  using std::endl;
5365  using std::setw;
5366  using Teuchos::ArrayView;
5367  using Teuchos::Comm;
5368  using Teuchos::RCP;
5369  using Teuchos::TypeNameTraits;
5370  using Teuchos::VERB_DEFAULT;
5371  using Teuchos::VERB_NONE;
5372  using Teuchos::VERB_LOW;
5373  using Teuchos::VERB_MEDIUM;
5374  using Teuchos::VERB_HIGH;
5375  using Teuchos::VERB_EXTREME;
5376 
5377  const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
5378 
5379  if (vl == VERB_NONE) {
5380  return; // Don't print anything at all
5381  }
5382  // By convention, describe() always begins with a tab.
5383  Teuchos::OSTab tab0 (out);
5384 
5385  RCP<const Comm<int> > comm = this->getComm();
5386  const int myRank = comm->getRank();
5387  const int numProcs = comm->getSize();
// Column width for the per-row table: grows with the magnitude of the
// global row count, is at least 11, and gets 2 extra characters of padding.
5388  size_t width = 1;
5389  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5390  ++width;
5391  }
5392  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
5393 
5394  // none: print nothing
5395  // low: print O(1) info from node 0
5396  // medium: print O(P) info, num entries per process
5397  // high: print O(N) info, num entries per row
5398  // extreme: print O(NNZ) info: print indices and values
5399  //
5400  // for medium and higher, print constituent objects at specified verbLevel
5401  if (myRank == 0) {
5402  out << "Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5403  }
5404  Teuchos::OSTab tab1 (out);
5405 
5406  if (myRank == 0) {
5407  if (this->getObjectLabel () != "") {
5408  out << "Label: \"" << this->getObjectLabel () << "\", ";
5409  }
5410  {
5411  out << "Template parameters:" << endl;
5412  Teuchos::OSTab tab2 (out);
5413  out << "Scalar: " << TypeNameTraits<Scalar>::name () << endl
5414  << "LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5415  << "GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5416  << "Node: " << TypeNameTraits<Node>::name () << endl;
5417  }
5418  if (isFillComplete()) {
5419  out << "isFillComplete: true" << endl
5420  << "Global dimensions: [" << getGlobalNumRows () << ", "
5421  << getGlobalNumCols () << "]" << endl
5422  << "Global number of entries: " << getGlobalNumEntries () << endl
5423  << "Global number of diagonal entries: " << getGlobalNumDiags ()
5424  << endl << "Global max number of entries in a row: "
5425  << getGlobalMaxNumRowEntries () << endl;
5426  }
5427  else {
5428  out << "isFillComplete: false" << endl
5429  << "Global dimensions: [" << getGlobalNumRows () << ", "
5430  << getGlobalNumCols () << "]" << endl;
5431  }
5432  }
5433 
5434  if (vl < VERB_MEDIUM) {
5435  return; // all done!
5436  }
5437 
5438  // Describe the row Map.
5439  if (myRank == 0) {
5440  out << endl << "Row Map:" << endl;
5441  }
5442  if (getRowMap ().is_null ()) {
5443  if (myRank == 0) {
5444  out << "null" << endl;
5445  }
5446  }
5447  else {
5448  if (myRank == 0) {
5449  out << endl;
5450  }
// Called on every rank, not just rank 0.
5451  getRowMap ()->describe (out, vl);
5452  }
5453 
5454  // Describe the column Map.
5455  if (myRank == 0) {
5456  out << "Column Map: ";
5457  }
5458  if (getColMap ().is_null ()) {
5459  if (myRank == 0) {
5460  out << "null" << endl;
5461  }
5462  } else if (getColMap () == getRowMap ()) {
// Same Map object as the row Map: refer to it instead of printing it again.
5463  if (myRank == 0) {
5464  out << "same as row Map" << endl;
5465  }
5466  } else {
5467  if (myRank == 0) {
5468  out << endl;
5469  }
5470  getColMap ()->describe (out, vl);
5471  }
5472 
5473  // Describe the domain Map.
5474  if (myRank == 0) {
5475  out << "Domain Map: ";
5476  }
5477  if (getDomainMap ().is_null ()) {
5478  if (myRank == 0) {
5479  out << "null" << endl;
5480  }
5481  } else if (getDomainMap () == getRowMap ()) {
5482  if (myRank == 0) {
5483  out << "same as row Map" << endl;
5484  }
5485  } else if (getDomainMap () == getColMap ()) {
5486  if (myRank == 0) {
5487  out << "same as column Map" << endl;
5488  }
5489  } else {
5490  if (myRank == 0) {
5491  out << endl;
5492  }
5493  getDomainMap ()->describe (out, vl);
5494  }
5495 
5496  // Describe the range Map.
5497  if (myRank == 0) {
5498  out << "Range Map: ";
5499  }
5500  if (getRangeMap ().is_null ()) {
5501  if (myRank == 0) {
5502  out << "null" << endl;
5503  }
5504  } else if (getRangeMap () == getDomainMap ()) {
5505  if (myRank == 0) {
5506  out << "same as domain Map" << endl;
5507  }
5508  } else if (getRangeMap () == getRowMap ()) {
5509  if (myRank == 0) {
5510  out << "same as row Map" << endl;
5511  }
5512  } else {
5513  if (myRank == 0) {
5514  out << endl;
5515  }
5516  getRangeMap ()->describe (out, vl);
5517  }
5518 
5519  // O(P) data
// Each rank prints in turn; the barriers below are executed by all ranks
// on every iteration.
5520  for (int curRank = 0; curRank < numProcs; ++curRank) {
5521  if (myRank == curRank) {
5522  out << "Process rank: " << curRank << endl;
5523  Teuchos::OSTab tab2 (out);
5524  if (! staticGraph_->indicesAreAllocated ()) {
5525  out << "Graph indices not allocated" << endl;
5526  }
5527  else {
5528  out << "Number of allocated entries: "
5529  << staticGraph_->getNodeAllocationSize () << endl;
5530  }
5531  out << "Number of entries: " << getNodeNumEntries () << endl;
5532  if (isFillComplete ()) {
5533  out << "Number of diagonal entries: " << getNodeNumDiags () << endl;
5534  }
5535  out << "Max number of entries per row: " << getNodeMaxNumRowEntries ()
5536  << endl;
5537  }
5538  // Give output time to complete by executing some barriers.
5539  comm->barrier ();
5540  comm->barrier ();
5541  comm->barrier ();
5542  }
5543 
5544  if (vl < VERB_HIGH) {
5545  return; // all done!
5546  }
5547 
5548  // O(N) and O(NNZ) data
5549  for (int curRank = 0; curRank < numProcs; ++curRank) {
5550  if (myRank == curRank) {
5551  out << std::setw(width) << "Proc Rank"
5552  << std::setw(width) << "Global Row"
5553  << std::setw(width) << "Num Entries";
5554  if (vl == VERB_EXTREME) {
5555  out << std::setw(width) << "(Index,Value)";
5556  }
5557  out << endl;
5558  for (size_t r = 0; r < getNodeNumRows (); ++r) {
5559  const size_t nE = getNumEntriesInLocalRow(r);
5560  GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5561  out << std::setw(width) << myRank
5562  << std::setw(width) << gid
5563  << std::setw(width) << nE;
5564  if (vl == VERB_EXTREME) {
5565  if (isGloballyIndexed()) {
5566  ArrayView<const GlobalOrdinal> rowinds;
5567  ArrayView<const Scalar> rowvals;
5568  getGlobalRowView (gid, rowinds, rowvals);
5569  for (size_t j = 0; j < nE; ++j) {
5570  out << " (" << rowinds[j]
5571  << ", " << rowvals[j]
5572  << ") ";
5573  }
5574  }
5575  else if (isLocallyIndexed()) {
5576  ArrayView<const LocalOrdinal> rowinds;
5577  ArrayView<const Scalar> rowvals;
5578  getLocalRowView (r, rowinds, rowvals);
5579  for (size_t j=0; j < nE; ++j) {
// Local column indices are converted to global ones via the column Map
// before printing.
5580  out << " (" << getColMap()->getGlobalElement(rowinds[j])
5581  << ", " << rowvals[j]
5582  << ") ";
5583  }
5584  } // globally or locally indexed
5585  } // vl == VERB_EXTREME
5586  out << endl;
5587  } // for each row r on this process
5588  } // if (myRank == curRank)
5589 
5590  // Give output time to complete
5591  comm->barrier ();
5592  comm->barrier ();
5593  comm->barrier ();
5594  } // for each process p
5595  }
5596 
/// Report whether \c source has a type this matrix can use as the source
/// of an Import or Export.
///
/// NOTE(review): listing lines 5599-5600 (the qualified function name and
/// its parameter list) and line 5609 (presumably the typedef of
/// row_matrix_type) are not visible here.  The name "checkSizes" is taken
/// from the comment in copyAndPermute that refers to this test -- confirm.
///
/// \return true if and only if \c source can be dynamic_cast to
///   const row_matrix_type*.  No size comparison is performed.
5597  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5598  bool
5601  {
5602  // It's not clear what kind of compatibility checks on sizes can
5603  // be performed here. Epetra_CrsGraph doesn't check any sizes for
5604  // compatibility.
5605 
5606  // Currently, the source object must be a RowMatrix with the same
5607  // four template parameters as the target CrsMatrix. We might
5608  // relax this requirement later.
5610  const row_matrix_type* srcRowMat =
5611  dynamic_cast<const row_matrix_type*> (&source);
5612  return (srcRowMat != NULL);
5613  }
5614 
/// Copy rows of a source RowMatrix into this matrix: first the leading
/// rows shared by source and target, then the rows that must be permuted.
///
/// NOTE(review): listing lines 5617-5618 (the class qualifier and the
/// first line of the parameter list, which presumably declares \c source)
/// and line 5623 (presumably a using-declaration for ProfilingRegion) are
/// not visible here.
///
/// \param numSameIDs [in] Number of leading local rows [0, numSameIDs)
///   whose global indices are the same in source and target.
/// \param permuteToLIDs [in] Local row indices in the target (this
///   matrix) that receive permuted rows.
/// \param permuteFromLIDs [in] Local row indices in the source that supply
///   the permuted rows; must have the same length as \c permuteToLIDs,
///   otherwise std::invalid_argument is raised.
///
/// Each row is combined into the target with REPLACE if this matrix has a
/// static graph, and with INSERT otherwise.
5615  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5616  void
5619  size_t numSameIDs,
5620  const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
5621  const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
5622  {
5624  using Teuchos::Array;
5625  using Teuchos::ArrayView;
5626  typedef LocalOrdinal LO;
5627  typedef GlobalOrdinal GO;
5628  typedef node_type NT;
5629  // Method name string for TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC.
5630  const char tfecfFuncName[] = "copyAndPermute: ";
5631  ProfilingRegion regionCAP ("Tpetra::CrsMatrix::copyAndPermute");
5632 
5633  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5634  (permuteToLIDs.size () != permuteFromLIDs.size (),
5635  std::invalid_argument, "permuteToLIDs.size() = " << permuteToLIDs.size ()
5636  << "!= permuteFromLIDs.size() = " << permuteFromLIDs.size () << ".");
5637 
5638  // This dynamic cast should succeed, because we've already tested
5639  // it in checkSizes().
5640  typedef RowMatrix<Scalar, LO, GO, NT> row_matrix_type;
5641  const row_matrix_type& srcMat = dynamic_cast<const row_matrix_type&> (source);
5642 
5643  const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5644  //
5645  // Copy the first numSame row from source to target (this matrix).
5646  // This involves copying rows corresponding to LIDs [0, numSame-1].
5647  //
5648  const map_type& srcRowMap = * (srcMat.getRowMap ());
// Scratch buffers for row copies; they only ever grow and are reused by
// both loops below.
5649  Array<GO> rowInds;
5650  Array<Scalar> rowVals;
5651  const LO numSameIDs_as_LID = static_cast<LO> (numSameIDs);
5652  for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5653  // Global ID for the current row index in the source matrix.
5654  // The first numSameIDs GIDs in the two input lists are the
5655  // same, so sourceGID == targetGID in this case.
5656  const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5657  const GO targetGID = sourceGID;
5658 
5659  // Input views for the combineGlobalValues() call below.
5660  ArrayView<const GO> rowIndsConstView;
5661  ArrayView<const Scalar> rowValsConstView;
5662 
5663  if (sourceIsLocallyIndexed) {
5664  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5665  if (rowLength > static_cast<size_t> (rowInds.size())) {
5666  rowInds.resize (rowLength);
5667  rowVals.resize (rowLength);
5668  }
5669  // Resizing invalidates an Array's views, so we must make new
5670  // ones, even if rowLength hasn't changed.
5671  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
5672  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
5673 
5674  // The source matrix is locally indexed, so we have to get a
5675  // copy. Really it's the GIDs that have to be copied (because
5676  // they have to be converted from LIDs).
5677  size_t checkRowLength = 0;
5678  srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength);
5679 
5680 #ifdef HAVE_TPETRA_DEBUG
5681  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength,
5682  std::logic_error, "For global row index " << sourceGID << ", the source"
5683  " matrix's getNumEntriesInGlobalRow() method returns a row length of "
5684  << rowLength << ", but the getGlobalRowCopy() method reports that "
5685  "the row length is " << checkRowLength << ". Please report this bug "
5686  "to the Tpetra developers.");
5687 #endif // HAVE_TPETRA_DEBUG
5688 
5689  rowIndsConstView = rowIndsView.view (0, rowLength);
5690  rowValsConstView = rowValsView.view (0, rowLength);
5691  }
5692  else { // source matrix is globally indexed.
5693  srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView);
5694  }
5695 
5696  // Combine the data into the target matrix.
5697  if (isStaticGraph()) {
5698  // Applying a permutation to a matrix with a static graph
5699  // means REPLACE-ing entries.
5700  combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, REPLACE);
5701  }
5702  else {
5703  // Applying a permutation to a matrix with a dynamic graph
5704  // means INSERT-ing entries. This has the same effect as
5705  // ADD, if the target graph already has an entry there.
5706  combineGlobalValues (targetGID, rowIndsConstView, rowValsConstView, INSERT);
5707  }
5708  } // For each of the consecutive source and target IDs that are the same
5709 
5710  //
5711  // Permute the remaining rows.
5712  //
// Same per-row procedure as above, except that source and target rows are
// looked up through permuteFromLIDs[p] and permuteToLIDs[p] respectively.
5713  const map_type& tgtRowMap = * (this->getRowMap ());
5714  const size_t numPermuteToLIDs = static_cast<size_t> (permuteToLIDs.size ());
5715  for (size_t p = 0; p < numPermuteToLIDs; ++p) {
5716  const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5717  const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5718 
5719  // Input views for the combineGlobalValues() call below.
5720  ArrayView<const GO> rowIndsConstView;
5721  ArrayView<const Scalar> rowValsConstView;
5722 
5723  if (sourceIsLocallyIndexed) {
5724  const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5725  if (rowLength > static_cast<size_t> (rowInds.size ())) {
5726  rowInds.resize (rowLength);
5727  rowVals.resize (rowLength);
5728  }
5729  // Resizing invalidates an Array's views, so we must make new
5730  // ones, even if rowLength hasn't changed.
5731  ArrayView<GO> rowIndsView = rowInds.view (0, rowLength);
5732  ArrayView<Scalar> rowValsView = rowVals.view (0, rowLength);
5733 
5734  // The source matrix is locally indexed, so we have to get a
5735  // copy. Really it's the GIDs that have to be copied (because
5736  // they have to be converted from LIDs).
5737  size_t checkRowLength = 0;
5738  srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView, checkRowLength);
5739 
5740 #ifdef HAVE_TPETRA_DEBUG
5741  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(rowLength != checkRowLength,
5742  std::logic_error, "For the source matrix's global row index "
5743  << sourceGID << ", the source matrix's getNumEntriesInGlobalRow() "
5744  "method returns a row length of " << rowLength << ", but the "
5745  "getGlobalRowCopy() method reports that the row length is "
5746  << checkRowLength << ". Please report this bug to the Tpetra "
5747  "developers.");
5748 #endif // HAVE_TPETRA_DEBUG
5749 
5750  rowIndsConstView = rowIndsView.view (0, rowLength);
5751  rowValsConstView = rowValsView.view (0, rowLength);
5752  }
5753  else {
5754  srcMat.getGlobalRowView (sourceGID, rowIndsConstView, rowValsConstView);
5755  }
5756 
5757  // Combine the data into the target matrix.
5758  if (isStaticGraph()) {
5759  this->combineGlobalValues (targetGID, rowIndsConstView,
5760  rowValsConstView, REPLACE);
5761  }
5762  else {
5763  this->combineGlobalValues (targetGID, rowIndsConstView,
5764  rowValsConstView, INSERT);
5765  }
5766  } // For each ID to permute
5767  }
5768 
/// Pack the requested rows of \c source for communication by delegating
/// to the source object's own pack() method.
///
/// NOTE(review): listing lines 5771 (presumably the class qualifier) and
/// 5779 (presumably a using-declaration for ProfilingRegion) are not
/// visible here.
///
/// \param source [in] Source object of the Import/Export.  It must be
///   castable to RowMatrix<Scalar, LO, GO, Node>; otherwise
///   std::invalid_argument is raised.
/// \param exportLIDs [in] Forwarded unchanged to pack().
/// \param exports [out] Forwarded unchanged to pack().
/// \param numPacketsPerLID [out] Forwarded unchanged to pack().
/// \param constantNumPackets [out] Forwarded unchanged to pack().
/// \param distor [in] Forwarded unchanged to pack().
///
/// In a HAVE_TPETRA_DEBUG build, an exception thrown by pack() on any
/// process is caught, agreed upon across processes with a reduceAll,
/// printed to std::cerr by each failing process, and then reported as
/// std::logic_error.  That path is collective over this matrix's
/// communicator.  In a non-debug build, pack() is simply called.
5769  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5770  void
5772  packAndPrepare (const SrcDistObject& source,
5773  const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5774  Teuchos::Array<char>& exports,
5775  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5776  size_t& constantNumPackets,
5777  Distributor& distor)
5778  {
5780  using Teuchos::Array;
5781  using Teuchos::ArrayView;
5782  using Teuchos::av_reinterpret_cast;
5783  typedef LocalOrdinal LO;
5784  typedef GlobalOrdinal GO;
5785  const char tfecfFuncName[] = "packAndPrepare: ";
5786  ProfilingRegion regionPAP ("Tpetra::CrsMatrix::packAndPrepare");
5787 
5788  // Attempt to cast the source object to RowMatrix. If the cast
5789  // succeeds, use the source object's pack method to pack its data
5790  // for communication. If the source object is really a CrsMatrix,
5791  // this will pick up the CrsMatrix's more efficient override. If
5792  // the RowMatrix cast fails, then the source object doesn't have
5793  // the right type.
5794  //
5795  // FIXME (mfh 30 Jun 2013) We don't even need the RowMatrix to
5796  // have the same Node type. Unfortunately, we don't have a way to
5797  // ask if the RowMatrix is "a RowMatrix with any Node type," since
5798  // RowMatrix doesn't have a base class. A hypothetical
5799  // RowMatrixBase<Scalar, LO, GO> class, which does not currently
5800  // exist, would satisfy this requirement.
5801  //
5802  // Why RowMatrixBase<Scalar, LO, GO>? The source object's Scalar
5803  // type doesn't technically need to match the target object's
5804  // Scalar type, so we could just have RowMatrixBase<LO, GO>. LO
5805  // and GO need not be the same, as long as there is no overflow of
5806  // the indices. However, checking for index overflow is global
5807  // and therefore undesirable.
5808  typedef RowMatrix<Scalar, LO, GO, Node> row_matrix_type;
5809  const row_matrix_type* srcRowMat =
5810  dynamic_cast<const row_matrix_type*> (&source);
5811  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5812  srcRowMat == NULL, std::invalid_argument,
5813  "The source object of the Import or Export operation is neither a "
5814  "CrsMatrix (with the same template parameters as the target object), "
5815  "nor a RowMatrix (with the same first four template parameters as the "
5816  "target object).");
5817 #ifdef HAVE_TPETRA_DEBUG
5818  {
5819  using Teuchos::reduceAll;
5820  std::ostringstream msg;
5821  int lclBad = 0;
// Catch locally so that every process still reaches the reduceAll below.
5822  try {
5823  srcRowMat->pack (exportLIDs, exports, numPacketsPerLID,
5824  constantNumPackets, distor);
5825  } catch (std::exception& e) {
5826  lclBad = 1;
5827  msg << e.what ();
5828  }
5829  int gblBad = 0;
5830  const Teuchos::Comm<int>& comm = * (this->getComm ());
5831  reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
5832  lclBad, Teuchos::outArg (gblBad));
5833  if (gblBad != 0) {
5834  const int myRank = comm.getRank ();
5835  const int numProcs = comm.getSize ();
// Failing processes print their message one rank at a time.
5836  for (int r = 0; r < numProcs; ++r) {
5837  if (r == myRank && lclBad != 0) {
5838  std::ostringstream os;
5839  os << "Proc " << myRank << ": " << msg.str () << std::endl;
5840  std::cerr << os.str ();
5841  }
5842  comm.barrier ();
5843  comm.barrier ();
5844  comm.barrier ();
5845  }
5846  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
5847  true, std::logic_error, "pack() threw an exception on one or "
5848  "more participating processes.");
5849  }
5850  }
5851 #else
5852  srcRowMat->pack (exportLIDs, exports, numPacketsPerLID,
5853  constantNumPackets, distor);
5854 #endif // HAVE_TPETRA_DEBUG
5855  }
5856 
5857  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5858  bool
5860  packRow (char* const numEntOut,
5861  char* const valOut,
5862  char* const indOut,
5863  const size_t numEnt,
5864  const LocalOrdinal lclRow) const
5865  {
5866  using Teuchos::ArrayView;
5867  typedef LocalOrdinal LO;
5868  typedef GlobalOrdinal GO;
5869 
5870  const LO numEntLO = static_cast<LO> (numEnt);
5871  memcpy (numEntOut, &numEntLO, sizeof (LO));
5872  if (this->isLocallyIndexed ()) {
5873  // If the matrix is locally indexed on the calling process, we
5874  // have to use its column Map (which it _must_ have in this
5875  // case) to convert to global indices.
5876  ArrayView<const LO> indIn;
5877  ArrayView<const Scalar> valIn;
5878  this->getLocalRowView (lclRow, indIn, valIn);
5879  const map_type& colMap = * (this->getColMap ());
5880  // Copy column indices one at a time, so that we don't need
5881  // temporary storage.
5882  for (size_t k = 0; k < numEnt; ++k) {
5883  const GO gblIndIn = colMap.getGlobalElement (indIn[k]);
5884  memcpy (indOut + k * sizeof (GO), &gblIndIn, sizeof (GO));
5885  }
5886  memcpy (valOut, valIn.getRawPtr (), numEnt * sizeof (Scalar));
5887  }
5888  else if (this->isGloballyIndexed ()) {
5889  // If the matrix is globally indexed on the calling process,
5890  // then we can use the column indices directly. However, we
5891  // have to get the global row index. The calling process must
5892  // have a row Map, since otherwise it shouldn't be participating
5893  // in packing operations.
5894  ArrayView<const GO> indIn;
5895  ArrayView<const Scalar> valIn;
5896  const map_type& rowMap = * (this->getRowMap ());
5897  const GO gblRow = rowMap.getGlobalElement (lclRow);
5898  this->getGlobalRowView (gblRow, indIn, valIn);
5899  memcpy (indOut, indIn.getRawPtr (), numEnt * sizeof (GO));
5900  memcpy (valOut, valIn.getRawPtr (), numEnt * sizeof (Scalar));
5901  }
5902  else {
5903  if (numEnt != 0) {
5904  return false;
5905  }
5906  }
5907  return true;
5908  }
5909 
5910  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5911  bool
5913  unpackRow (impl_scalar_type* const valInTmp,
5914  GlobalOrdinal* const indInTmp,
5915  const size_t tmpSize,
5916  const char* const valIn,
5917  const char* const indIn,
5918  const size_t numEnt,
5919  const LocalOrdinal lclRow,
5920  const Tpetra::CombineMode combineMode)
5921  {
5922  if (tmpSize < numEnt || (numEnt != 0 && (valInTmp == NULL || indInTmp == NULL))) {
5923  return false;
5924  }
5925  memcpy (valInTmp, valIn, numEnt * sizeof (Scalar));
5926  memcpy (indInTmp, indIn, numEnt * sizeof (GlobalOrdinal));
5927 
5928  // FIXME (mfh 23 Mar 2017) It would make sense to use the return
5929  // value here as more than just a "did it succeed" Boolean test.
5930 
5931  // FIXME (mfh 23 Mar 2017) CrsMatrix_NonlocalSumInto_Ignore test
5932  // expects this method to ignore incoming entries that do not
5933  // exist on the process that owns those rows. We would like to
5934  // distinguish between "errors" resulting from ignored entries,
5935  // vs. actual errors.
5936 
5937  //const LocalOrdinal numModified =
5938  this->combineGlobalValuesRaw (lclRow, numEnt, valInTmp, indInTmp,
5939  combineMode);
5940  return true; // FIXME (mfh 23 Mar 2013) See above.
5941  //return numModified == numEnt;
5942  }
5943 
5944 
5945  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5946  void
5948  allocatePackSpace (Teuchos::Array<char>& exports,
5949  size_t& totalNumEntries,
5950  const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs) const
5951  {
5952  typedef impl_scalar_type IST;
5953  typedef LocalOrdinal LO;
5954  typedef GlobalOrdinal GO;
5955  //const char tfecfFuncName[] = "allocatePackSpace: ";
5956 
5957  // The number of export LIDs must fit in LocalOrdinal, assuming
5958  // that the LIDs are distinct and valid on the calling process.
5959  const LO numExportLIDs = static_cast<LO> (exportLIDs.size ());
5960 
5961  // Count the total number of matrix entries to send.
5962  totalNumEntries = 0;
5963  for (LO i = 0; i < numExportLIDs; ++i) {
5964  const LO lclRow = exportLIDs[i];
5965  size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
5966  // FIXME (mfh 25 Jan 2015) We should actually report invalid row
5967  // indices as an error. Just consider them nonowned for now.
5968  if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
5969  curNumEntries = 0;
5970  }
5971  totalNumEntries += curNumEntries;
5972  }
5973 
5974  // FIXME (mfh 24 Feb 2013, 24 Mar 2017) This code is only correct
5975  // if sizeof(IST) is a meaningful representation of the amount of
5976  // data in a Scalar instance. (LO and GO are always built-in
5977  // integer types.)
5978  //
5979  // Allocate the exports array. It does NOT need padding for
5980  // alignment, since we use memcpy to write to / read from send /
5981  // receive buffers.
5982  const size_t allocSize =
5983  static_cast<size_t> (numExportLIDs) * sizeof (LO) +
5984  totalNumEntries * (sizeof (IST) + sizeof (GO));
5985  if (static_cast<size_t> (exports.size ()) < allocSize) {
5986  exports.resize (allocSize);
5987  }
5988  }
5989 
/// Pack the requested rows of this matrix into \c exports.
///
/// \param exportLIDs [in] Local indices of the rows to pack.
/// \param exports [out] Pack buffer.
/// \param numPacketsPerLID [out] Passed through to the packing routine.
/// \param constantNumPackets [out] Passed through to the packing routine.
/// \param dist [in] Passed through to the packing routine.
///
/// If the matrix has a static graph, packing is done by
/// Details::packCrsMatrix on the local matrix and the graph's local
/// column Map; otherwise it is delegated to packNonStatic().
///
/// In a HAVE_TPETRA_DEBUG build with a nonnull communicator, the
/// static-graph path agrees on success across processes with a reduceAll
/// and, on failure, gathers the per-process error strings and raises
/// std::runtime_error.  In a non-debug build the return value of
/// packCrsMatrix is discarded.
///
/// NOTE(review): listing line 5992 (presumably the class qualifier) is
/// not visible here.
5990  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
5991  void
5993  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5994  Teuchos::Array<char>& exports,
5995  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5996  size_t& constantNumPackets,
5997  Distributor& dist) const
5998  {
5999  using Details::packCrsMatrix;
6000 
6001  if (this->isStaticGraph ()) {
6002  const map_type& colMap = * (this->staticGraph_->colMap_);
6003  const auto lclColMap = colMap.getLocalMap ();
// A null communicator is treated as rank 0.
6004  const int myRank =
6005  colMap.getComm ().is_null () ? 0 : colMap.getComm ()->getRank ();
6006  std::unique_ptr<std::string> errStr;
6007 #ifdef HAVE_TPETRA_DEBUG
6008  using Teuchos::outArg;
6009  using Teuchos::REDUCE_MIN;
6010  using Teuchos::reduceAll;
6011  const bool locallyCorrect =
6012  packCrsMatrix (this->lclMatrix_, lclColMap, errStr,
6013  exports, numPacketsPerLID, constantNumPackets,
6014  exportLIDs, myRank, dist);
6015  const int lclOK = locallyCorrect ? 1 : 0;
6016  int gblOK = 1; // output argument
// With a null communicator the local result is not examined.
6017  if (! colMap.getComm ().is_null ()) {
6018  reduceAll<int, int> (* (colMap.getComm ()), REDUCE_MIN,
6019  lclOK, outArg (gblOK));
6020  if (gblOK != 1) {
6021  std::ostringstream out;
6022  if (colMap.getComm ()->getRank () == 0) {
6023  out << "Error in packCrsMatrix!" << std::endl;
6024  }
6025  using ::Tpetra::Details::gathervPrint;
// errStr may be unset on processes that did not fail; use "" for those.
6026  const std::string errStr2 =
6027  errStr.get () == NULL ? std::string ("") : *errStr;
6028  gathervPrint (out, errStr2, * (colMap.getComm ()));
6029  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, out.str ());
6030  }
6031  }
6032 #else // NOT HAVE_TPETRA_DEBUG
6033  (void) packCrsMatrix (this->lclMatrix_, lclColMap, errStr,
6034  exports, numPacketsPerLID, constantNumPackets,
6035  exportLIDs, myRank, dist);
6036 #endif // HAVE_TPETRA_DEBUG
6037  }
6038  else {
6039  this->packNonStatic (exportLIDs, exports, numPacketsPerLID,
6040  constantNumPackets, dist);
6041  }
6042  }
6043 
6044  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6045  void
6047  packNonStatic (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
6048  Teuchos::Array<char>& exports,
6049  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
6050  size_t& constantNumPackets,
6051  Distributor& distor) const
6052  {
6053  typedef impl_scalar_type IST;
6054  typedef LocalOrdinal LO;
6055  typedef GlobalOrdinal GO;
6056  const char tfecfFuncName[] = "pack: ";
6057 
6058  const size_t numExportLIDs = static_cast<size_t> (exportLIDs.size ());
6059  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6060  (numExportLIDs != static_cast<size_t> (numPacketsPerLID.size ()),
6061  std::invalid_argument, "exportLIDs.size() = " << numExportLIDs
6062  << " != numPacketsPerLID.size() = " << numPacketsPerLID.size () << ".");
6063 
6064  // Setting this to zero tells the caller to expect a possibly
6065  // different ("nonconstant") number of packets per local index
6066  // (i.e., a possibly different number of entries per row).
6067  constantNumPackets = 0;
6068 
6069  // The pack buffer 'exports' enters this method possibly
6070  // unallocated. Do the first two parts of "Count, allocate, fill,
6071  // compute."
6072  size_t totalNumEntries = 0;
6073  this->allocatePackSpace (exports, totalNumEntries, exportLIDs);
6074  const size_t bufSize = static_cast<size_t> (exports.size ());
6075 
6076  // Compute the number of "packets" (in this case, bytes) per
6077  // export LID (in this case, local index of the row to send), and
6078  // actually pack the data.
6079  //
6080  // FIXME (mfh 24 Feb 2013, 25 Jan 2015) This code is only correct
6081  // if sizeof(Scalar) is a meaningful representation of the amount
6082  // of data in a Scalar instance. (LO and GO are always built-in
6083  // integer types.)
6084 
6085  // Variables for error reporting in the loop.
6086  size_t firstBadIndex = 0; // only valid if outOfBounds == true.
6087  size_t firstBadOffset = 0; // only valid if outOfBounds == true.
6088  size_t firstBadNumBytes = 0; // only valid if outOfBounds == true.
6089  bool outOfBounds = false;
6090  bool packErr = false;
6091 
6092  char* const exportsRawPtr = exports.getRawPtr ();
6093  size_t offset = 0; // current index into 'exports' array.
6094  for (size_t i = 0; i < numExportLIDs; ++i) {
6095  const LO lclRow = exportLIDs[i];
6096 
6097  size_t numEnt;
6098  numEnt = this->getNumEntriesInLocalRow (lclRow);
6099 
6100  // Only pack this row's data if it has a nonzero number of
6101  // entries. We can do this because receiving processes get the
6102  // number of packets, and will know that zero packets means zero
6103  // entries.
6104  if (numEnt == 0) {
6105  numPacketsPerLID[i] = 0;
6106  }
6107  else {
6108  char* const numEntBeg = exportsRawPtr + offset;
6109  char* const numEntEnd = numEntBeg + sizeof (LO);
6110  char* const valBeg = numEntEnd;
6111  char* const valEnd = valBeg + numEnt * sizeof (Scalar);
6112  char* const indBeg = valEnd;
6113  const size_t numBytes = sizeof (LO) +
6114  numEnt * (sizeof (IST) + sizeof (GO));
6115  if (offset > bufSize || offset + numBytes > bufSize) {
6116  firstBadIndex = i;
6117  firstBadOffset = offset;
6118  firstBadNumBytes = numBytes;
6119  outOfBounds = true;
6120  break;
6121  }
6122  packErr = ! this->packRow (numEntBeg, valBeg, indBeg, numEnt, lclRow);
6123  if (packErr) {
6124  firstBadIndex = i;
6125  firstBadOffset = offset;
6126  firstBadNumBytes = numBytes;
6127  break;
6128  }
6129  // numPacketsPerLID[i] is the number of "packets" in the
6130  // current local row i. Packet=char (really "byte") so use
6131  // the number of bytes of the packed data for that row.
6132  numPacketsPerLID[i] = numBytes;
6133  offset += numBytes;
6134  }
6135  }
6136 
6137  TEUCHOS_TEST_FOR_EXCEPTION(
6138  outOfBounds, std::logic_error, "First invalid offset into 'exports' "
6139  "pack buffer at index i = " << firstBadIndex << ". exportLIDs[i]: "
6140  << exportLIDs[firstBadIndex] << ", bufSize: " << bufSize << ", offset: "
6141  << firstBadOffset << ", numBytes: " << firstBadNumBytes << ".");
6142  TEUCHOS_TEST_FOR_EXCEPTION(
6143  packErr, std::logic_error, "First error in packRow() at index i = "
6144  << firstBadIndex << ". exportLIDs[i]: " << exportLIDs[firstBadIndex]
6145  << ", bufSize: " << bufSize << ", offset: " << firstBadOffset
6146  << ", numBytes: " << firstBadNumBytes << ".");
6147  }
6148 
// Raw-array convenience wrapper around combineGlobalValues().
//
// Translates the local row index lclRow into a global row index through
// myGraph_'s row Map, wraps the raw cols[] / vals[] arrays (numEnt entries
// each) in Teuchos::ArrayView objects, and forwards them to
// combineGlobalValues() together with combineMode.
//
// Returns numEnt. Failure is never signalled through the return value; any
// error surfaces as an exception propagating out of combineGlobalValues().
//
// NOTE(review): myGraph_ is dereferenced unconditionally here, whereas
// removeEmptyProcessesInPlace() in this file treats a null myGraph_ as the
// "constant graph" case -- confirm this path is only reached when myGraph_
// is nonnull.
6149  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6150  LocalOrdinal
// (The source listing elides the class-qualifier line at this point.)
6152  combineGlobalValuesRaw (const LocalOrdinal lclRow,
6153  const LocalOrdinal numEnt,
6154  const impl_scalar_type vals[],
6155  const GlobalOrdinal cols[],
6156  const Tpetra::CombineMode combineMode)
6157  {
6158  typedef GlobalOrdinal GO;
6159  //const char tfecfFuncName[] = "combineGlobalValuesRaw: ";
6160 
6161  // mfh 23 Mar 2017: This branch is not thread safe in a debug
6162  // build, due to use of Teuchos::ArrayView; see #229.
6163  const GO gblRow = this->myGraph_->rowMap_->getGlobalElement (lclRow);
// An empty row is represented by a (NULL, 0) view so that the possibly
// invalid input pointers are never handed to ArrayView.
6164  Teuchos::ArrayView<const GO> cols_av (numEnt == 0 ? NULL : cols, numEnt);
// vals arrives as impl_scalar_type[] but combineGlobalValues() takes Scalar;
// presumably the two types share a layout -- verify before relying on it.
6165  Teuchos::ArrayView<const Scalar> vals_av (numEnt == 0 ? NULL : reinterpret_cast<const Scalar*> (vals), numEnt);
6166 
6167  // FIXME (mfh 23 Mar 2017) This is a work-around for less common
6168  // combine modes. combineGlobalValues throws on error; it does
6169  // not return an error code. Thus, if it returns, it succeeded.
6170  this->combineGlobalValues (gblRow, cols_av, vals_av, combineMode);
6171  return numEnt;
6172  }
6173 
6174 
// Combine the given (column index, value) pairs into global row
// globalRowIndex according to combineMode.
//
// Static graph (isStaticGraph() is true):
//   ADD     -> sumIntoGlobalValues()
//   REPLACE -> replaceGlobalValues()
//   ABSMAX  -> transformGlobalValues() with Details::AbsMax<Scalar>
//   INSERT  -> throws std::invalid_argument
//   other   -> throws std::logic_error
//
// Dynamic graph:
//   ADD, INSERT     -> insertGlobalValuesFiltered()
//   ABSMAX, REPLACE -> throws std::logic_error (not implemented)
//   other           -> throws std::logic_error
6175  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6176  void
// (The source listing elides the class-qualifier line at this point.)
6178  combineGlobalValues (const GlobalOrdinal globalRowIndex,
6179  const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
6180  const Teuchos::ArrayView<const Scalar>& values,
6181  const Tpetra::CombineMode combineMode)
6182  {
// Read by the TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC invocations below;
// presumably used as the prefix of their messages.
6183  const char tfecfFuncName[] = "combineGlobalValues: ";
6184 
6185  if (isStaticGraph ()) {
6186  // INSERT doesn't make sense for a static graph, since you
6187  // aren't allowed to change the structure of the graph.
6188  // However, all the other combine modes work.
6189  if (combineMode == ADD) {
6190  sumIntoGlobalValues (globalRowIndex, columnIndices, values);
6191  }
6192  else if (combineMode == REPLACE) {
6193  replaceGlobalValues (globalRowIndex, columnIndices, values);
6194  }
6195  else if (combineMode == ABSMAX) {
6196  using Details::AbsMax;
6197  AbsMax<Scalar> f;
6198  this->template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
6199  columnIndices,
6200  values, f);
6201  }
6202  else if (combineMode == INSERT) {
// The tested condition is always true inside this branch, so this is an
// unconditional throw.
6203  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6204  isStaticGraph () && combineMode == INSERT, std::invalid_argument,
6205  "INSERT combine mode is not allowed if the matrix has a static graph "
6206  "(i.e., was constructed with the CrsMatrix constructor that takes a "
6207  "const CrsGraph pointer).");
6208  }
6209  else {
6210  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6211  true, std::logic_error, "Invalid combine mode; should never get "
6212  "here! Please report this bug to the Tpetra developers.");
6213  }
6214  }
6215  else { // The matrix has a dynamic graph.
6216  if (combineMode == ADD || combineMode == INSERT) {
6217  // For a dynamic graph, all incoming column indices are
6218  // inserted into the target graph. Duplicate indices will
6219  // have their values summed. In this context, ADD and INSERT
6220  // are equivalent. We need to call insertGlobalValues()
6221  // anyway if the column indices don't yet exist in this row,
6222  // so we just call insertGlobalValues() for both cases.
// NOTE(review): the call below is insertGlobalValuesFiltered(), not the
// insertGlobalValues() named in the comment above.
6223  insertGlobalValuesFiltered (globalRowIndex, columnIndices, values);
6224  }
6225  // FIXME (mfh 14 Mar 2012):
6226  //
6227  // Implementing ABSMAX or REPLACE for a dynamic graph would
6228  // require modifying assembly to attach a possibly different
6229  // combine mode to each inserted (i, j, A_ij) entry. For
6230  // example, consider two different Export operations to the same
6231  // target CrsMatrix, the first with ABSMAX combine mode and the
6232  // second with REPLACE. This isn't a common use case, so we
6233  // won't mess with it for now.
6234  else if (combineMode == ABSMAX) {
// Always-true condition in this branch: unconditional throw.
6235  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6236  ! isStaticGraph () && combineMode == ABSMAX, std::logic_error,
6237  "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6238  "implemented.");
6239  }
6240  else if (combineMode == REPLACE) {
// Always-true condition in this branch: unconditional throw.
6241  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6242  ! isStaticGraph () && combineMode == REPLACE, std::logic_error,
6243  "REPLACE combine mode when the matrix has a dynamic graph is not yet "
6244  "implemented.");
6245  }
6246  else {
6247  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6248  true, std::logic_error, "Should never get here! Please report this "
6249  "bug to the Tpetra developers.");
6250  }
6251  }
6252  }
6253 
6254 
// Entry point for unpacking received data into this matrix; the actual work
// is done by unpackAndCombineImpl().
//
// Without HAVE_TPETRA_DEBUG this is a plain forwarding call.
//
// With HAVE_TPETRA_DEBUG it additionally
//   1. rejects any combineMode other than ADD, REPLACE, ABSMAX or INSERT with
//      std::invalid_argument, and
//   2. catches std::exception thrown by unpackAndCombineImpl(), takes the
//      maximum of the per-process failure flag over the matrix's
//      communicator, prints each process's message to std::cerr via
//      gathervPrint(), and then throws std::logic_error if any process
//      failed.
6255  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6256  void
// (The source listing elides the class-qualifier line at this point.)
6258  unpackAndCombine (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
6259  const Teuchos::ArrayView<const char>& imports,
6260  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
6261  size_t constantNumPackets,
6262  Distributor& distor,
6263  CombineMode combineMode)
6264  {
// (The source listing elides one line here.)
6266  ProfilingRegion regionUAC ("Tpetra::CrsMatrix::unpackAndCombine");
6267 
6268 #ifdef HAVE_TPETRA_DEBUG
6269  const char tfecfFuncName[] = "unpackAndCombine: ";
6270  const CombineMode validModes[4] = {ADD, REPLACE, ABSMAX, INSERT};
6271  const char* validModeNames[4] = {"ADD", "REPLACE", "ABSMAX", "INSERT"};
6272  const int numValidModes = 4;
6273 
// Reject unknown combine modes up front, listing the accepted ones in the
// exception message.
6274  if (std::find (validModes, validModes+numValidModes, combineMode) ==
6275  validModes+numValidModes) {
6276  std::ostringstream os;
6277  os << "Invalid combine mode. Valid modes are {";
6278  for (int k = 0; k < numValidModes; ++k) {
6279  os << validModeNames[k];
6280  if (k < numValidModes - 1) {
6281  os << ", ";
6282  }
6283  }
6284  os << "}.";
6285  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6286  true, std::invalid_argument, os.str ());
6287  }
6288 
6289  {
6290  using Teuchos::reduceAll;
6291  std::ostringstream msg;
6292  int lclBad = 0;
// Do not let the exception escape yet: record it locally so that this
// process still reaches the reduceAll below.
// NOTE(review): only std::exception is caught; anything else thrown here
// would skip the reduceAll -- presumably leaving other processes waiting.
6293  try {
6294  this->unpackAndCombineImpl (importLIDs, imports, numPacketsPerLID,
6295  constantNumPackets, distor, combineMode);
6296  } catch (std::exception& e) {
6297  lclBad = 1;
6298  msg << e.what ();
6299  }
// gblBad becomes 1 on every process if at least one process failed.
6300  int gblBad = 0;
6301  const Teuchos::Comm<int>& comm = * (this->getComm ());
6302  reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
6303  lclBad, Teuchos::outArg (gblBad));
6304  if (gblBad != 0) {
6305  const int myRank = comm.getRank ();
6306  std::ostringstream os;
6307  os << "Proc " << myRank << ": " << msg.str () << std::endl;
6308  ::Tpetra::Details::gathervPrint (std::cerr, os.str (), comm);
6309  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6310  (true, std::logic_error, "unpackAndCombineImpl() threw an "
6311  "exception on one or more participating processes.");
6312  }
6313  }
6314 #else
6315  this->unpackAndCombineImpl (importLIDs, imports, numPacketsPerLID,
6316  constantNumPackets, distor, combineMode);
6317 #endif // HAVE_TPETRA_DEBUG
6318  }
6319 
// Unpack received row data and combine it into this matrix, choosing the
// implementation by graph kind.
//
// Static graph: hands lclMatrix_ and the column Map's local Map to
// unpackCrsMatrixAndCombine(); if that returns false, throws
// std::runtime_error carrying the string errStr points to.
//
// Otherwise: forwards to unpackAndCombineImplNonStatic(). The 'atomic'
// argument is used only on the static-graph path.
6320  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6321  void
// (The source listing elides the class-qualifier line at this point.)
6323  unpackAndCombineImpl (const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
6324  const Teuchos::ArrayView<const char>& imports,
6325  const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
6326  size_t constantNumPackets,
6327  Distributor & distor,
6328  CombineMode combineMode,
6329  const bool atomic)
6330  {
6331  if (this->isStaticGraph()) {
// (The source listing elides one line here.)
6333  const map_type& colMap = * (this->staticGraph_->colMap_);
6334  const auto lclColMap = colMap.getLocalMap ();
6335  const Teuchos::Comm<int>& comm = * (this->getComm ());
6336  const int myRank = comm.getRank ();
// Starts out null; presumably filled in by unpackCrsMatrixAndCombine() when
// it reports failure.
6337  std::unique_ptr<std::string> errStr;
6338  bool locallyCorrect = unpackCrsMatrixAndCombine (
6339  this->lclMatrix_, lclColMap, errStr, importLIDs, imports,
6340  numPacketsPerLID, constantNumPackets, myRank, distor, combineMode, atomic);
// NOTE(review): *errStr is dereferenced on failure without a null check --
// confirm unpackCrsMatrixAndCombine() always sets errStr when it returns
// false.
6341  TEUCHOS_TEST_FOR_EXCEPTION(!locallyCorrect, std::runtime_error, *errStr);
6342  }
6343  else {
6344  this->unpackAndCombineImplNonStatic (importLIDs, imports, numPacketsPerLID,
6345  constantNumPackets, distor, combineMode);
6346  }
6347  }
6348 
6349  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6350  void
6353  const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
6354  const Teuchos::ArrayView<const char>& imports,
6355  const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
6356  size_t constantNumPackets,
6357  Distributor & /* distor */,
6358  CombineMode combineMode)
6359  {
6360  typedef impl_scalar_type IST;
6361  typedef LocalOrdinal LO;
6362  typedef GlobalOrdinal GO;
6363  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
6364  const char tfecfFuncName[] = "unpackAndCombine: ";
6365 
6366  const size_type numImportLIDs = importLIDs.size ();
6367  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6368  numImportLIDs != numPacketsPerLID.size (), std::invalid_argument,
6369  "importLIDs.size() = " << numImportLIDs << " != numPacketsPerLID.size()"
6370  << " = " << numPacketsPerLID.size () << ".");
6371 
6372  // If a sanity check fails, keep track of some state at the
6373  // "first" place where it fails. After the first failure, "run
6374  // through the motions" until the end of this method, then raise
6375  // an error with an informative message.
6376  size_type firstBadIndex = 0;
6377  size_t firstBadOffset = 0;
6378  size_t firstBadExpectedNumBytes = 0;
6379  size_t firstBadNumBytes = 0;
6380  LO firstBadNumEnt = 0;
6381  // We have sanity checks for three kinds of errors:
6382  //
6383  // 1. Offset into array of all the incoming data (for all rows)
6384  // is out of bounds
6385  // 2. Too few bytes of incoming data for a row, given the
6386  // reported number of entries in those incoming data
6387  // 3. Error in unpacking the row's incoming data
6388  //
6389  bool outOfBounds = false;
6390  bool wrongNumBytes = false;
6391  bool unpackErr = false;
6392 
6393  const size_t bufSize = static_cast<size_t> (imports.size ());
6394  const char* const importsRawPtr = imports.getRawPtr ();
6395  size_t offset = 0;
6396 
6397  // Temporary storage for incoming values and indices. We need
6398  // this because the receive buffer does not align storage; it's
6399  // just contiguous bytes. In order to avoid violating ANSI
6400  // aliasing rules, we memcpy each incoming row's data into these
6401  // temporary arrays. We double their size every time we run out
6402  // of storage.
6403  std::vector<IST> valInTmp;
6404  std::vector<GO> indInTmp;
6405  for (size_type i = 0; i < numImportLIDs; ++i) {
6406  const LO lclRow = importLIDs[i];
6407  const size_t numBytes = numPacketsPerLID[i];
6408 
6409  if (numBytes > 0) { // there is actually something in the row
6410  const char* const numEntBeg = importsRawPtr + offset;
6411  const char* const numEntEnd = numEntBeg + sizeof (LO);
6412 
6413  // Now we know how many entries to expect in the received data
6414  // for this row.
6415  LO numEnt = 0;
6416  memcpy (&numEnt, numEntBeg, sizeof (LO));
6417 
6418  const char* const valBeg = numEntEnd;
6419  const char* const valEnd =
6420  valBeg + static_cast<size_t> (numEnt) * sizeof (IST);
6421  const char* const indBeg = valEnd;
6422  const size_t expectedNumBytes = sizeof (LO) +
6423  static_cast<size_t> (numEnt) * (sizeof (IST) + sizeof (GO));
6424 
6425  if (expectedNumBytes > numBytes) {
6426  firstBadIndex = i;
6427  firstBadOffset = offset;
6428  firstBadExpectedNumBytes = expectedNumBytes;
6429  firstBadNumBytes = numBytes;
6430  firstBadNumEnt = numEnt;
6431  wrongNumBytes = true;
6432  break;
6433  }
6434  if (offset > bufSize || offset + numBytes > bufSize) {
6435  firstBadIndex = i;
6436  firstBadOffset = offset;
6437  firstBadExpectedNumBytes = expectedNumBytes;
6438  firstBadNumBytes = numBytes;
6439  firstBadNumEnt = numEnt;
6440  outOfBounds = true;
6441  break;
6442  }
6443  size_t tmpNumEnt = static_cast<size_t> (valInTmp.size ());
6444  if (tmpNumEnt < static_cast<size_t> (numEnt) ||
6445  static_cast<size_t> (indInTmp.size ()) < static_cast<size_t> (numEnt)) {
6446  // Double the size of the temporary arrays for incoming data.
6447  tmpNumEnt = std::max (static_cast<size_t> (numEnt), tmpNumEnt * 2);
6448  valInTmp.resize (tmpNumEnt);
6449  indInTmp.resize (tmpNumEnt);
6450  }
6451  unpackErr =
6452  ! unpackRow (valInTmp.data (), indInTmp.data (), tmpNumEnt,
6453  valBeg, indBeg, numEnt, lclRow, combineMode);
6454  if (unpackErr) {
6455  firstBadIndex = i;
6456  firstBadOffset = offset;
6457  firstBadExpectedNumBytes = expectedNumBytes;
6458  firstBadNumBytes = numBytes;
6459  firstBadNumEnt = numEnt;
6460  break;
6461  }
6462  offset += numBytes;
6463  }
6464  }
6465 
6466  if (wrongNumBytes || outOfBounds || unpackErr) {
6467  std::ostringstream os;
6468  os << " importLIDs[i]: " << importLIDs[firstBadIndex]
6469  << ", bufSize: " << bufSize
6470  << ", offset: " << firstBadOffset
6471  << ", numBytes: " << firstBadNumBytes
6472  << ", expectedNumBytes: " << firstBadExpectedNumBytes
6473  << ", numEnt: " << firstBadNumEnt;
6474  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6475  wrongNumBytes, std::logic_error, "At index i = " << firstBadIndex
6476  << ", expectedNumBytes > numBytes." << os.str ());
6477  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6478  outOfBounds, std::logic_error, "First invalid offset into 'imports' "
6479  "unpack buffer at index i = " << firstBadIndex << "." << os.str ());
6480  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6481  unpackErr, std::logic_error, "First error in unpackRow() at index i = "
6482  << firstBadIndex << "." << os.str ());
6483  }
6484  }
6485 
// Return a multivector distributed over this matrix's column Map, with as
// many columns as X_domainMap, for use as the target of an Import.
//
// Returns null when the graph's Import object is null and 'force' is false.
// Otherwise returns the cached importMV_ if it exists and has the right
// number of columns, or else creates a new multivector, stores it in
// importMV_ and returns it. The contents of a reused multivector are left
// as they were.
//
// Throws std::runtime_error if the matrix has no column Map or if its graph
// is not fill complete.
6486  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6487  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
// (The source listing elides the class-qualifier line at this point.)
6489  getColumnMapMultiVector (const MV& X_domainMap,
6490  const bool force) const
6491  {
6492  using Teuchos::null;
6493  using Teuchos::RCP;
6494  using Teuchos::rcp;
6495 
6496  TEUCHOS_TEST_FOR_EXCEPTION(
6497  ! this->hasColMap (), std::runtime_error, "Tpetra::CrsMatrix::getColumn"
6498  "MapMultiVector: You may only call this method if the matrix has a "
6499  "column Map. If the matrix does not yet have a column Map, you should "
6500  "first call fillComplete (with domain and range Map if necessary).");
6501 
6502  // If the graph is not fill complete, then the Import object (if
6503  // one should exist) hasn't been constructed yet.
6504  TEUCHOS_TEST_FOR_EXCEPTION(
6505  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
6506  "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
6507  "this matrix's graph is fill complete.");
6508 
6509  const size_t numVecs = X_domainMap.getNumVectors ();
6510  RCP<const import_type> importer = this->getGraph ()->getImporter ();
6511  RCP<const map_type> colMap = this->getColMap ();
6512 
6513  RCP<MV> X_colMap; // null by default
6514 
6515  // If the Import object is trivial (null), then we don't need a
6516  // separate column Map multivector. Just return null in that
6517  // case. The caller is responsible for knowing not to use the
6518  // returned null pointer.
6519  //
6520  // If the Import is nontrivial, then we do need a separate
6521  // column Map multivector for the Import operation. Check in
6522  // that case if we have to (re)create the column Map
6523  // multivector.
6524  if (! importer.is_null () || force) {
// The cache is considered valid purely on the basis of its column count.
6525  if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
6526  X_colMap = rcp (new MV (colMap, numVecs));
6527 
6528  // Cache the newly created multivector for later reuse.
// (importMV_ is assigned from within a const method.)
6529  importMV_ = X_colMap;
6530  }
6531  else { // Yay, we can reuse the cached multivector!
6532  X_colMap = importMV_;
6533  // mfh 09 Jan 2013: We don't have to fill with zeros first,
6534  // because the Import uses INSERT combine mode, which overwrites
6535  // existing entries.
6536  //
6537  //X_colMap->putScalar (ZERO);
6538  }
6539  }
6540  return X_colMap;
6541  }
6542 
// Return a multivector distributed over this matrix's row Map, with as many
// columns as Y_rangeMap, for use with the graph's Export.
//
// Returns null when the graph's Export object is null and 'force' is false.
// Otherwise returns the cached exportMV_ if it exists and has the right
// number of columns, or else creates a new multivector, stores it in
// exportMV_ and returns it.
//
// Throws std::runtime_error if the matrix's graph is not fill complete.
6543  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6544  Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node, classic> >
// (The source listing elides two lines here: the class qualifier and the
// start of the parameter list, which declares the Y_rangeMap argument used
// below.)
6547  const bool force) const
6548  {
6549  using Teuchos::null;
6550  using Teuchos::RCP;
6551  using Teuchos::rcp;
6552 
6553  // If the graph is not fill complete, then the Export object (if
6554  // one should exist) hasn't been constructed yet.
6555  TEUCHOS_TEST_FOR_EXCEPTION(
6556  ! this->getGraph ()->isFillComplete (), std::runtime_error, "Tpetra::"
6557  "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
6558  "matrix's graph is fill complete.");
6559 
6560  const size_t numVecs = Y_rangeMap.getNumVectors ();
6561  RCP<const export_type> exporter = this->getGraph ()->getExporter ();
6562  // Every version of the constructor takes either a row Map, or a
6563  // graph (all of whose constructors take a row Map). Thus, the
6564  // matrix always has a row Map.
6565  RCP<const map_type> rowMap = this->getRowMap ();
6566 
6567  RCP<MV> Y_rowMap; // null by default
6568 
6569  // If the Export object is trivial (null), then we don't need a
6570  // separate row Map multivector. Just return null in that case.
6571  // The caller is responsible for knowing not to use the returned
6572  // null pointer.
6573  //
6574  // If the Export is nontrivial, then we do need a separate row
6575  // Map multivector for the Export operation. Check in that case
6576  // if we have to (re)create the row Map multivector.
6577  if (! exporter.is_null () || force) {
// The cache is considered valid purely on the basis of its column count.
6578  if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
6579  Y_rowMap = rcp (new MV (rowMap, numVecs));
6580  exportMV_ = Y_rowMap; // Cache the newly created MV for later reuse.
6581  }
6582  else { // Yay, we can reuse the cached multivector!
6583  Y_rowMap = exportMV_;
6584  }
6585  }
6586  return Y_rowMap;
6587  }
6588 
// Forward removeEmptyProcessesInPlace(newMap) to the matrix's own graph and
// then bring the matrix's cached Map and graph pointers back in sync with
// it.
//
// Throws std::logic_error if myGraph_ is null, i.e. (per the message below)
// when the matrix was created with a constant graph that it is not allowed
// to modify.
6589  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6590  void
// (The source listing elides the class-qualifier line at this point.)
6592  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6593  {
6594  TEUCHOS_TEST_FOR_EXCEPTION(
6595  myGraph_.is_null (), std::logic_error, "Tpetra::CrsMatrix::"
6596  "removeEmptyProcessesInPlace: This method does not work when the matrix "
6597  "was created with a constant graph (that is, when it was created using "
6598  "the version of its constructor that takes an RCP<const CrsGraph>). "
6599  "This is because the matrix is not allowed to modify the graph in that "
6600  "case, but removing empty processes requires modifying the graph.");
6601  myGraph_->removeEmptyProcessesInPlace (newMap);
6602  // Even though CrsMatrix's row Map (as returned by getRowMap())
6603  // comes from its CrsGraph, CrsMatrix still implements DistObject,
6604  // so we also have to change the DistObject's Map.
6605  this->map_ = this->getRowMap ();
6606  // In the nonconst graph case, staticGraph_ is just a const
6607  // pointer to myGraph_. This assignment is probably redundant,
6608  // but it doesn't hurt.
6609  staticGraph_ = Teuchos::rcp_const_cast<const Graph> (myGraph_);
6610  }
6611 
// Compute and return C = alpha*A + beta*B, where B is this matrix and A is
// the other operand (its parameter declaration is on a line the source
// listing elides).
//
// \param alpha      Scale factor for A; if zero, A's entries are skipped.
// \param beta       Scale factor for B; if zero, B's entries are skipped.
// \param domainMap  Domain Map for C. If null, B's domain Map is used, or
//                   A's if B has none.
// \param rangeMap   Range Map for C, chosen by the same rule.
// \param params     Optional. Recognized entries, as read below:
//                   "Call fillComplete" (bool, default true), and the
//                   sublists "Constructor parameters" and
//                   "fillComplete parameters".
//
// C always uses B's row Map. If A and B have the same row Map, C is created
// with StaticProfile and a per-row capacity equal to the sum of the
// contributing rows' entry counts; otherwise C is created with
// DynamicProfile. Rows are copied out by global index, scaled, and passed to
// C->insertGlobalValues().
//
// Throws std::invalid_argument if a needed domain or range Map is supplied
// neither by the caller nor by A or B. With HAVE_TPETRA_DEBUG the Maps are
// additionally checked for mutual consistency (isSameAs).
6612  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6613  Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
// (The source listing elides the class-qualifier line at this point.)
6615  add (const Scalar& alpha,
// (The source listing elides the declaration of parameter A here.)
6617  const Scalar& beta,
6618  const Teuchos::RCP<const map_type>& domainMap,
6619  const Teuchos::RCP<const map_type>& rangeMap,
6620  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6621  {
6622  using Teuchos::Array;
6623  using Teuchos::ArrayRCP;
6624  using Teuchos::ArrayView;
6625  using Teuchos::ParameterList;
6626  using Teuchos::RCP;
6627  using Teuchos::rcp;
6628  using Teuchos::rcp_implicit_cast;
6629  using Teuchos::sublist;
6630  typedef LocalOrdinal LO;
6631  typedef GlobalOrdinal GO;
// (The source listing elides two lines here; presumably they declare the
// row_matrix_type and crs_matrix_type names used below.)
6634 
6635  const crs_matrix_type& B = *this; // a convenient abbreviation
6636  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
6637  const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
6638 
6639  // If the user didn't supply a domain or range Map, then try to
6640  // get one from B first (if it has them), then from A (if it has
6641  // them). If we don't have any domain or range Maps, scold the
6642  // user.
6643  RCP<const map_type> A_domainMap = A.getDomainMap ();
6644  RCP<const map_type> A_rangeMap = A.getRangeMap ();
6645  RCP<const map_type> B_domainMap = B.getDomainMap ();
6646  RCP<const map_type> B_rangeMap = B.getRangeMap ();
6647 
6648  RCP<const map_type> theDomainMap = domainMap;
6649  RCP<const map_type> theRangeMap = rangeMap;
6650 
6651  if (domainMap.is_null ()) {
6652  if (B_domainMap.is_null ()) {
6653  TEUCHOS_TEST_FOR_EXCEPTION(
6654  A_domainMap.is_null (), std::invalid_argument,
6655  "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
6656  "then you must supply a nonnull domain Map to this method.");
6657  theDomainMap = A_domainMap;
6658  } else {
6659  theDomainMap = B_domainMap;
6660  }
6661  }
6662  if (rangeMap.is_null ()) {
6663  if (B_rangeMap.is_null ()) {
6664  TEUCHOS_TEST_FOR_EXCEPTION(
6665  A_rangeMap.is_null (), std::invalid_argument,
6666  "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
6667  "then you must supply a nonnull range Map to this method.");
6668  theRangeMap = A_rangeMap;
6669  } else {
6670  theRangeMap = B_rangeMap;
6671  }
6672  }
6673 
6674 #ifdef HAVE_TPETRA_DEBUG
6675  // In a debug build, check that A and B have matching domain and
6676  // range Maps, if they have domain and range Maps at all. (If
6677  // they aren't fill complete, then they may not yet have them.)
6678  if (! A_domainMap.is_null () && ! A_rangeMap.is_null ()) {
6679  if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
6680  TEUCHOS_TEST_FOR_EXCEPTION(
6681  ! B_domainMap->isSameAs (*A_domainMap), std::invalid_argument,
6682  "Tpetra::CrsMatrix::add: The input RowMatrix A must have a domain Map "
6683  "which is the same as (isSameAs) this RowMatrix's domain Map.");
6684  TEUCHOS_TEST_FOR_EXCEPTION(
6685  ! B_rangeMap->isSameAs (*A_rangeMap), std::invalid_argument,
6686  "Tpetra::CrsMatrix::add: The input RowMatrix A must have a range Map "
6687  "which is the same as (isSameAs) this RowMatrix's range Map.");
6688  TEUCHOS_TEST_FOR_EXCEPTION(
6689  ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
6690  std::invalid_argument,
6691  "Tpetra::CrsMatrix::add: The input domain Map must be the same as "
6692  "(isSameAs) this RowMatrix's domain Map.");
6693  TEUCHOS_TEST_FOR_EXCEPTION(
6694  ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
6695  std::invalid_argument,
6696  "Tpetra::CrsMatrix::add: The input range Map must be the same as "
6697  "(isSameAs) this RowMatrix's range Map.");
6698  }
6699  }
6700  else if (! B_domainMap.is_null () && ! B_rangeMap.is_null ()) {
6701  TEUCHOS_TEST_FOR_EXCEPTION(
6702  ! domainMap.is_null () && ! domainMap->isSameAs (*B_domainMap),
6703  std::invalid_argument,
6704  "Tpetra::CrsMatrix::add: The input domain Map must be the same as "
6705  "(isSameAs) this RowMatrix's domain Map.");
6706  TEUCHOS_TEST_FOR_EXCEPTION(
6707  ! rangeMap.is_null () && ! rangeMap->isSameAs (*B_rangeMap),
6708  std::invalid_argument,
6709  "Tpetra::CrsMatrix::add: The input range Map must be the same as "
6710  "(isSameAs) this RowMatrix's range Map.");
6711  }
6712  else {
// NOTE(review): this branch is reached whenever neither A nor B has *both*
// Maps, and it then demands both user Maps. That is stricter than the
// non-debug fallback above, which accepts e.g. a null domainMap as long as
// A or B has a domain Map.
6713  TEUCHOS_TEST_FOR_EXCEPTION(
6714  domainMap.is_null () || rangeMap.is_null (), std::invalid_argument,
6715  "Tpetra::CrsMatrix::add: If neither A nor B have a domain and range "
6716  "Map, then you must supply a nonnull domain and range Map to this "
6717  "method.");
6718  }
6719 #endif // HAVE_TPETRA_DEBUG
6720 
6721  // What parameters do we pass to C's constructor? Do we call
6722  // fillComplete on C after filling it? And if so, what parameters
6723  // do we pass to C's fillComplete call?
6724  bool callFillComplete = true;
6725  RCP<ParameterList> constructorSublist;
6726  RCP<ParameterList> fillCompleteSublist;
6727  if (! params.is_null ()) {
6728  callFillComplete = params->get ("Call fillComplete", callFillComplete);
6729  constructorSublist = sublist (params, "Constructor parameters");
6730  fillCompleteSublist = sublist (params, "fillComplete parameters");
6731  }
6732 
6733  RCP<const map_type> A_rowMap = A.getRowMap ();
6734  RCP<const map_type> B_rowMap = B.getRowMap ();
6735  RCP<const map_type> C_rowMap = B_rowMap; // see discussion in documentation
6736  RCP<crs_matrix_type> C; // The result matrix.
6737 
6738  // If A and B's row Maps are the same, we can compute an upper
6739  // bound on the number of entries in each row of C, before
6740  // actually computing the sum. A reasonable upper bound is the
6741  // sum of the two entry counts in each row. If we choose this as
6742  // the actual per-row upper bound, we can use static profile.
6743  if (A_rowMap->isSameAs (*B_rowMap)) {
6744  const LO localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
6745  ArrayRCP<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
6746 
6747  // Get the number of entries in each row of A.
// Skipped when alpha is zero, matching the fill loop below, which then
// inserts nothing from A.
6748  if (alpha != ZERO) {
6749  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
6750  const size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
6751  C_maxNumEntriesPerRow[localRow] += A_numEntries;
6752  }
6753  }
6754  // Get the number of entries in each row of B.
6755  if (beta != ZERO) {
6756  for (LO localRow = 0; localRow < localNumRows; ++localRow) {
6757  const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
6758  C_maxNumEntriesPerRow[localRow] += B_numEntries;
6759  }
6760  }
6761  // Construct the result matrix C.
6762  if (constructorSublist.is_null ()) {
6763  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow,
6764  StaticProfile));
6765  } else {
6766  C = rcp (new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow,
6767  StaticProfile, constructorSublist));
6768  }
6769  // Since A and B have the same row Maps, we could add them
6770  // together all at once and merge values before we call
6771  // insertGlobalValues. However, we don't really need to, since
6772  // we've already allocated enough space in each row of C for C
6773  // to do the merge itself.
6774  }
6775  else { // the row Maps of A and B are not the same
6776  // Construct the result matrix C.
6777  if (constructorSublist.is_null ()) {
6778  C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile));
6779  } else {
6780  C = rcp (new crs_matrix_type (C_rowMap, 0, DynamicProfile,
6781  constructorSublist));
6782  }
6783  }
6784 
6785 #ifdef HAVE_TPETRA_DEBUG
6786  TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
6787  "Tpetra::RowMatrix::add: C should not be null at this point. "
6788  "Please report this bug to the Tpetra developers.");
6789 #endif // HAVE_TPETRA_DEBUG
6790  //
6791  // Compute C = alpha*A + beta*B.
6792  //
// Scratch buffers shared by both fill loops; they only ever grow.
6793  Array<GO> ind;
6794  Array<Scalar> val;
6795 
6796  if (alpha != ZERO) {
6797  const LO A_localNumRows = static_cast<LO> (A_rowMap->getNodeNumElements ());
6798  for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
6799  size_t A_numEntries = A.getNumEntriesInLocalRow (localRow);
6800  const GO globalRow = A_rowMap->getGlobalElement (localRow);
6801  if (A_numEntries > static_cast<size_t> (ind.size ())) {
6802  ind.resize (A_numEntries);
6803  val.resize (A_numEntries);
6804  }
6805  ArrayView<GO> indView = ind (0, A_numEntries);
6806  ArrayView<Scalar> valView = val (0, A_numEntries);
6807  A.getGlobalRowCopy (globalRow, indView, valView, A_numEntries);
6808 
// Scale in the scratch buffer; the multiplication is skipped when alpha is
// exactly one.
6809  if (alpha != ONE) {
6810  for (size_t k = 0; k < A_numEntries; ++k) {
6811  valView[k] *= alpha;
6812  }
6813  }
// A's rows are inserted by global index into C, whose row Map is B's.
6814  C->insertGlobalValues (globalRow, indView, valView);
6815  }
6816  }
6817 
6818  if (beta != ZERO) {
6819  const LO B_localNumRows = static_cast<LO> (B_rowMap->getNodeNumElements ());
6820  for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
6821  size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
6822  const GO globalRow = B_rowMap->getGlobalElement (localRow);
6823  if (B_numEntries > static_cast<size_t> (ind.size ())) {
6824  ind.resize (B_numEntries);
6825  val.resize (B_numEntries);
6826  }
6827  ArrayView<GO> indView = ind (0, B_numEntries);
6828  ArrayView<Scalar> valView = val (0, B_numEntries);
6829  B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
6830 
6831  if (beta != ONE) {
6832  for (size_t k = 0; k < B_numEntries; ++k) {
6833  valView[k] *= beta;
6834  }
6835  }
6836  C->insertGlobalValues (globalRow, indView, valView);
6837  }
6838  }
6839 
6840  if (callFillComplete) {
6841  if (fillCompleteSublist.is_null ()) {
6842  C->fillComplete (theDomainMap, theRangeMap);
6843  } else {
6844  C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
6845  }
6846  }
6847  return rcp_implicit_cast<row_matrix_type> (C);
6848  }
6849 
6850  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
6851  void
6854  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6855  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
6856  const Teuchos::RCP<const map_type>& domainMap,
6857  const Teuchos::RCP<const map_type>& rangeMap,
6858  const Teuchos::RCP<Teuchos::ParameterList>& params) const
6859  {
6861  using Teuchos::ArrayRCP;
6862  using Teuchos::ArrayView;
6863  using Teuchos::Comm;
6864  using Teuchos::ParameterList;
6865  using Teuchos::RCP;
6866  typedef LocalOrdinal LO;
6867  typedef GlobalOrdinal GO;
6868  typedef node_type NT;
6870  typedef Vector<int, LO, GO, NT> IntVectorType;
6871 
6872 #ifdef HAVE_TPETRA_MMM_TIMINGS
6873  std::string label;
6874  if(!params.is_null())
6875  label = params->get("Timer Label",label);
6876  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
6877  using Teuchos::TimeMonitor;
6878  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Pack-1"))));
6879 #endif
6880 
6881  // Make sure that the input argument rowTransfer is either an
6882  // Import or an Export. Import and Export are the only two
6883  // subclasses of Transfer that we defined, but users might
6884  // (unwisely, for now at least) decide to implement their own
6885  // subclasses. Exclude this possibility.
6886  const import_type* xferAsImport = dynamic_cast<const import_type*> (&rowTransfer);
6887  const export_type* xferAsExport = dynamic_cast<const export_type*> (&rowTransfer);
6888  TEUCHOS_TEST_FOR_EXCEPTION(
6889  xferAsImport == NULL && xferAsExport == NULL, std::invalid_argument,
6890  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
6891  "argument must be either an Import or an Export, and its template "
6892  "parameters must match the corresponding template parameters of the "
6893  "CrsMatrix.");
6894 
6895  // Make sure that the input argument domainTransfer is either an
6896  // Import or an Export. Import and Export are the only two
6897  // subclasses of Transfer that we defined, but users might
6898  // (unwisely, for now at least) decide to implement their own
6899  // subclasses. Exclude this possibility.
6900  Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<const import_type> (domainTransfer);
6901  Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<const export_type> (domainTransfer);
6902 
6903  if(! domainTransfer.is_null()) {
6904  TEUCHOS_TEST_FOR_EXCEPTION(
6905  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6906  "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
6907  "argument must be either an Import or an Export, and its template "
6908  "parameters must match the corresponding template parameters of the "
6909  "CrsMatrix.");
6910 
6911  TEUCHOS_TEST_FOR_EXCEPTION(
6912  ( xferAsImport != NULL || ! xferDomainAsImport.is_null() ) &&
6913  (( xferAsImport != NULL && xferDomainAsImport.is_null() ) ||
6914  ( xferAsImport == NULL && ! xferDomainAsImport.is_null() )), std::invalid_argument,
6915  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
6916  "arguments must be of the same type (either Import or Export).");
6917 
6918  TEUCHOS_TEST_FOR_EXCEPTION(
6919  ( xferAsExport != NULL || ! xferDomainAsExport.is_null() ) &&
6920  (( xferAsExport != NULL && xferDomainAsExport.is_null() ) ||
6921  ( xferAsExport == NULL && ! xferDomainAsExport.is_null() )), std::invalid_argument,
6922  "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
6923  "arguments must be of the same type (either Import or Export).");
6924  } // domainTransfer != null
6925 
6926 
6927  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6928  // if the source Map is not distributed but the target Map is?
6929  const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
6930 
6931  //
6932  // Get the caller's parameters
6933  //
6934 
6935  bool reverseMode = false; // Are we in reverse mode?
6936  bool restrictComm = false; // Do we need to restrict the communicator?
6937  RCP<ParameterList> matrixparams; // parameters for the destination matrix
6938  if (! params.is_null ()) {
6939  reverseMode = params->get ("Reverse Mode", reverseMode);
6940  restrictComm = params->get ("Restrict Communicator", restrictComm);
6941  matrixparams = sublist (params, "CrsMatrix");
6942  }
6943 
6944  // Get the new domain and range Maps. We need some of them for
6945  // error checking, now that we have the reverseMode parameter.
6946  RCP<const map_type> MyRowMap = reverseMode ?
6947  rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
6948  RCP<const map_type> MyColMap; // create this below
6949  RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
6950  domainMap : getDomainMap ();
6951  RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
6952  rangeMap : getRangeMap ();
6953  RCP<const map_type> BaseRowMap = MyRowMap;
6954  RCP<const map_type> BaseDomainMap = MyDomainMap;
6955 
6956  // If the user gave us a nonnull destMat, then check whether it's
6957  // "pristine." That means that it has no entries.
6958  //
6959  // FIXME (mfh 15 May 2014) If this is not true on all processes,
6960  // then this exception test may hang. It would be better to
6961  // forward an error flag to the next communication phase.
6962  if (! destMat.is_null ()) {
6963  // FIXME (mfh 15 May 2014): The classic Petra idiom for checking
6964  // whether a graph or matrix has no entries on the calling
6965  // process, is that it is neither locally nor globally indexed.
6966  // This may change eventually with the Kokkos refactor version
6967  // of Tpetra, so it would be better just to check the quantity
6968  // of interest directly. Note that with the Kokkos refactor
6969  // version of Tpetra, asking for the total number of entries in
6970  // a graph or matrix that is not fill complete might require
6971  // computation (kernel launch), since it is not thread scalable
6972  // to update a count every time an entry is inserted.
6973  const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
6974  ! destMat->getGraph ()->isGloballyIndexed ();
6975  TEUCHOS_TEST_FOR_EXCEPTION(
6976  ! NewFlag, std::invalid_argument, "Tpetra::CrsMatrix::"
6977  "transferAndFillComplete: The input argument 'destMat' is only allowed "
6978  "to be nonnull, if its graph is empty (neither locally nor globally "
6979  "indexed).");
6980  // FIXME (mfh 15 May 2014) At some point, we want to change
6981  // graphs and matrices so that their DistObject Map
6982  // (this->getMap()) may differ from their row Map. This will
6983  // make redistribution for 2-D distributions more efficient. I
6984  // hesitate to change this check, because I'm not sure how much
6985  // the code here depends on getMap() and getRowMap() being the
6986  // same.
6987  TEUCHOS_TEST_FOR_EXCEPTION(
6988  ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
6989  "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
6990  "input argument 'destMat' is not the same as the (row) Map specified "
6991  "by the input argument 'rowTransfer'.");
6992  TEUCHOS_TEST_FOR_EXCEPTION(
6993  ! destMat->checkSizes (*this), std::invalid_argument,
6994  "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
6995  "destination matrix, but checkSizes() indicates that it is not a legal "
6996  "legal target for redistribution from the source matrix (*this). This "
6997  "may mean that they do not have the same dimensions.");
6998  }
6999 
7000  // If forward mode (the default), then *this's (row) Map must be
7001  // the same as the source Map of the Transfer. If reverse mode,
7002  // then *this's (row) Map must be the same as the target Map of
7003  // the Transfer.
7004  //
7005  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
7006  // and matrices so that their DistObject Map (this->getMap()) may
7007  // differ from their row Map. This will make redistribution for
7008  // 2-D distributions more efficient. I hesitate to change this
7009  // check, because I'm not sure how much the code here depends on
7010  // getMap() and getRowMap() being the same.
7011  TEUCHOS_TEST_FOR_EXCEPTION(
7012  ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
7013  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
7014  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7015  TEUCHOS_TEST_FOR_EXCEPTION(
7016  ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
7017  std::invalid_argument, "Tpetra::CrsMatrix::transferAndFillComplete: "
7018  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7019 
7020  // checks for domainTransfer
7021  TEUCHOS_TEST_FOR_EXCEPTION(
7022  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7023  std::invalid_argument,
7024  "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
7025  "argument must be the same as the rebalanced domain map 'domainMap'");
7026 
7027  TEUCHOS_TEST_FOR_EXCEPTION(
7028  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7029  std::invalid_argument,
7030  "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
7031  "argument must be the same as the rebalanced domain map 'domainMap'");
7032 
7033  // The basic algorithm here is:
7034  //
7035  // 1. Call the moral equivalent of "distor.do" to handle the import.
7036  // 2. Copy all the Imported and Copy/Permuted data into the raw
7037  // CrsMatrix / CrsGraphData pointers, still using GIDs.
7038  // 3. Call an optimized version of MakeColMap that avoids the
7039  // Directory lookups (since the importer knows who owns all the
7040  // GIDs) AND reindexes to LIDs.
7041  // 4. Call expertStaticFillComplete()
7042 
7043  // Get information from the Importer
7044  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7045  ArrayView<const LO> ExportLIDs = reverseMode ?
7046  rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
7047  ArrayView<const LO> RemoteLIDs = reverseMode ?
7048  rowTransfer.getExportLIDs () : rowTransfer.getRemoteLIDs ();
7049  ArrayView<const LO> PermuteToLIDs = reverseMode ?
7050  rowTransfer.getPermuteFromLIDs () : rowTransfer.getPermuteToLIDs ();
7051  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
7052  rowTransfer.getPermuteToLIDs () : rowTransfer.getPermuteFromLIDs ();
7053  Distributor& Distor = rowTransfer.getDistributor ();
7054 
7055  // Owning PIDs
7056  Teuchos::Array<int> SourcePids;
7057  Teuchos::Array<int> TargetPids;
7058  int MyPID = getComm ()->getRank ();
7059 
7060  // Temp variables for sub-communicators
7061  RCP<const map_type> ReducedRowMap, ReducedColMap,
7062  ReducedDomainMap, ReducedRangeMap;
7063  RCP<const Comm<int> > ReducedComm;
7064 
7065  // If the user gave us a null destMat, then construct the new
7066  // destination matrix. We will replace its column Map later.
7067  if (destMat.is_null ()) {
7068  destMat = rcp (new this_type (MyRowMap, 0, StaticProfile, matrixparams));
7069  }
7070 
7071  /***************************************************/
7072  /***** 1) First communicator restriction phase ****/
7073  /***************************************************/
7074  if (restrictComm) {
7075  ReducedRowMap = MyRowMap->removeEmptyProcesses ();
7076  ReducedComm = ReducedRowMap.is_null () ?
7077  Teuchos::null :
7078  ReducedRowMap->getComm ();
7079  destMat->removeEmptyProcessesInPlace (ReducedRowMap);
7080 
7081  ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
7082  ReducedRowMap :
7083  MyDomainMap->replaceCommWithSubset (ReducedComm);
7084  ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
7085  ReducedRowMap :
7086  MyRangeMap->replaceCommWithSubset (ReducedComm);
7087 
7088  // Reset the "my" maps
7089  MyRowMap = ReducedRowMap;
7090  MyDomainMap = ReducedDomainMap;
7091  MyRangeMap = ReducedRangeMap;
7092 
7093  // Update my PID, if we've restricted the communicator
7094  if (! ReducedComm.is_null ()) {
7095  MyPID = ReducedComm->getRank ();
7096  }
7097  else {
7098  MyPID = -2; // For debugging
7099  }
7100  }
7101  else {
7102  ReducedComm = MyRowMap->getComm ();
7103  }
7104 
7105  /***************************************************/
7106  /***** 2) From Tpera::DistObject::doTransfer() ****/
7107  /***************************************************/
7108 #ifdef HAVE_TPETRA_MMM_TIMINGS
7109  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC ImportSetup"))));
7110 #endif
7111  // Get the owning PIDs
7112  RCP<const import_type> MyImporter = getGraph ()->getImporter ();
7113 
7114  // check whether domain maps of source matrix and base domain map is the same
7115  bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
7116 
7117  if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
7118  // Same domain map as source matrix
7119  //
7120  // NOTE: This won't work for restrictComm (because the Import
7121  // doesn't know the restricted PIDs), though writing an
7122  // optimized version for that case would be easy (Import an
7123  // IntVector of the new PIDs). Might want to add this later.
7124  Import_Util::getPids (*MyImporter, SourcePids, false);
7125  }
7126  else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
7127  // Same domain map as source matrix (restricted communicator)
7128  // We need one import from the domain to the column map
7129  IntVectorType SourceDomain_pids(getDomainMap (),true);
7130  IntVectorType SourceCol_pids(getColMap());
7131  // SourceDomain_pids contains the restricted pids
7132  SourceDomain_pids.putScalar(MyPID);
7133 
7134  SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
7135  SourcePids.resize (getColMap ()->getNodeNumElements ());
7136  SourceCol_pids.get1dCopy (SourcePids ());
7137  }
7138  else if (MyImporter.is_null () && bSameDomainMap) {
7139  // Matrix has no off-process entries
7140  SourcePids.resize (getColMap ()->getNodeNumElements ());
7141  SourcePids.assign (getColMap ()->getNodeNumElements (), MyPID);
7142  }
7143  else if ( ! MyImporter.is_null () &&
7144  ! domainTransfer.is_null () ) {
7145  // general implementation for rectangular matrices with
7146  // domain map different than SourceMatrix domain map.
7147  // User has to provide a DomainTransfer object. We need
7148  // to communications (import/export)
7149 
7150  // TargetDomain_pids lives on the rebalanced new domain map
7151  IntVectorType TargetDomain_pids (domainMap);
7152  TargetDomain_pids.putScalar (MyPID);
7153 
7154  // SourceDomain_pids lives on the non-rebalanced old domain map
7155  IntVectorType SourceDomain_pids (getDomainMap ());
7156 
7157  // SourceCol_pids lives on the non-rebalanced old column map
7158  IntVectorType SourceCol_pids (getColMap ());
7159 
7160  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
7161  SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport, INSERT);
7162  }
7163  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
7164  SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport, INSERT);
7165  }
7166  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
7167  SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport, INSERT);
7168  }
7169  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
7170  SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport, INSERT);
7171  }
7172  else {
7173  TEUCHOS_TEST_FOR_EXCEPTION(
7174  true, std::logic_error, "Tpetra::CrsMatrix::"
7175  "transferAndFillComplete: Should never get here! "
7176  "Please report this bug to a Tpetra developer.");
7177  }
7178  SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
7179  SourcePids.resize (getColMap ()->getNodeNumElements ());
7180  SourceCol_pids.get1dCopy (SourcePids ());
7181  }
7182  else if (BaseDomainMap->isSameAs (*BaseRowMap) &&
7183  getDomainMap ()->isSameAs (*getRowMap ())) {
7184  // We can use the rowTransfer + SourceMatrix's Import to find out who owns what.
7185  IntVectorType TargetRow_pids (domainMap);
7186  IntVectorType SourceRow_pids (getRowMap ());
7187  IntVectorType SourceCol_pids (getColMap ());
7188 
7189  TargetRow_pids.putScalar (MyPID);
7190  if (! reverseMode && xferAsImport != NULL) {
7191  SourceRow_pids.doExport (TargetRow_pids, *xferAsImport, INSERT);
7192  }
7193  else if (reverseMode && xferAsExport != NULL) {
7194  SourceRow_pids.doExport (TargetRow_pids, *xferAsExport, INSERT);
7195  }
7196  else if (! reverseMode && xferAsExport != NULL) {
7197  SourceRow_pids.doImport (TargetRow_pids, *xferAsExport, INSERT);
7198  }
7199  else if (reverseMode && xferAsImport != NULL) {
7200  SourceRow_pids.doImport (TargetRow_pids, *xferAsImport, INSERT);
7201  }
7202  else {
7203  TEUCHOS_TEST_FOR_EXCEPTION(
7204  true, std::logic_error, "Tpetra::CrsMatrix::"
7205  "transferAndFillComplete: Should never get here! "
7206  "Please report this bug to a Tpetra developer.");
7207  }
7208  SourceCol_pids.doImport (SourceRow_pids, *MyImporter, INSERT);
7209  SourcePids.resize (getColMap ()->getNodeNumElements ());
7210  SourceCol_pids.get1dCopy (SourcePids ());
7211  }
7212  else {
7213  TEUCHOS_TEST_FOR_EXCEPTION(
7214  true, std::invalid_argument, "Tpetra::CrsMatrix::"
7215  "transferAndFillComplete: This method only allows either domainMap == "
7216  "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
7217  "getDomainMap () == getRowMap ()).");
7218  }
7219 #ifdef HAVE_TPETRA_MMM_TIMINGS
7220  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Pack-2"))));
7221 #endif
7222 
7223  // Tpetra-specific stuff
7224  size_t constantNumPackets = destMat->constantNumberOfPackets ();
7225  if (constantNumPackets == 0) {
7226  // FIXME (mfh 25 Apr 2016) Once we've finished fixing #227, we
7227  // may be able to remove these fences that protect allocations.
7228  execution_space::fence ();
7229  destMat->numExportPacketsPerLID_ =
7230  decltype (destMat->numExportPacketsPerLID_) ("numExportPacketsPerLID",
7231  ExportLIDs.size ());
7232  execution_space::fence ();
7233  destMat->numImportPacketsPerLID_ =
7234  decltype (destMat->numImportPacketsPerLID_) ("numImportPacketsPerLID",
7235  RemoteLIDs.size ());
7236  execution_space::fence ();
7237  }
7238  else {
7239  // There are a constant number of packets per element. We
7240  // already know (from the number of "remote" (incoming)
7241  // elements) how many incoming elements we expect, so we can
7242  // resize the buffer accordingly.
7243  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7244  destMat->reallocImportsIfNeeded (rbufLen);
7245  }
7246 
7247  // Pack & Prepare w/ owning PIDs
7248 #ifdef HAVE_TPETRA_DEBUG
7249  {
7250  using Teuchos::outArg;
7251  using Teuchos::REDUCE_MAX;
7252  using Teuchos::reduceAll;
7253  using std::cerr;
7254  using std::endl;
7255  RCP<const Teuchos::Comm<int> > comm = this->getComm ();
7256  const int myRank = comm->getRank ();
7257  const int numProcs = comm->getSize ();
7258 
7259  std::ostringstream os;
7260  int lclErr = 0;
7261  try {
7262  // packAndPrepare* methods modify numExportPacketsPerLID_.
7263  destMat->numExportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
7264  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7265  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
7266  Import_Util::packAndPrepareWithOwningPIDs (*this, ExportLIDs,
7267  destMat->exports_,
7268  numExportPacketsPerLID,
7269  constantNumPackets, Distor,
7270  SourcePids);
7271  }
7272  catch (std::exception& e) {
7273  os << "Proc " << myRank << ": " << e.what ();
7274  lclErr = 1;
7275  }
7276  int gblErr = 0;
7277  if (! comm.is_null ()) {
7278  reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
7279  }
7280  if (gblErr != 0) {
7281  if (myRank == 0) {
7282  cerr << "packAndPrepareWithOwningPIDs threw an exception: " << endl;
7283  }
7284  std::ostringstream err;
7285  for (int r = 0; r < numProcs; ++r) {
7286  if (r == myRank && lclErr != 0) {
7287  cerr << os.str () << endl;
7288  }
7289  comm->barrier ();
7290  comm->barrier ();
7291  comm->barrier ();
7292  }
7293 
7294  TEUCHOS_TEST_FOR_EXCEPTION(
7295  true, std::logic_error, "packAndPrepareWithOwningPIDs threw an "
7296  "exception.");
7297  }
7298  }
7299 
7300 #else
7301  {
7302  // packAndPrepare* methods modify numExportPacketsPerLID_.
7303  destMat->numExportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
7304  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7305  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
7306  Import_Util::packAndPrepareWithOwningPIDs (*this, ExportLIDs,
7307  destMat->exports_,
7308  numExportPacketsPerLID,
7309  constantNumPackets, Distor,
7310  SourcePids);
7311  }
7312 #endif // HAVE_TPETRA_DEBUG
7313 
7314  // Do the exchange of remote data.
7315 #ifdef HAVE_TPETRA_MMM_TIMINGS
7316  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Transfer"))));
7317 #endif
7318 
7319  if (communication_needed) {
7320  if (reverseMode) {
7321  if (constantNumPackets == 0) { // variable number of packets per LID
7322  // Make sure that host has the latest version, since we're
7323  // using the version on host. If host has the latest
7324  // version, syncing to host does nothing.
7325  destMat->numExportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
7326  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7327  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
7328  destMat->numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
7329  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7330  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
7331  Distor.doReversePostsAndWaits (numExportPacketsPerLID, 1,
7332  numImportPacketsPerLID);
7333  size_t totalImportPackets = 0;
7334  for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
7335  totalImportPackets += numImportPacketsPerLID[i];
7336  }
7337 
7338  // Reallocation MUST go before setting the modified flag,
7339  // because it may clear out the flags.
7340  destMat->reallocImportsIfNeeded (totalImportPackets);
7341  destMat->imports_.template modify<Kokkos::HostSpace> ();
7342  Teuchos::ArrayView<char> hostImports =
7343  getArrayViewFromDualView (destMat->imports_);
7344  // This is a legacy host pack/unpack path, so use the host
7345  // version of exports_.
7346  destMat->exports_.template sync<Kokkos::HostSpace> ();
7347  Teuchos::ArrayView<const char> hostExports =
7348  getArrayViewFromDualView (destMat->exports_);
7349  Distor.doReversePostsAndWaits (hostExports,
7350  numExportPacketsPerLID,
7351  hostImports,
7352  numImportPacketsPerLID);
7353  }
7354  else { // constant number of packets per LI
7355  destMat->imports_.template modify<Kokkos::HostSpace> ();
7356  Teuchos::ArrayView<char> hostImports =
7357  getArrayViewFromDualView (destMat->imports_);
7358  // This is a legacy host pack/unpack path, so use the host
7359  // version of exports_.
7360  destMat->exports_.template sync<Kokkos::HostSpace> ();
7361  Teuchos::ArrayView<const char> hostExports =
7362  getArrayViewFromDualView (destMat->exports_);
7363  Distor.doReversePostsAndWaits (hostExports,
7364  constantNumPackets,
7365  hostImports);
7366  }
7367  }
7368  else { // forward mode (the default)
7369  if (constantNumPackets == 0) { // variable number of packets per LID
7370  // Make sure that host has the latest version, since we're
7371  // using the version on host. If host has the latest
7372  // version, syncing to host does nothing.
7373  destMat->numExportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
7374  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7375  getArrayViewFromDualView (destMat->numExportPacketsPerLID_);
7376  destMat->numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
7377  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7378  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
7379  Distor.doPostsAndWaits (numExportPacketsPerLID, 1,
7380  numImportPacketsPerLID);
7381  size_t totalImportPackets = 0;
7382  for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
7383  totalImportPackets += numImportPacketsPerLID[i];
7384  }
7385 
7386  // Reallocation MUST go before setting the modified flag,
7387  // because it may clear out the flags.
7388  destMat->reallocImportsIfNeeded (totalImportPackets);
7389  destMat->imports_.template modify<Kokkos::HostSpace> ();
7390  Teuchos::ArrayView<char> hostImports =
7391  getArrayViewFromDualView (destMat->imports_);
7392  // This is a legacy host pack/unpack path, so use the host
7393  // version of exports_.
7394  destMat->exports_.template sync<Kokkos::HostSpace> ();
7395  Teuchos::ArrayView<const char> hostExports =
7396  getArrayViewFromDualView (destMat->exports_);
7397  Distor.doPostsAndWaits (hostExports,
7398  numExportPacketsPerLID,
7399  hostImports,
7400  numImportPacketsPerLID);
7401  }
7402  else { // constant number of packets per LID
7403  destMat->imports_.template modify<Kokkos::HostSpace> ();
7404  Teuchos::ArrayView<char> hostImports =
7405  getArrayViewFromDualView (destMat->imports_);
7406  // This is a legacy host pack/unpack path, so use the host
7407  // version of exports_.
7408  destMat->exports_.template sync<Kokkos::HostSpace> ();
7409  Teuchos::ArrayView<const char> hostExports =
7410  getArrayViewFromDualView (destMat->exports_);
7411  Distor.doPostsAndWaits (hostExports,
7412  constantNumPackets,
7413  hostImports);
7414  }
7415  }
7416  }
7417 
7418  /*********************************************************************/
7419  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7420  /*********************************************************************/
7421 
7422 #ifdef HAVE_TPETRA_MMM_TIMINGS
7423  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-1"))));
7424 #endif
7425 
7426  // Backwards compatibility measure. We'll use this again below.
7427  destMat->numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
7428  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7429  getArrayViewFromDualView (destMat->numImportPacketsPerLID_);
7430  destMat->imports_.template sync<Kokkos::HostSpace> ();
7431  Teuchos::ArrayView<const char> hostImports =
7432  getArrayViewFromDualView (destMat->imports_);
7433  size_t mynnz =
7434  Import_Util::unpackAndCombineWithOwningPIDsCount (*this, RemoteLIDs,
7435  hostImports,
7436  numImportPacketsPerLID,
7437  constantNumPackets,
7438  Distor, INSERT,
7439  NumSameIDs,
7440  PermuteToLIDs,
7441  PermuteFromLIDs);
7442  size_t N = BaseRowMap->getNodeNumElements ();
7443 
7444  // Allocations
7445  ArrayRCP<size_t> CSR_rowptr(N+1);
7446  ArrayRCP<GO> CSR_colind_GID;
7447  ArrayRCP<LO> CSR_colind_LID;
7448  ArrayRCP<Scalar> CSR_vals;
7449  CSR_colind_GID.resize (mynnz);
7450  CSR_vals.resize (mynnz);
7451 
7452  // If LO and GO are the same, we can reuse memory when
7453  // converting the column indices from global to local indices.
7454  if (typeid (LO) == typeid (GO)) {
7455  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
7456  }
7457  else {
7458  CSR_colind_LID.resize (mynnz);
7459  }
7460 
7461  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7462  // unpackAndCombine method on a "CrsArrays" object? This passing
7463  // in a huge list of arrays is icky. Can't we have a bit of an
7464  // abstraction? Implementing a concrete DistObject subclass only
7465  // takes five methods.
7466  Import_Util::unpackAndCombineIntoCrsArrays (*this, RemoteLIDs, hostImports,
7467  numImportPacketsPerLID,
7468  constantNumPackets, Distor, INSERT, NumSameIDs,
7469  PermuteToLIDs, PermuteFromLIDs, N, mynnz, MyPID,
7470  CSR_rowptr (), CSR_colind_GID (), CSR_vals (),
7471  SourcePids (), TargetPids);
7472 
7473  /**************************************************************/
7474  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7475  /**************************************************************/
7476 #ifdef HAVE_TPETRA_MMM_TIMINGS
7477  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-2"))));
7478 #endif
7479  // Call an optimized version of makeColMap that avoids the
7480  // Directory lookups (since the Import object knows who owns all
7481  // the GIDs).
7482  Teuchos::Array<int> RemotePids;
7483  Import_Util::lowCommunicationMakeColMapAndReindex (CSR_rowptr (),
7484  CSR_colind_LID (),
7485  CSR_colind_GID (),
7486  BaseDomainMap,
7487  TargetPids, RemotePids,
7488  MyColMap);
7489 
7490  /*******************************************************/
7491  /**** 4) Second communicator restriction phase ****/
7492  /*******************************************************/
7493  if (restrictComm) {
7494  ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
7495  ReducedRowMap :
7496  MyColMap->replaceCommWithSubset (ReducedComm);
7497  MyColMap = ReducedColMap; // Reset the "my" maps
7498  }
7499 
7500  // Replace the col map
7501  destMat->replaceColMap (MyColMap);
7502 
7503  // Short circuit if the processor is no longer in the communicator
7504  //
7505  // NOTE: Epetra replaces modifies all "removed" processes so they
7506  // have a dummy (serial) Map that doesn't touch the original
7507  // communicator. Duplicating that here might be a good idea.
7508  if (ReducedComm.is_null ()) {
7509  return;
7510  }
7511 
7512  /***************************************************/
7513  /**** 5) Sort ****/
7514  /***************************************************/
7515 #ifdef HAVE_TPETRA_MMM_TIMINGS
7516  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC Unpack-3"))));
7517 #endif
7518  if ((! reverseMode && xferAsImport != NULL) ||
7519  (reverseMode && xferAsExport != NULL)) {
7520  Import_Util::sortCrsEntries (CSR_rowptr (),
7521  CSR_colind_LID (),
7522  CSR_vals ());
7523  }
7524  else if ((! reverseMode && xferAsExport != NULL) ||
7525  (reverseMode && xferAsImport != NULL)) {
7526  Import_Util::sortAndMergeCrsEntries (CSR_rowptr (),
7527  CSR_colind_LID (),
7528  CSR_vals ());
7529  if (CSR_rowptr[N] != mynnz) {
7530  CSR_colind_LID.resize (CSR_rowptr[N]);
7531  CSR_vals.resize (CSR_rowptr[N]);
7532  }
7533  }
7534  else {
7535  TEUCHOS_TEST_FOR_EXCEPTION(
7536  true, std::logic_error, "Tpetra::CrsMatrix::"
7537  "transferAndFillComplete: Should never get here! "
7538  "Please report this bug to a Tpetra developer.");
7539  }
7540  /***************************************************/
7541  /**** 6) Reset the colmap and the arrays ****/
7542  /***************************************************/
7543 
7544  // Call constructor for the new matrix (restricted as needed)
7545  //
7546  // NOTE (mfh 15 May 2014) This should work fine for the Kokkos
7547  // refactor version of CrsMatrix, though it reserves the right to
7548  // make a deep copy of the arrays.
7549  destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
7550 
7551  /***************************************************/
7552  /**** 7) Build Importer & Call ESFC ****/
7553  /***************************************************/
7554  // Pre-build the importer using the existing PIDs
7555  Teuchos::ParameterList esfc_params;
7556 #ifdef HAVE_TPETRA_MMM_TIMINGS
7557  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC CreateImporter"))));
7558 #endif
7559  RCP<import_type> MyImport = rcp (new import_type (MyDomainMap, MyColMap, RemotePids));
7560 #ifdef HAVE_TPETRA_MMM_TIMINGS
7561  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC ESFC"))));
7562 
7563  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7564 #endif
7565  if(!params.is_null())
7566  esfc_params.set("compute global constants",params->get("compute global constants",true));
7567 
7568  destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(&esfc_params,false));
7569  }
7570 
// Convenience wrapper for redistributing this matrix with a single Import.
//
// Forwards every argument to transferAndFillComplete(), passing
// Teuchos::null as the third argument, i.e., no separate domain transfer
// object is supplied by this overload.
//
// Parameters visible in this listing:
//   importer   Import handed to transferAndFillComplete() as its second
//              (row transfer) argument.
//   domainMap  Forwarded unchanged.
//   rangeMap   Forwarded unchanged.
//   params     Forwarded unchanged.
//
// NOTE(review): this listing jumps from line 7572 to 7575, so the qualified
// function name (presumably importAndFillComplete) and the declaration of
// 'destMatrix', which the body uses, are not visible here -- TODO confirm
// against the original header.
7571  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
7572  void
7575  const import_type& importer,
7576  const Teuchos::RCP<const map_type>& domainMap,
7577  const Teuchos::RCP<const map_type>& rangeMap,
7578  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7579  {
7580  transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
7581  }
7582 
// Convenience wrapper for redistributing this matrix with separate row and
// domain Imports.
//
// Forwards to transferAndFillComplete() with 'rowImporter' as the row
// transfer and 'domainImporter' as the domain transfer. The domain Import
// arrives by reference and is wrapped with Teuchos::rcpFromRef so that it
// can be passed where an RCP is expected (per the Teuchos documentation the
// resulting RCP does not own the object).
//
// transferAndFillComplete() throws std::invalid_argument if the target Map
// of a domain Import is not the same as 'domainMap'. That check
// dereferences 'domainMap', so callers of this overload should pass a
// nonnull 'domainMap'.
//
// NOTE(review): this listing jumps from line 7584 to 7587, so the qualified
// function name (presumably importAndFillComplete) and the declaration of
// 'destMatrix', which the body uses, are not visible here -- TODO confirm
// against the original header.
7583  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
7584  void
7587  const import_type& rowImporter,
7588  const import_type& domainImporter,
7589  const Teuchos::RCP<const map_type>& domainMap,
7590  const Teuchos::RCP<const map_type>& rangeMap,
7591  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7592  {
7593  transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7594  }
7595 
// Convenience wrapper for redistributing this matrix with a single Export.
//
// Forwards every argument to transferAndFillComplete(), passing
// Teuchos::null as the third argument, i.e., no separate domain transfer
// object is supplied by this overload.
//
// Parameters visible in this listing:
//   exporter   Export handed to transferAndFillComplete() as its second
//              (row transfer) argument.
//   domainMap  Forwarded unchanged.
//   rangeMap   Forwarded unchanged.
//   params     Forwarded unchanged.
//
// NOTE(review): this listing jumps from line 7597 to 7600, so the qualified
// function name (presumably exportAndFillComplete) and the declaration of
// 'destMatrix', which the body uses, are not visible here -- TODO confirm
// against the original header.
7596  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
7597  void
7600  const export_type& exporter,
7601  const Teuchos::RCP<const map_type>& domainMap,
7602  const Teuchos::RCP<const map_type>& rangeMap,
7603  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7604  {
7605  transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
7606  }
7607 
// Convenience wrapper for redistributing this matrix with separate row and
// domain Exports.
//
// Forwards to transferAndFillComplete() with 'rowExporter' as the row
// transfer and 'domainExporter' as the domain transfer. The domain Export
// arrives by reference and is wrapped with Teuchos::rcpFromRef so that it
// can be passed where an RCP is expected (per the Teuchos documentation the
// resulting RCP does not own the object).
//
// transferAndFillComplete() throws std::invalid_argument if the source Map
// of a domain Export is not the same as 'domainMap'. That check
// dereferences 'domainMap', so callers of this overload should pass a
// nonnull 'domainMap'.
//
// NOTE(review): this listing jumps from line 7609 to 7612, so the qualified
// function name (presumably exportAndFillComplete) and the declaration of
// 'destMatrix', which the body uses, are not visible here -- TODO confirm
// against the original header.
7608  template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node, const bool classic>
7609  void
7612  const export_type& rowExporter,
7613  const export_type& domainExporter,
7614  const Teuchos::RCP<const map_type>& domainMap,
7615  const Teuchos::RCP<const map_type>& rangeMap,
7616  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7617  {
7618  transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7619  }
7620 
7621 } // namespace Tpetra
7622 
7623 //
7624 // Explicit instantiation macro
7625 //
7626 // Must be expanded from within the Tpetra namespace!
7627 //
7628 
// TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE)
//
// Expands to two explicit instantiations for one combination of template
// arguments:
//   1. the class CrsMatrix<SCALAR, LO, GO, NODE> itself, and
//   2. its member template convert<SCALAR>(), whose return type is
//      Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> >.
//
// The expansion names CrsMatrix without namespace qualification, which is
// why it has to be expanded inside namespace Tpetra.
7629 #define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
7630  \
7631  template class CrsMatrix< SCALAR , LO , GO , NODE >; \
7632  template Teuchos::RCP< CrsMatrix< SCALAR , LO , GO , NODE > > \
7633  CrsMatrix< SCALAR , LO , GO , NODE >::convert< SCALAR > () const;
7634 
// TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE)
//
// Expands to an explicit instantiation of the member template
// CrsMatrix<SI, LO, GO, NODE>::convert<SO>(), whose return type is
// Teuchos::RCP<CrsMatrix<SO, LO, GO, NODE> >. SI is therefore the Scalar
// type of the matrix being converted and SO the Scalar type of the result.
//
// The expansion names CrsMatrix without namespace qualification, so it has
// to be expanded inside namespace Tpetra.
7635 #define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
7636  \
7637  template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
7638  CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
7639 
// TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE)
//
// Expands to a declaration of the nonmember function
// importAndFillCompleteCrsMatrix for CrsMatrix<SCALAR, LO, GO, NODE>, in the
// overload that takes a single Import:
//   (sourceMatrix, importer, domainMap, rangeMap, params)
// and returns Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> >.
//
// The Import and Map template arguments are spelled through the matrix's
// own local_ordinal_type, global_ordinal_type and node_type typedefs rather
// than through LO, GO and NODE directly.
//
// NOTE(review): the expansion begins with "template<>", which in C++
// introduces an explicit specialization declaration, not an explicit
// instantiation (that would be "template" without "<>"), despite the
// _INSTANT suffix -- confirm this is intended.
7640 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
7641  template<> \
7642  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
7643  importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
7644  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7645  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7646  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
7647  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7648  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7649  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
7650  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7651  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7652  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
7653  const Teuchos::RCP<Teuchos::ParameterList>& params);
7654 
// Emits a "template<>" declaration of importAndFillCompleteCrsMatrix for
// CrsMatrix<SCALAR, LO, GO, NODE>, in the form that takes two Import
// objects ("rowImporter" and "domainImporter") followed by the domain Map,
// range Map and a ParameterList. The Import and Map template arguments are
// spelled through the matrix's local_ordinal_type / global_ordinal_type /
// node_type typedefs.
// NOTE(review): "template<>" is explicit-specialization syntax, not
// explicit-instantiation syntax ("template" without angle brackets) --
// confirm that this is what is intended here.
7655 #define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
7656  template<> \
7657  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
7658  importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
7659  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7660  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7661  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
7662  const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7663  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7664  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
7665  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7666  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7667  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
7668  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7669  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7670  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
7671  const Teuchos::RCP<Teuchos::ParameterList>& params);
7672 
7673 
// Emits a "template<>" declaration of exportAndFillCompleteCrsMatrix for
// CrsMatrix<SCALAR, LO, GO, NODE>, in the form that takes a single Export
// object ("exporter") followed by the domain Map, range Map and a
// ParameterList. The Export and Map template arguments are spelled through
// the matrix's local_ordinal_type / global_ordinal_type / node_type typedefs.
// NOTE(review): "template<>" is explicit-specialization syntax, not
// explicit-instantiation syntax ("template" without angle brackets) --
// confirm that this is what is intended here.
7674 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
7675  template<> \
7676  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
7677  exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
7678  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7679  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7680  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
7681  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7682  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7683  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
7684  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7685  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7686  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
7687  const Teuchos::RCP<Teuchos::ParameterList>& params);
7688 
// Emits a "template<>" declaration of exportAndFillCompleteCrsMatrix for
// CrsMatrix<SCALAR, LO, GO, NODE>, in the form that takes two Export
// objects ("rowExporter" and "domainExporter") followed by the domain Map,
// range Map and a ParameterList. The Export and Map template arguments are
// spelled through the matrix's local_ordinal_type / global_ordinal_type /
// node_type typedefs.
// NOTE(review): "template<>" is explicit-specialization syntax, not
// explicit-instantiation syntax ("template" without angle brackets) --
// confirm that this is what is intended here.
7689 #define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
7690  template<> \
7691  Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
7692  exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
7693  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7694  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7695  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
7696  const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7697  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7698  CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
7699  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7700  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7701  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
7702  const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
7703  CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
7704  CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
7705  const Teuchos::RCP<Teuchos::ParameterList>& params);
7706 
7707 
// Umbrella macro: for one (SCALAR, LO, GO, NODE) combination it expands, in
// this order, the class/convert instantiation macro, the single-Import and
// single-Export macros, and then the two-Import and two-Export macros.
// TPETRA_CRSMATRIX_CONVERT_INSTANT is not part of this expansion and has to
// be invoked separately where cross-scalar conversions are needed.
7708 #define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
7709  TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
7710  TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
7711  TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
7712  TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
7713  TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
7714 
7715 #endif // TPETRA_CRSMATRIX_DEF_HPP
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
ProfileType getProfileType() const
Returns true if the matrix was allocated with static data structures.
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan.
LocalOrdinal replaceGlobalValues(const GlobalOrdinal globalRow, const typename UnmanagedView< GlobalIndicesViewType >::type &inputInds, const typename UnmanagedView< ImplScalarViewType >::type &inputVals) const
Replace one or more entries' values, using global indices.
void getGlobalRowView(GlobalOrdinal GlobalRow, Teuchos::ArrayView< const GlobalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting view of a row of this matrix, using global row and column indices...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
void reindexColumns(crs_graph_type *const graph, const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
ProfileType getProfileType() const
Returns true if the graph was allocated with static data structures.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Functions for unpacking the entries of a Tpetra::CrsMatrix for communication, in the case where it is...
Functor for the ABSMAX CombineMode of Import and Export operations.
size_t getNodeMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on this node.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types...
virtual bool isLocallyIndexed() const =0
Whether matrix indices are locally indexed.
dual_view_type getDualView() const
Get the Kokkos::DualView which implements local storage.
std::string description() const
A one-line description of this object.
mag_type getFrobeniusNorm() const
Compute and return the Frobenius norm of the matrix.
LocalOrdinal local_ordinal_type
This class' second template parameter; the type of local indices.
size_t getNodeNumEntries() const
The local number of entries in this matrix.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
LocalOrdinal sumIntoLocalValues(const LocalOrdinal localRow, const typename UnmanagedView< LocalIndicesViewType >::type &inputInds, const typename UnmanagedView< ImplScalarViewType >::type &inputVals, const bool atomic=useAtomicUpdatesByDefault) const
Sum into one or more sparse matrix entries, using local row and column indices.
size_t getNodeNumEntries() const
Returns the local number of entries in the graph.
LocalOrdinal replaceLocalValues(const LocalOrdinal localRow, const typename UnmanagedView< LocalIndicesViewType >::type &inputInds, const typename UnmanagedView< ImplScalarViewType >::type &inputVals) const
Replace one or more entries' values, using local row and column indices.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const
This matrix's graph, as a RowGraph.
global_size_t getGlobalNumDiags() const
Returns the number of global diagonal entries, based on global row/column index comparisons.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
bool isFillActive() const
Whether the matrix is not fill complete.
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
global_size_t getGlobalNumEntries() const
Returns the global number of entries in the graph.
Teuchos::RCP< CrsMatrix< T, LocalOrdinal, GlobalOrdinal, Node, classic > > convert() const
Return another CrsMatrix with the same entries, but converted to a different Scalar type T...
virtual void copyAndPermute(const SrcDistObject &source, size_t numSameIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteToLIDs, const Teuchos::ArrayView< const LocalOrdinal > &permuteFromLIDs)
Perform copies and permutations that are local to this process.
size_t getNodeNumDiags() const
Returns the number of local diagonal entries, based on global row/column index comparisons.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
Teuchos::RCP< const map_type > getRowMap() const
Returns the Map that describes the row distribution in this graph.
Teuchos::ArrayView< const impl_scalar_type > getView(RowInfo rowinfo) const
Constant view of all entries (including extra space) in the given row.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
global_size_t getGlobalNumEntries() const
The global number of entries in this matrix.
void gaussSeidelCopy(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of gaussSeidel(), with fewer requirements on X.
void rightScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &x)
bool isLocallyComplete() const
Do all source Map indices on the calling process exist on at least one process (not necessarily this ...
Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
Teuchos::ArrayView< impl_scalar_type > getViewNonConst(const RowInfo &rowinfo) const
Nonconst view of all entries (including extra space) in the given row.
size_t getNodeNumCols() const
The number of columns connected to the locally owned rows of this matrix.
Teuchos::RCP< const map_type > getRowMap() const
The Map that describes the row distribution in this matrix.
bool hasColMap() const
Indicates whether the matrix has a well-defined column map.
bool packCrsMatrix(const LocalMatrixType &lclMatrix, const LocalMapType &lclColMap, std::unique_ptr< std::string > &errStr, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets, const Teuchos::ArrayView< const typename LocalMatrixType::ordinal_type > &exportLIDs, const int myRank, Distributor &)
Pack specified entries of the given local sparse matrix for communication.
Teuchos::RCP< const map_type > getRangeMap() const
Returns the Map associated with the range of this graph.
One or more distributed dense vectors.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const
Returns the current number of entries on this node in the specified global row.
Teuchos::RCP< node_type > getNode() const
The Kokkos Node instance.
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular...
global_size_t getGlobalNumRows() const
Returns the number of global rows in the graph.
void deep_copy(MultiVector< DS, DL, DG, DN, dstClassic > &dst, const MultiVector< SS, SL, SG, SN, srcClassic > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::RCP< MV > getColumnMapMultiVector(const MV &X_domainMap, const bool force=false) const
Create a (or fetch a cached) column Map MultiVector.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to a FancyOStream object.
size_t getNodeNumDiags() const
Returns the number of local diagonal entries, based on global row/column index comparisons.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
Compute a sparse matrix-MultiVector multiply.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const
Returns the current number of entries on this node in the specified global row.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
virtual bool checkSizes(const SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns the communicator.
size_t nodeNumEntries_
Local number of (populated) entries; must always be consistent.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
Node node_type
This class' fourth template parameter; the Kokkos device type.
bool fillComplete_
Whether the matrix is fill complete.
bool isLowerTriangular() const
Indicates whether the matrix is lower triangular.
Node::device_type device_type
The Kokkos device type.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y...
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
RowInfo getRowInfo(const LocalOrdinal myRow) const
Get information about the locally owned row with local index myRow.
virtual bool supportsRowViews() const
Return true if getLocalRowView() and getGlobalRowView() are valid for this object.
void getLocalDiagCopy(Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &diag) const
Get a copy of the diagonal entries of the matrix.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
void doReversePostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the reverse communication plan.
Functions for packing the entries of a Tpetra::CrsMatrix for communication, in the case where it is v...
size_t getNodeNumCols() const
Returns the number of columns connected to the locally owned rows of this graph.
Teuchos::RCP< node_type > getNode() const
Returns the underlying node.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap)
Remove processes owning zero rows from the Maps and their communicator.
device_type::execution_space execution_space
The Kokkos execution space.
mag_type frobNorm_
Cached Frobenius norm of the (global) matrix.
Implementation details of Tpetra.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
void reduce()
Sum values of a locally replicated multivector across all processes.
Teuchos::RCP< const map_type > getDomainMap() const
Returns the Map associated with the domain of this graph.
void fillLocalMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local matrix.
size_t global_size_t
Global size_t object.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2 valEnd)
Merge values in place, additively, with the same index.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, const Teuchos::ArrayView< GlobalOrdinal > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
Kokkos::StaticCrsGraph< LocalOrdinal, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
Insert new values that don't currently exist.
Teuchos::RCP< const import_type > getImporter() const
Returns the importer associated with this graph.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map where each GID is owned by only one process...
void exportAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > &destMatrix, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination matrix, and make the result fill complete.
global_size_t getGlobalNumCols() const
The number of global columns in the matrix.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
bool isConstantStride() const
Whether this multivector has constant stride between columns.
void applyTranspose(const MV &X_in, MV &Y_in, const Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Special case of apply() for mode != Teuchos::NO_TRANS.
void getLocalDiagOffsets(Teuchos::ArrayRCP< size_t > &offsets) const
Get offsets of the diagonal entries in the matrix.
global_size_t getGlobalNumRows() const
Number of global elements in the row map of this matrix.
bool isFillComplete() const
Whether the matrix is fill complete.
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
void allocateValues(ELocalGlobal lg, GraphAllocationStatus gas)
Allocate values (and optionally indices) using the Node.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Teuchos::RCP< const export_type > getExporter() const
Returns the exporter associated with this graph.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
bool unpackCrsMatrixAndCombine(LocalMatrixType &lclMatrix, const LocalMapType &lclColMap, std::unique_ptr< std::string > &errStr, const Teuchos::ArrayView< const typename LocalMatrixType::ordinal_type > &importLIDs, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, const int myRank, Distributor &, CombineMode combineMode, const bool atomic)
Unpack the imported column indices and values, and combine into matrix.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X...
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
void unpackAndCombine(const Teuchos::ArrayView< const LocalOrdinal > &importLIDs, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode)
Unpack the imported column indices and values, and combine into matrix.
Sets up and executes a communication plan for a Tpetra DistObject.
bool isStorageOptimized() const
Returns true if storage has been optimized.
void getLocalRowView(const LocalOrdinal lclRow, Teuchos::ArrayView< const LocalOrdinal > &lclColInds) const
Get a const, non-persisting view of the given local row's local column indices, as a Teuchos::ArrayVi...
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
void setAllValues(const typename local_matrix_type::row_map_type &ptr, const typename local_graph_type::entries_type::non_const_type &ind, const typename local_matrix_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
Teuchos::RCP< const map_type > map_
The Map over which this object is distributed.
LocalOrdinal getLocalRowViewRaw(const LocalOrdinal lclRow, LocalOrdinal &numEnt, const LocalOrdinal *&lclColInds, const Scalar *&vals) const
Get a constant, nonpersisting, locally indexed view of the given row of the matrix, using "raw" pointers instead of Teuchos::ArrayView.
bool isUpperTriangular() const
Indicates whether the matrix is upper triangular.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Utility functions for packing and unpacking sparse matrix entries.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
KokkosSparse::CrsMatrix< impl_scalar_type, LocalOrdinal, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
virtual ~CrsMatrix()
Destructor.
void getLocalRowView(LocalOrdinal LocalRow, Teuchos::ArrayView< const LocalOrdinal > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting view of a row of this matrix, using local row and column indices...
Replace old value with maximum of magnitudes of old and new values.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
Abstract base class for objects that can be the source of an Import or Export operation.
size_t getNodeNumRows() const
Returns the number of graph rows owned on the calling node.
size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const
Returns the current number of entries on this node in the specified local row.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given objects.
static LocalMapType::local_ordinal_type getDiagCopyWithoutOffsets(const DiagType &D, const LocalMapType &rowMap, const LocalMapType &colMap, const CrsMatrixType &A)
Given a locally indexed, local sparse matrix, and corresponding local row and column Maps...
LocalOrdinal getViewRaw(impl_scalar_type *&vals, LocalOrdinal &numEnt, const RowInfo &rowinfo) const
Nonconst pointer to all entries (including extra space) in the given row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
LO getLocalDiagCopyWithoutOffsetsNotFillComplete(::Tpetra::Vector< SC, LO, GO, NT > &diag, const ::Tpetra::RowMatrix< SC, LO, GO, NT > &A, const bool debug=false)
Given a locally indexed, global sparse matrix, extract the matrix's diagonal entries into a Tpetra::V...
Replace existing values with new values.
#define TPETRA_EFFICIENCY_WARNING(throw_exception_test, Exception, msg)
Print or throw an efficiency warning.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
void computeGlobalConstants()
Compute matrix properties that require collectives.
bool hasTransposeApply() const
Whether apply() allows applying the transpose or conjugate transpose.
bool isLocallyIndexed() const
If graph indices are in the local range, this function returns true. Otherwise, this function returns...
Replace old values with zero.
size_t getNodeMaxNumRowEntries() const
Maximum number of entries in all rows owned by the calling process.
bool isUpperTriangular() const
Whether the graph is locally upper triangular.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
void reorderedGaussSeidel(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &D, const Teuchos::ArrayView< LocalOrdinal > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
Reordered "Hybrid" Jacobi + (Gauss-Seidel or SOR) on $B = A X$.
void importAndFillComplete(Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > > &destMatrix, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination matrix, and make the result fill complete.
size_t getGlobalMaxNumRowEntries() const
Maximum number of entries in all rows over all processes.
void getGlobalRowCopy(GlobalOrdinal GlobalRow, const Teuchos::ArrayView< GlobalOrdinal > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
bool isLowerTriangular() const
Whether the graph is locally lower triangular.
void getLocalRowCopy(LocalOrdinal localRow, const Teuchos::ArrayView< LocalOrdinal > &colInds, const Teuchos::ArrayView< Scalar > &vals, size_t &numEntries) const
Fill given arrays with a deep copy of the locally owned entries of the matrix in a given row...
Kokkos::Details::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
size_t getNodeNumRows() const
The number of matrix rows owned by the calling process.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
bool isGloballyIndexed() const
If graph indices are in the global range, this function returns true. Otherwise, this function return...
void leftScale(const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &x)
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply() and gaussSeidel().
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
void reorderedGaussSeidelCopy(MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &D, const Teuchos::ArrayView< LocalOrdinal > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of reorderedGaussSeidel(), with fewer requirements on X.
A parallel distribution of indices over processes.
Teuchos::RCP< const map_type > getColMap() const
Returns the Map that describes the column distribution in this graph.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
void applyNonTranspose(const MV &X_in, MV &Y_in, Scalar alpha, Scalar beta) const
Special case of apply() for mode == Teuchos::NO_TRANS.
size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const
Get the number of entries in the given row (local index).
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
size_t getNumVectors() const
Number of columns in the multivector.
Teuchos::RCP< Node > getNode() const
Get this Map's Node object.
A read-only, row-oriented interface to a sparse matrix.
size_t getGlobalMaxNumRowEntries() const
Returns the maximum number of entries across all rows/columns on all nodes.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
size_t mergeRowIndicesAndValues(crs_graph_type &graph, const RowInfo &rowInfo)
Merge duplicate row indices in the given row, along with their corresponding values.
A distributed dense vector.
bool isGloballyIndexed() const
Whether the matrix is globally indexed on the calling process.
void gaussSeidel(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &B, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &X, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node, classic > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps) const
"Hybrid" Jacobi + (Gauss-Seidel or SOR) on $B = A X$.
void insertLocalValues(const LocalOrdinal localRow, const Teuchos::ArrayView< const LocalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using local column indices.
Teuchos::RCP< const map_type > getColMap() const
The Map that describes the column distribution in this matrix.
Teuchos::RCP< const map_type > getDomainMap() const
The domain Map of this matrix.
Kokkos::View< size_t *, Kokkos::LayoutLeft, device_type >::HostMirror k_numRowEntries_
The number of local entries in each locally owned row.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, CombineMode CM)
Export data into this object using an Export object ("forward mode").
void sortAndMergeIndicesAndValues(const bool sorted, const bool merged)
Sort and merge duplicate local column indices in all rows on the calling process, along with their co...
void globalAssemble()
Communicate nonlocal contributions to other processes.
Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
Teuchos::RCP< MV > getRowMapMultiVector(const MV &Y_rangeMap, const bool force=false) const
Create a (or fetch a cached) row Map MultiVector.
bool isDistributed() const
Whether this is a globally distributed object.
Teuchos::RCP< const map_type > getRangeMap() const
The range Map of this matrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
bool isStorageOptimized() const
Returns true if storage has been optimized.
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
virtual Teuchos::RCP< RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const RowMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params) const
Implementation of RowMatrix::add: return alpha*A + beta*this.
Teuchos::RCP< const crs_graph_type > getCrsGraph() const
This matrix's graph, as a CrsGraph.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix is distributed.
void clearGlobalConstants()
Clear matrix properties that require collectives.
global_size_t getGlobalNumCols() const
Returns the number of global columns in the graph.
bool hasColMap() const
Whether the graph has a column Map.
global_size_t getGlobalNumDiags() const
Returns the number of global diagonal entries, based on global row/column index comparisons.
LocalOrdinal getViewRawConst(const impl_scalar_type *&vals, LocalOrdinal &numEnt, const RowInfo &rowinfo) const
Const pointer to all entries (including extra space) in the given row.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
virtual void pack(const Teuchos::ArrayView< const LocalOrdinal > &exportLIDs, Teuchos::Array< char > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets, Distributor &distor) const
Pack this object's data for an Import or Export.
bool isLocallyIndexed() const
Whether the matrix is locally indexed on the calling process.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, CombineMode CM)
Import data into this object using an Import object ("forward mode").
local_matrix_type lclMatrix_
The local sparse matrix.
void fillLocalGraphAndMatrix(const Teuchos::RCP< Teuchos::ParameterList > &params)
Fill data into the local graph and matrix.
GlobalOrdinal getIndexBase() const
The index base for global indices for this matrix.