Tpetra parallel linear algebra  Version of the Day
Tpetra_CrsMatrixMultiplyOp.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_CRSMATRIXMULTIPLYOP_HPP
43 #define TPETRA_CRSMATRIXMULTIPLYOP_HPP
44 
49 
51 #include "Tpetra_CrsMatrix.hpp"
52 #include "Tpetra_Util.hpp"
55 #include "Tpetra_LocalCrsMatrixOperator.hpp"
56 
57 namespace Tpetra {
58 
95  template <class Scalar,
96  class MatScalar,
97  class LocalOrdinal,
98  class GlobalOrdinal,
99  class Node>
101  public Operator<Scalar, LocalOrdinal, GlobalOrdinal, Node>
102  {
103  public:
109 
110  private:
111  using local_matrix_device_type =
113 
114  public:
116 
117 
122  CrsMatrixMultiplyOp (const Teuchos::RCP<const crs_matrix_type>& A) :
123  matrix_ (A),
124  localMultiply_ (std::make_shared<local_matrix_device_type> (
125  A->getLocalMatrixDevice ()))
126  {}
127 
129  ~CrsMatrixMultiplyOp () override = default;
130 
132 
134 
137  void
140  Teuchos::ETransp mode = Teuchos::NO_TRANS,
141  Scalar alpha = Teuchos::ScalarTraits<Scalar>::one (),
142  Scalar beta = Teuchos::ScalarTraits<Scalar>::zero ()) const override
143  {
144  TEUCHOS_TEST_FOR_EXCEPTION
145  (! matrix_->isFillComplete (), std::runtime_error,
146  Teuchos::typeName (*this) << "::apply(): underlying matrix is not fill-complete.");
147  TEUCHOS_TEST_FOR_EXCEPTION
148  (X.getNumVectors () != Y.getNumVectors (), std::runtime_error,
149  Teuchos::typeName (*this) << "::apply(X,Y): X and Y must have the same number of vectors.");
150  TEUCHOS_TEST_FOR_EXCEPTION
151  (Teuchos::ScalarTraits<Scalar>::isComplex && mode == Teuchos::TRANS, std::logic_error,
152  Teuchos::typeName (*this) << "::apply() does not currently support transposed multiplications for complex scalar types.");
153  if (mode == Teuchos::NO_TRANS) {
154  applyNonTranspose (X, Y, alpha, beta);
155  }
156  else {
157  applyTranspose (X, Y, mode, alpha, beta);
158  }
159  }
160 
166  bool hasTransposeApply() const override {
167  return true;
168  }
169 
171  Teuchos::RCP<const map_type> getDomainMap () const override {
172  return matrix_->getDomainMap ();
173  }
174 
176  Teuchos::RCP<const map_type> getRangeMap () const override {
177  return matrix_->getRangeMap ();
178  }
179 
181 
182  protected:
184 
186  const Teuchos::RCP<const crs_matrix_type> matrix_;
187 
189  LocalCrsMatrixOperator<Scalar, MatScalar, typename
191 
204  mutable Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > importMV_;
205 
218  mutable Teuchos::RCP<MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > exportMV_;
219 
222  void
225  Teuchos::ETransp mode,
226  Scalar alpha,
227  Scalar beta) const
228  {
229  using Teuchos::null;
230  using Teuchos::RCP;
231  using Teuchos::rcp;
232  using export_type = Export<LocalOrdinal, GlobalOrdinal, Node>;
233  using import_type = Import<LocalOrdinal, GlobalOrdinal, Node>;
234  using STS = Teuchos::ScalarTraits<Scalar>;
235  typedef typename MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>:: dual_view_type::t_dev nonconst_view_type;
236 
237  const size_t numVectors = X_in.getNumVectors();
238  // because of Views, it is difficult to determine if X and Y point to the same data.
239  // however, if they reference the exact same object, we will do the user the favor of copying X into new storage (with a warning)
240  // we ony need to do this if we have trivial importers; otherwise, we don't actually apply the operator from X into Y
241  RCP<const import_type> importer = matrix_->getGraph()->getImporter();
242  RCP<const export_type> exporter = matrix_->getGraph()->getExporter();
243 
244  // some parameters for below
245  const bool Y_is_replicated = ! Y_in.isDistributed ();
246  const bool Y_is_overwritten = (beta == STS::zero ());
247  const int myRank = matrix_->getComm ()->getRank ();
248  if (Y_is_replicated && myRank != 0) {
249  beta = STS::zero ();
250  }
251 
252  // access X indirectly, in case we need to create temporary storage
253  RCP<const MV> X;
254  // currently, cannot multiply from multivector of non-constant stride
255  if (! X_in.isConstantStride () && importer == null) {
256  // generate a strided copy of X_in
257  X = Teuchos::rcp (new MV (X_in, Teuchos::Copy));
258  }
259  else {
260  // just temporary, so this non-owning RCP is okay
261  X = Teuchos::rcpFromRef (X_in);
262  }
263 
264  // set up import/export temporary multivectors
265  if (importer != null) {
266  if (importMV_ != null && importMV_->getNumVectors () != numVectors) {
267  importMV_ = null;
268  }
269  if (importMV_ == null) {
270  importMV_ = rcp (new MV (matrix_->getColMap (), numVectors));
271  }
272  }
273  if (exporter != null) {
274  if (exportMV_ != null && exportMV_->getNumVectors () != numVectors) {
275  exportMV_ = null;
276  }
277  if (exportMV_ == null) {
278  exportMV_ = rcp (new MV (matrix_->getRowMap (), numVectors));
279  }
280  }
281 
282  // If we have a non-trivial exporter, we must import elements that are permuted or are on other processors
283  if (exporter != null) {
284  exportMV_->doImport(X_in,*exporter,INSERT);
285  X = exportMV_; // multiply out of exportMV_
286  }
287 
288  auto X_lcl = X->getLocalViewDevice(Access::ReadOnly);
289 
290  // If we have a non-trivial importer, we must export elements that are permuted or belong to other processors
291  // We will compute solution into the to-be-exported MV; get a view
292  if (importer != null) {
293  // Beta is zero here, so we clobber Y_lcl
294  auto Y_lcl = importMV_->getLocalViewDevice(Access::OverwriteAll);
295 
296  localMultiply_.apply (X_lcl, Y_lcl, mode, alpha, STS::zero ());
297  if (Y_is_overwritten) {
298  Y_in.putScalar (STS::zero ());
299  }
300  else {
301  Y_in.scale (beta);
302  }
303  Y_in.doExport (*importMV_, *importer, ADD_ASSIGN);
304  }
305  // otherwise, multiply into Y
306  else {
307  // can't multiply in-situ; can't multiply into non-strided multivector
308  if (! Y_in.isConstantStride () || X.getRawPtr () == &Y_in) {
309  // generate a strided copy of Y
310  MV Y (Y_in, Teuchos::Copy);
311  nonconst_view_type Y_lcl;
312  if(Y_is_overwritten) Y_lcl = Y.getLocalViewDevice(Access::OverwriteAll);
313  else Y_lcl = Y.getLocalViewDevice(Access::ReadWrite);
314 
315  localMultiply_.apply (X_lcl, Y_lcl, mode, alpha, beta);
316  Tpetra::deep_copy (Y_in, Y);
317  }
318  else {
319  nonconst_view_type Y_lcl;
320  if(Y_is_overwritten) Y_lcl = Y_in.getLocalViewDevice(Access::OverwriteAll);
321  else Y_lcl = Y_in.getLocalViewDevice(Access::ReadWrite);
322 
323  localMultiply_.apply (X_lcl, Y_lcl, mode, alpha, beta);
324  }
325  }
326  // Handle case of rangemap being a local replicated map: in this case, sum contributions from each processor
327  if (Y_is_replicated) {
328  Y_in.reduce();
329  }
330  }
331 
333  void
336  Scalar alpha,
337  Scalar beta) const
338  {
340  using Teuchos::NO_TRANS;
341  using Teuchos::RCP;
342  using Teuchos::rcp;
343  using Teuchos::rcp_const_cast;
344  using Teuchos::rcpFromRef;
345  typedef Export<LocalOrdinal,GlobalOrdinal,Node> export_type;
346  typedef Import<LocalOrdinal,GlobalOrdinal,Node> import_type;
347  typedef Teuchos::ScalarTraits<Scalar> STS;
348  typedef typename MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>:: dual_view_type::t_dev nonconst_view_type;
349 
350  if (alpha == STS::zero ()) {
351  if (beta == STS::zero ()) {
352  Y_in.putScalar (STS::zero ());
353  } else if (beta != STS::one ()) {
354  Y_in.scale (beta);
355  }
356  return;
357  }
358 
359  // It's possible that X is a view of Y or vice versa. We don't
360  // allow this (apply() requires that X and Y not alias one
361  // another), but it's helpful to detect and work around this
362  // case. We don't try to to detect the more subtle cases (e.g.,
363  // one is a subview of the other, but their initial pointers
364  // differ). We only need to do this if this matrix's Import is
365  // trivial; otherwise, we don't actually apply the operator from
366  // X into Y.
367 
368  RCP<const import_type> importer = matrix_->getGraph()->getImporter();
369  RCP<const export_type> exporter = matrix_->getGraph()->getExporter();
370 
371  // If beta == 0, then the output MV will be overwritten; none of
372  // its entries should be read. (Sparse BLAS semantics say that we
373  // must ignore any Inf or NaN entries in Y_in, if beta is zero.)
374  // This matters if we need to do an Export operation; see below.
375  const bool Y_is_overwritten = (beta == STS::zero());
376 
377  // We treat the case of a replicated MV output specially.
378  const bool Y_is_replicated =
379  (! Y_in.isDistributed () && matrix_->getComm ()->getSize () != 1);
380 
381  // This is part of the special case for replicated MV output.
382  // We'll let each process do its thing, but do an all-reduce at
383  // the end to sum up the results. Setting beta=0 on all
384  // processes but Proc 0 makes the math work out for the
385  // all-reduce. (This assumes that the replicated data is
386  // correctly replicated, so that the data are the same on all
387  // processes.)
388  if (Y_is_replicated && matrix_->getComm ()->getRank () > 0) {
389  beta = STS::zero();
390  }
391 
392  // Temporary MV for Import operation. After the block of code
393  // below, this will be an (Imported if necessary) column Map MV
394  // ready to give to localMultiply_.apply(...).
395  RCP<const MV> X_colMap;
396  if (importer.is_null ()) {
397  if (! X_in.isConstantStride ()) {
398  // Not all sparse mat-vec kernels can handle an input MV with
399  // nonconstant stride correctly, so we have to copy it in that
400  // case into a constant stride MV. To make a constant stride
401  // copy of X_in, we force creation of the column (== domain)
402  // Map MV (if it hasn't already been created, else fetch the
403  // cached copy). This avoids creating a new MV each time.
404  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in, true);
405  Tpetra::deep_copy (*X_colMapNonConst, X_in);
406  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
407  }
408  else {
409  // The domain and column Maps are the same, so do the local
410  // multiply using the domain Map input MV X_in.
411  X_colMap = rcpFromRef (X_in);
412  }
413  }
414  else { // need to Import source (multi)vector
415  ProfilingRegion regionImport ("Tpetra::CrsMatrixMultiplyOp::apply: Import");
416 
417  // We're doing an Import anyway, which will copy the relevant
418  // elements of the domain Map MV X_in into a separate column Map
419  // MV. Thus, we don't have to worry whether X_in is constant
420  // stride.
421  RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
422 
423  // Import from the domain Map MV to the column Map MV.
424  X_colMapNonConst->doImport (X_in, *importer, INSERT);
425  X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
426  }
427 
428  // Temporary MV for doExport (if needed), or for copying a
429  // nonconstant stride output MV into a constant stride MV. This
430  // is null if we don't need the temporary MV, that is, if the
431  // Export is trivial (null).
432  RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
433 
434  auto X_lcl = X_colMap->getLocalViewDevice(Access::ReadOnly);
435 
436  // If we have a nontrivial Export object, we must perform an
437  // Export. In that case, the local multiply result will go into
438  // the row Map multivector. We don't have to make a
439  // constant-stride version of Y_in in this case, because we had to
440  // make a constant stride Y_rowMap MV and do an Export anyway.
441  if (! exporter.is_null ()) {
442  auto Y_lcl = Y_rowMap->getLocalViewDevice(Access::OverwriteAll);
443 
444  localMultiply_.apply (X_lcl, Y_lcl, NO_TRANS,
445  alpha, STS::zero ());
446  {
447  ProfilingRegion regionExport ("Tpetra::CrsMatrixMultiplyOp::apply: Export");
448 
449  // If we're overwriting the output MV Y_in completely (beta
450  // == 0), then make sure that it is filled with zeros before
451  // we do the Export. Otherwise, the ADD combine mode will
452  // use data in Y_in, which is supposed to be zero.
453  if (Y_is_overwritten) {
454  Y_in.putScalar (STS::zero());
455  }
456  else {
457  // Scale the output MV by beta, so that the Export sums in
458  // the mat-vec contribution: Y_in = beta*Y_in + alpha*A*X_in.
459  Y_in.scale (beta);
460  }
461  // Do the Export operation.
462  Y_in.doExport (*Y_rowMap, *exporter, ADD_ASSIGN);
463  }
464  }
465  else { // Don't do an Export: row Map and range Map are the same.
466  //
467  // If Y_in does not have constant stride, or if the column Map
468  // MV aliases Y_in, then we can't let the kernel write directly
469  // to Y_in. Instead, we have to use the cached row (== range)
470  // Map MV as temporary storage.
471  //
472  // FIXME (mfh 05 Jun 2014, mfh 07 Dec 2018) This test for
473  // aliasing only tests if the user passed in the same
474  // MultiVector for both X and Y. It won't detect whether one
475  // MultiVector views the other. We should also check the
476  // MultiVectors' raw data pointers.
477  if (! Y_in.isConstantStride () || X_colMap.getRawPtr () == &Y_in) {
478  // Force creating the MV if it hasn't been created already.
479  // This will reuse a previously created cached MV.
480  Y_rowMap = getRowMapMultiVector (Y_in, true);
481 
482  // If beta == 0, we don't need to copy Y_in into Y_rowMap,
483  // since we're overwriting it anyway.
484  if (beta != STS::zero ()) {
485  Tpetra::deep_copy (*Y_rowMap, Y_in);
486  }
487  nonconst_view_type Y_lcl;
488  if(Y_is_overwritten) Y_lcl = Y_rowMap->getLocalViewDevice(Access::OverwriteAll);
489  else Y_lcl = Y_rowMap->getLocalViewDevice(Access::ReadWrite);
490 
491  localMultiply_.apply (X_lcl, Y_lcl, NO_TRANS, alpha, beta);
492  Tpetra::deep_copy (Y_in, *Y_rowMap);
493  }
494  else {
495  nonconst_view_type Y_lcl;
496  if(Y_is_overwritten) Y_lcl = Y_in.getLocalViewDevice(Access::OverwriteAll);
497  else Y_lcl = Y_in.getLocalViewDevice(Access::ReadWrite);
498 
499  localMultiply_.apply (X_lcl, Y_lcl, NO_TRANS, alpha, beta);
500  }
501  }
502 
503  // If the range Map is a locally replicated Map, sum up
504  // contributions from each process. We set beta = 0 on all
505  // processes but Proc 0 initially, so this will handle the scaling
506  // factor beta correctly.
507  if (Y_is_replicated) {
508  ProfilingRegion regionReduce ("Tpetra::CrsMatrixMultiplyOp::apply: Reduce Y");
509  Y_in.reduce ();
510  }
511  }
512 
513  private:
533  Teuchos::RCP<MV>
534  getColumnMapMultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& X_domainMap,
535  const bool force = false) const
536  {
537  using Teuchos::null;
538  using Teuchos::RCP;
539  using Teuchos::rcp;
540  typedef Import<LocalOrdinal,GlobalOrdinal,Node> import_type;
541 
542  const size_t numVecs = X_domainMap.getNumVectors ();
543  RCP<const import_type> importer = matrix_->getGraph ()->getImporter ();
544  RCP<const map_type> colMap = matrix_->getColMap ();
545 
546  RCP<MV> X_colMap; // null by default
547 
548  // If the Import object is trivial (null), then we don't need a
549  // separate column Map multivector. Just return null in that
550  // case. The caller is responsible for knowing not to use the
551  // returned null pointer.
552  //
553  // If the Import is nontrivial, then we do need a separate
554  // column Map multivector for the Import operation. Check in
555  // that case if we have to (re)create the column Map
556  // multivector.
557  if (! importer.is_null () || force) {
558  if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
559  X_colMap = rcp (new MV (colMap, numVecs));
560 
561  // Cache the newly created multivector for later reuse.
562  importMV_ = X_colMap;
563  }
564  else { // Yay, we can reuse the cached multivector!
565  X_colMap = importMV_;
566  // mfh 09 Jan 2013: We don't have to fill with zeros first,
567  // because the Import uses INSERT combine mode, which overwrites
568  // existing entries.
569  //
570  //X_colMap->putScalar (STS::zero ());
571  }
572  }
573  return X_colMap;
574  }
575 
597  Teuchos::RCP<MV>
598  getRowMapMultiVector (const MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>& Y_rangeMap,
599  const bool force = false) const
600  {
601  using Teuchos::null;
602  using Teuchos::RCP;
603  using Teuchos::rcp;
604  typedef Export<LocalOrdinal,GlobalOrdinal,Node> export_type;
605 
606  const size_t numVecs = Y_rangeMap.getNumVectors ();
607  RCP<const export_type> exporter = matrix_->getGraph ()->getExporter ();
608  RCP<const map_type> rowMap = matrix_->getRowMap ();
609 
610  RCP<MV> Y_rowMap; // null by default
611 
612  // If the Export object is trivial (null), then we don't need a
613  // separate row Map multivector. Just return null in that case.
614  // The caller is responsible for knowing not to use the returned
615  // null pointer.
616  //
617  // If the Export is nontrivial, then we do need a separate row
618  // Map multivector for the Export operation. Check in that case
619  // if we have to (re)create the row Map multivector.
620  if (! exporter.is_null () || force) {
621  if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
622  Y_rowMap = rcp (new MV (rowMap, numVecs));
623  exportMV_ = Y_rowMap; // Cache the newly created MV for later reuse
624  }
625  else { // Yay, we can reuse the cached multivector!
626  Y_rowMap = exportMV_;
627  }
628  }
629  return Y_rowMap;
630  }
631  };
632 
640  template <class OpScalar,
641  class MatScalar,
642  class LocalOrdinal,
643  class GlobalOrdinal,
644  class Node>
645  Teuchos::RCP<
646  CrsMatrixMultiplyOp<OpScalar, MatScalar, LocalOrdinal, GlobalOrdinal, Node> >
647  createCrsMatrixMultiplyOp (const Teuchos::RCP<
649  {
650  typedef CrsMatrixMultiplyOp<OpScalar, MatScalar, LocalOrdinal,
651  GlobalOrdinal, Node> op_type;
652  return Teuchos::rcp (new op_type (A));
653  }
654 
655 } // end of namespace Tpetra
656 
657 #endif // TPETRA_CRSMATRIXMULTIPLYOP_HPP
Forward declaration of Tpetra::CrsMatrixMultiplyOp.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Stand-alone utility functions and macros.
A class for wrapping a CrsMatrix multiply in a Operator.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > importMV_
Column Map MultiVector used in apply().
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this Operator.
bool hasTransposeApply() const override
Whether this Operator's apply() method can apply the transpose or conjugate transpose.
void applyNonTranspose(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X_in, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y_in, Scalar alpha, Scalar beta) const
Apply the matrix (not its transpose) to X_in, producing Y_in.
void apply(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const override
Compute Y = beta*Y + alpha*Op(A)*X, where Op(A) is either A, , or .
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this Operator.
void applyTranspose(const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &X_in, MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Y_in, Teuchos::ETransp mode, Scalar alpha, Scalar beta) const
Apply the transpose or conjugate transpose of the matrix to X_in, producing Y_in.
CrsMatrixMultiplyOp(const Teuchos::RCP< const crs_matrix_type > &A)
Constructor.
~CrsMatrixMultiplyOp() override=default
Destructor (virtual for memory safety of derived classes).
Teuchos::RCP< CrsMatrixMultiplyOp< OpScalar, MatScalar, LocalOrdinal, GlobalOrdinal, Node > > createCrsMatrixMultiplyOp(const Teuchos::RCP< const CrsMatrix< MatScalar, LocalOrdinal, GlobalOrdinal, Node > > &A)
Non-member function to create a CrsMatrixMultiplyOp.
const Teuchos::RCP< const crs_matrix_type > matrix_
The underlying CrsMatrix object.
Teuchos::RCP< MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > exportMV_
Row Map MultiVector used in apply().
LocalCrsMatrixOperator< Scalar, MatScalar, typename crs_matrix_type::device_type > localMultiply_
Implementation of local sparse matrix-vector multiply.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
typename Node::device_type device_type
The Kokkos device type.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
bool isDistributed() const
Whether this is a globally distributed object.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Abstract interface for local operators (e.g., matrices and preconditioners).
A parallel distribution of indices over processes.
One or more distributed dense vectors.
void reduce()
Sum values of a locally replicated multivector across all processes.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
size_t getNumVectors() const
Number of columns in the multivector.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
bool isConstantStride() const
Whether this multivector has constant stride between columns.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
Abstract interface for operators (e.g., matrices and preconditioners).
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
@ ADD_ASSIGN
Accumulate new values into existing values (may not be supported in all classes)
@ INSERT
Insert new values that don't currently exist.